llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfnmacc-vp.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \
   3 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
   4 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \
   5 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
   6
   7 declare <2 x half> @llvm.vp.fneg.v2f16(<2 x half>, <2 x i1>, i32) ; (value, mask, evl)
   8 declare <2 x half> @llvm.vp.fma.v2f16(<2 x half>, <2 x half>, <2 x half>, <2 x i1>, i32) ; (three values, mask, evl)
   9 declare <2 x half> @llvm.vp.select.v2f16(<2 x i1>, <2 x half>, <2 x half>, i32) ; used by the *_ta tests
  10 declare <2 x half> @llvm.vp.merge.v2f16(<2 x i1>, <2 x half>, <2 x half>, i32) ; used by the non-_ta tests
  11
  12 define <2 x half> @vfnmacc_vv_v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c, <2 x i1> %m, i32 zeroext %evl) {
  13 ; CHECK-LABEL: vfnmacc_vv_v2f16:
  14 ; CHECK:       # %bb.0:
  15 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
  16 ; CHECK-NEXT:    vfnmacc.vv v10, v8, v9, v0.t
  17 ; CHECK-NEXT:    vmv1r.v v8, v10
  18 ; CHECK-NEXT:    ret
  19   %a.neg = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> splat (i1 -1), i32 %evl) ; -a
  20   %c.neg = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> splat (i1 -1), i32 %evl) ; -c
  21   %fma = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %a.neg, <2 x half> %b, <2 x half> %c.neg, <2 x i1> splat (i1 -1), i32 %evl) ; (-a * b) + (-c)
  22   %res = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %m, <2 x half> %fma, <2 x half> %c, i32 %evl) ; merge on %m, %c otherwise
  23   ret <2 x half> %res
  24 }
  25
  26 define <2 x half> @vfnmacc_vv_v2f16_unmasked(<2 x half> %a, <2 x half> %b, <2 x half> %c, <2 x i1> %m, i32 zeroext %evl) {
  27 ; CHECK-LABEL: vfnmacc_vv_v2f16_unmasked:
  28 ; CHECK:       # %bb.0:
  29 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, ma
  30 ; CHECK-NEXT:    vfnmacc.vv v10, v8, v9
  31 ; CHECK-NEXT:    vmv1r.v v8, v10
  32 ; CHECK-NEXT:    ret
  33   %a.neg = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> splat (i1 -1), i32 %evl) ; -a
  34   %c.neg = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> splat (i1 -1), i32 %evl) ; -c
  35   %fma = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %a.neg, <2 x half> %b, <2 x half> %c.neg, <2 x i1> splat (i1 -1), i32 %evl) ; (-a * b) + (-c)
  36   %res = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> splat (i1 -1), <2 x half> %fma, <2 x half> %c, i32 %evl) ; merge with an all-ones mask; %m is not read
  37   ret <2 x half> %res
  38 }
  39
  40 define <2 x half> @vfnmacc_vf_v2f16(<2 x half> %a, half %b, <2 x half> %c, <2 x i1> %m, i32 zeroext %evl) {
  41 ; CHECK-LABEL: vfnmacc_vf_v2f16:
  42 ; CHECK:       # %bb.0:
  43 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
  44 ; CHECK-NEXT:    vfnmacc.vf v9, fa0, v8, v0.t
  45 ; CHECK-NEXT:    vmv1r.v v8, v9
  46 ; CHECK-NEXT:    ret
  47   %b.ins = insertelement <2 x half> poison, half %b, i32 0 ; scalar %b into lane 0
  48   %b.splat = shufflevector <2 x half> %b.ins, <2 x half> poison, <2 x i32> zeroinitializer ; broadcast lane 0
  49   %a.neg = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> splat (i1 -1), i32 %evl) ; -a
  50   %c.neg = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> splat (i1 -1), i32 %evl) ; -c
  51   %fma = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %a.neg, <2 x half> %b.splat, <2 x half> %c.neg, <2 x i1> splat (i1 -1), i32 %evl) ; (-a * splat(b)) + (-c)
  52   %res = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %m, <2 x half> %fma, <2 x half> %c, i32 %evl) ; merge on %m, %c otherwise
  53   ret <2 x half> %res
  54 }
  55
  56 define <2 x half> @vfnmacc_vf_v2f16_commute(<2 x half> %a, half %b, <2 x half> %c, <2 x i1> %m, i32 zeroext %evl) {
  57 ; CHECK-LABEL: vfnmacc_vf_v2f16_commute:
  58 ; CHECK:       # %bb.0:
  59 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
  60 ; CHECK-NEXT:    vfnmacc.vf v9, fa0, v8, v0.t
  61 ; CHECK-NEXT:    vmv1r.v v8, v9
  62 ; CHECK-NEXT:    ret
  63   %b.ins = insertelement <2 x half> poison, half %b, i32 0 ; scalar %b into lane 0
  64   %b.splat = shufflevector <2 x half> %b.ins, <2 x half> poison, <2 x i32> zeroinitializer ; broadcast lane 0
  65   %a.neg = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> splat (i1 -1), i32 %evl) ; -a
  66   %c.neg = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> splat (i1 -1), i32 %evl) ; -c
  67   %fma = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %b.splat, <2 x half> %a.neg, <2 x half> %c.neg, <2 x i1> splat (i1 -1), i32 %evl) ; (splat(b) * -a) + (-c)
  68   %res = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %m, <2 x half> %fma, <2 x half> %c, i32 %evl) ; merge on %m, %c otherwise
  69   ret <2 x half> %res
  70 }
  71
  72 define <2 x half> @vfnmacc_vf_v2f16_unmasked(<2 x half> %a, half %b, <2 x half> %c, i32 zeroext %evl) {
  73 ; CHECK-LABEL: vfnmacc_vf_v2f16_unmasked:
  74 ; CHECK:       # %bb.0:
  75 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, ma
  76 ; CHECK-NEXT:    vfnmacc.vf v9, fa0, v8
  77 ; CHECK-NEXT:    vmv1r.v v8, v9
  78 ; CHECK-NEXT:    ret
  79   %b.ins = insertelement <2 x half> poison, half %b, i32 0 ; scalar %b into lane 0
  80   %b.splat = shufflevector <2 x half> %b.ins, <2 x half> poison, <2 x i32> zeroinitializer ; broadcast lane 0
  81   %a.neg = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> splat (i1 -1), i32 %evl) ; -a
  82   %c.neg = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> splat (i1 -1), i32 %evl) ; -c
  83   %fma = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %a.neg, <2 x half> %b.splat, <2 x half> %c.neg, <2 x i1> splat (i1 -1), i32 %evl) ; (-a * splat(b)) + (-c)
  84   %res = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> splat (i1 -1), <2 x half> %fma, <2 x half> %c, i32 %evl) ; merge with an all-ones mask
  85   ret <2 x half> %res
  86 }
  87
  88 define <2 x half> @vfnmacc_vv_v2f16_ta(<2 x half> %a, <2 x half> %b, <2 x half> %c, <2 x i1> %m, i32 zeroext %evl) {
  89 ; CHECK-LABEL: vfnmacc_vv_v2f16_ta:
  90 ; CHECK:       # %bb.0:
  91 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
  92 ; CHECK-NEXT:    vfnmacc.vv v10, v8, v9, v0.t
  93 ; CHECK-NEXT:    vmv1r.v v8, v10
  94 ; CHECK-NEXT:    ret
  95   %a.neg = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> splat (i1 -1), i32 %evl) ; -a
  96   %c.neg = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> splat (i1 -1), i32 %evl) ; -c
  97   %fma = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %a.neg, <2 x half> %b, <2 x half> %c.neg, <2 x i1> splat (i1 -1), i32 %evl) ; (-a * b) + (-c)
  98   %res = call <2 x half> @llvm.vp.select.v2f16(<2 x i1> %m, <2 x half> %fma, <2 x half> %c, i32 %evl) ; select on %m, %c otherwise
  99   ret <2 x half> %res
 100 }
 101
 102 define <2 x half> @vfnmacc_vf_v2f16_ta(<2 x half> %a, half %b, <2 x half> %c, <2 x i1> %m, i32 zeroext %evl) {
 103 ; CHECK-LABEL: vfnmacc_vf_v2f16_ta:
 104 ; CHECK:       # %bb.0:
 105 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
 106 ; CHECK-NEXT:    vfnmacc.vf v9, fa0, v8, v0.t
 107 ; CHECK-NEXT:    vmv1r.v v8, v9
 108 ; CHECK-NEXT:    ret
 109   %b.ins = insertelement <2 x half> poison, half %b, i32 0 ; scalar %b into lane 0
 110   %b.splat = shufflevector <2 x half> %b.ins, <2 x half> poison, <2 x i32> zeroinitializer ; broadcast lane 0
 111   %a.neg = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> splat (i1 -1), i32 %evl) ; -a
 112   %c.neg = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> splat (i1 -1), i32 %evl) ; -c
 113   %fma = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %a.neg, <2 x half> %b.splat, <2 x half> %c.neg, <2 x i1> splat (i1 -1), i32 %evl) ; (-a * splat(b)) + (-c)
 114   %res = call <2 x half> @llvm.vp.select.v2f16(<2 x i1> %m, <2 x half> %fma, <2 x half> %c, i32 %evl) ; select on %m, %c otherwise
 115   ret <2 x half> %res
 116 }
 117
 118 define <2 x half> @vfnmacc_vf_v2f16_commute_ta(<2 x half> %a, half %b, <2 x half> %c, <2 x i1> %m, i32 zeroext %evl) {
 119 ; CHECK-LABEL: vfnmacc_vf_v2f16_commute_ta:
 120 ; CHECK:       # %bb.0:
 121 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
 122 ; CHECK-NEXT:    vfnmacc.vf v9, fa0, v8, v0.t
 123 ; CHECK-NEXT:    vmv1r.v v8, v9
 124 ; CHECK-NEXT:    ret
 125   %b.ins = insertelement <2 x half> poison, half %b, i32 0 ; scalar %b into lane 0
 126   %b.splat = shufflevector <2 x half> %b.ins, <2 x half> poison, <2 x i32> zeroinitializer ; broadcast lane 0
 127   %a.neg = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> splat (i1 -1), i32 %evl) ; -a
 128   %c.neg = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> splat (i1 -1), i32 %evl) ; -c
 129   %fma = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %b.splat, <2 x half> %a.neg, <2 x half> %c.neg, <2 x i1> splat (i1 -1), i32 %evl) ; (splat(b) * -a) + (-c)
 130   %res = call <2 x half> @llvm.vp.select.v2f16(<2 x i1> %m, <2 x half> %fma, <2 x half> %c, i32 %evl) ; select on %m, %c otherwise
 131   ret <2 x half> %res
 132 }
 133
 134 declare <4 x half> @llvm.vp.fneg.v4f16(<4 x half>, <4 x i1>, i32) ; (value, mask, evl)
 135 declare <4 x half> @llvm.vp.fma.v4f16(<4 x half>, <4 x half>, <4 x half>, <4 x i1>, i32) ; (three values, mask, evl)
 136 declare <4 x half> @llvm.vp.select.v4f16(<4 x i1>, <4 x half>, <4 x half>, i32) ; used by the *_ta tests
 137 declare <4 x half> @llvm.vp.merge.v4f16(<4 x i1>, <4 x half>, <4 x half>, i32) ; used by the non-_ta tests
 138
 139 define <4 x half> @vfnmacc_vv_v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x i1> %m, i32 zeroext %evl) {
 140 ; CHECK-LABEL: vfnmacc_vv_v4f16:
 141 ; CHECK:       # %bb.0:
 142 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
 143 ; CHECK-NEXT:    vfnmacc.vv v10, v8, v9, v0.t
 144 ; CHECK-NEXT:    vmv1r.v v8, v10
 145 ; CHECK-NEXT:    ret
 146   %a.neg = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> splat (i1 -1), i32 %evl) ; -a
 147   %c.neg = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> splat (i1 -1), i32 %evl) ; -c
 148   %fma = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %a.neg, <4 x half> %b, <4 x half> %c.neg, <4 x i1> splat (i1 -1), i32 %evl) ; (-a * b) + (-c)
 149   %res = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %m, <4 x half> %fma, <4 x half> %c, i32 %evl) ; merge on %m, %c otherwise
 150   ret <4 x half> %res
 151 }
 152
 153 define <4 x half> @vfnmacc_vv_v4f16_unmasked(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x i1> %m, i32 zeroext %evl) {
 154 ; CHECK-LABEL: vfnmacc_vv_v4f16_unmasked:
 155 ; CHECK:       # %bb.0:
 156 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, ma
 157 ; CHECK-NEXT:    vfnmacc.vv v10, v8, v9
 158 ; CHECK-NEXT:    vmv1r.v v8, v10
 159 ; CHECK-NEXT:    ret
 160   %a.neg = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> splat (i1 -1), i32 %evl) ; -a
 161   %c.neg = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> splat (i1 -1), i32 %evl) ; -c
 162   %fma = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %a.neg, <4 x half> %b, <4 x half> %c.neg, <4 x i1> splat (i1 -1), i32 %evl) ; (-a * b) + (-c)
 163   %res = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> splat (i1 -1), <4 x half> %fma, <4 x half> %c, i32 %evl) ; merge with an all-ones mask; %m is not read
 164   ret <4 x half> %res
 165 }
 166
 167 define <4 x half> @vfnmacc_vf_v4f16(<4 x half> %a, half %b, <4 x half> %c, <4 x i1> %m, i32 zeroext %evl) {
 168 ; CHECK-LABEL: vfnmacc_vf_v4f16:
 169 ; CHECK:       # %bb.0:
 170 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
 171 ; CHECK-NEXT:    vfnmacc.vf v9, fa0, v8, v0.t
 172 ; CHECK-NEXT:    vmv1r.v v8, v9
 173 ; CHECK-NEXT:    ret
 174   %b.ins = insertelement <4 x half> poison, half %b, i32 0 ; scalar %b into lane 0
 175   %b.splat = shufflevector <4 x half> %b.ins, <4 x half> poison, <4 x i32> zeroinitializer ; broadcast lane 0
 176   %a.neg = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> splat (i1 -1), i32 %evl) ; -a
 177   %c.neg = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> splat (i1 -1), i32 %evl) ; -c
 178   %fma = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %a.neg, <4 x half> %b.splat, <4 x half> %c.neg, <4 x i1> splat (i1 -1), i32 %evl) ; (-a * splat(b)) + (-c)
 179   %res = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %m, <4 x half> %fma, <4 x half> %c, i32 %evl) ; merge on %m, %c otherwise
 180   ret <4 x half> %res
 181 }
 182
 183 define <4 x half> @vfnmacc_vf_v4f16_commute(<4 x half> %a, half %b, <4 x half> %c, <4 x i1> %m, i32 zeroext %evl) {
 184 ; CHECK-LABEL: vfnmacc_vf_v4f16_commute:
 185 ; CHECK:       # %bb.0:
 186 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
 187 ; CHECK-NEXT:    vfnmacc.vf v9, fa0, v8, v0.t
 188 ; CHECK-NEXT:    vmv1r.v v8, v9
 189 ; CHECK-NEXT:    ret
 190   %b.ins = insertelement <4 x half> poison, half %b, i32 0 ; scalar %b into lane 0
 191   %b.splat = shufflevector <4 x half> %b.ins, <4 x half> poison, <4 x i32> zeroinitializer ; broadcast lane 0
 192   %a.neg = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> splat (i1 -1), i32 %evl) ; -a
 193   %c.neg = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> splat (i1 -1), i32 %evl) ; -c
 194   %fma = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %b.splat, <4 x half> %a.neg, <4 x half> %c.neg, <4 x i1> splat (i1 -1), i32 %evl) ; (splat(b) * -a) + (-c)
 195   %res = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %m, <4 x half> %fma, <4 x half> %c, i32 %evl) ; merge on %m, %c otherwise
 196   ret <4 x half> %res
 197 }
 198
 199 define <4 x half> @vfnmacc_vf_v4f16_unmasked(<4 x half> %a, half %b, <4 x half> %c, i32 zeroext %evl) {
 200 ; CHECK-LABEL: vfnmacc_vf_v4f16_unmasked:
 201 ; CHECK:       # %bb.0:
 202 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, ma
 203 ; CHECK-NEXT:    vfnmacc.vf v9, fa0, v8
 204 ; CHECK-NEXT:    vmv1r.v v8, v9
 205 ; CHECK-NEXT:    ret
 206   %b.ins = insertelement <4 x half> poison, half %b, i32 0 ; scalar %b into lane 0
 207   %b.splat = shufflevector <4 x half> %b.ins, <4 x half> poison, <4 x i32> zeroinitializer ; broadcast lane 0
 208   %a.neg = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> splat (i1 -1), i32 %evl) ; -a
 209   %c.neg = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> splat (i1 -1), i32 %evl) ; -c
 210   %fma = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %a.neg, <4 x half> %b.splat, <4 x half> %c.neg, <4 x i1> splat (i1 -1), i32 %evl) ; (-a * splat(b)) + (-c)
 211   %res = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> splat (i1 -1), <4 x half> %fma, <4 x half> %c, i32 %evl) ; merge with an all-ones mask
 212   ret <4 x half> %res
 213 }
 214
 215 define <4 x half> @vfnmacc_vv_v4f16_ta(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x i1> %m, i32 zeroext %evl) {
 216 ; CHECK-LABEL: vfnmacc_vv_v4f16_ta:
 217 ; CHECK:       # %bb.0:
 218 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
 219 ; CHECK-NEXT:    vfnmacc.vv v10, v8, v9, v0.t
 220 ; CHECK-NEXT:    vmv1r.v v8, v10
 221 ; CHECK-NEXT:    ret
 222   %a.neg = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> splat (i1 -1), i32 %evl) ; -a
 223   %c.neg = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> splat (i1 -1), i32 %evl) ; -c
 224   %fma = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %a.neg, <4 x half> %b, <4 x half> %c.neg, <4 x i1> splat (i1 -1), i32 %evl) ; (-a * b) + (-c)
 225   %res = call <4 x half> @llvm.vp.select.v4f16(<4 x i1> %m, <4 x half> %fma, <4 x half> %c, i32 %evl) ; select on %m, %c otherwise
 226   ret <4 x half> %res
 227 }
 228
 229 define <4 x half> @vfnmacc_vf_v4f16_ta(<4 x half> %a, half %b, <4 x half> %c, <4 x i1> %m, i32 zeroext %evl) {
 230 ; CHECK-LABEL: vfnmacc_vf_v4f16_ta:
 231 ; CHECK:       # %bb.0:
 232 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
 233 ; CHECK-NEXT:    vfnmacc.vf v9, fa0, v8, v0.t
 234 ; CHECK-NEXT:    vmv1r.v v8, v9
 235 ; CHECK-NEXT:    ret
 236   %b.ins = insertelement <4 x half> poison, half %b, i32 0 ; scalar %b into lane 0
 237   %b.splat = shufflevector <4 x half> %b.ins, <4 x half> poison, <4 x i32> zeroinitializer ; broadcast lane 0
 238   %a.neg = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> splat (i1 -1), i32 %evl) ; -a
 239   %c.neg = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> splat (i1 -1), i32 %evl) ; -c
 240   %fma = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %a.neg, <4 x half> %b.splat, <4 x half> %c.neg, <4 x i1> splat (i1 -1), i32 %evl) ; (-a * splat(b)) + (-c)
 241   %res = call <4 x half> @llvm.vp.select.v4f16(<4 x i1> %m, <4 x half> %fma, <4 x half> %c, i32 %evl) ; select on %m, %c otherwise
 242   ret <4 x half> %res
 243 }
 244
 245 define <4 x half> @vfnmacc_vf_v4f16_commute_ta(<4 x half> %a, half %b, <4 x half> %c, <4 x i1> %m, i32 zeroext %evl) {
 246 ; CHECK-LABEL: vfnmacc_vf_v4f16_commute_ta:
 247 ; CHECK:       # %bb.0:
 248 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
 249 ; CHECK-NEXT:    vfnmacc.vf v9, fa0, v8, v0.t
 250 ; CHECK-NEXT:    vmv1r.v v8, v9
 251 ; CHECK-NEXT:    ret
 252   %b.ins = insertelement <4 x half> poison, half %b, i32 0 ; scalar %b into lane 0
 253   %b.splat = shufflevector <4 x half> %b.ins, <4 x half> poison, <4 x i32> zeroinitializer ; broadcast lane 0
 254   %a.neg = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> splat (i1 -1), i32 %evl) ; -a
 255   %c.neg = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> splat (i1 -1), i32 %evl) ; -c
 256   %fma = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %b.splat, <4 x half> %a.neg, <4 x half> %c.neg, <4 x i1> splat (i1 -1), i32 %evl) ; (splat(b) * -a) + (-c)
 257   %res = call <4 x half> @llvm.vp.select.v4f16(<4 x i1> %m, <4 x half> %fma, <4 x half> %c, i32 %evl) ; select on %m, %c otherwise
 258   ret <4 x half> %res
 259 }
 260
 261 declare <8 x half> @llvm.vp.fneg.v8f16(<8 x half>, <8 x i1>, i32) ; (value, mask, evl)
 262 declare <8 x half> @llvm.vp.fma.v8f16(<8 x half>, <8 x half>, <8 x half>, <8 x i1>, i32) ; (three values, mask, evl)
 263 declare <8 x half> @llvm.vp.select.v8f16(<8 x i1>, <8 x half>, <8 x half>, i32) ; used by the *_ta tests
 264 declare <8 x half> @llvm.vp.merge.v8f16(<8 x i1>, <8 x half>, <8 x half>, i32) ; used by the non-_ta tests
 265
 266 define <8 x half> @vfnmacc_vv_v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c, <8 x i1> %m, i32 zeroext %evl) {
 267 ; CHECK-LABEL: vfnmacc_vv_v8f16:
 268 ; CHECK:       # %bb.0:
 269 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
 270 ; CHECK-NEXT:    vfnmacc.vv v10, v8, v9, v0.t
 271 ; CHECK-NEXT:    vmv1r.v v8, v10
 272 ; CHECK-NEXT:    ret
 273   %a.neg = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> splat (i1 -1), i32 %evl) ; -a
 274   %c.neg = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> splat (i1 -1), i32 %evl) ; -c
 275   %fma = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %a.neg, <8 x half> %b, <8 x half> %c.neg, <8 x i1> splat (i1 -1), i32 %evl) ; (-a * b) + (-c)
 276   %res = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %m, <8 x half> %fma, <8 x half> %c, i32 %evl) ; merge on %m, %c otherwise
 277   ret <8 x half> %res
 278 }
 279
 280 define <8 x half> @vfnmacc_vv_v8f16_unmasked(<8 x half> %a, <8 x half> %b, <8 x half> %c, <8 x i1> %m, i32 zeroext %evl) {
 281 ; CHECK-LABEL: vfnmacc_vv_v8f16_unmasked:
 282 ; CHECK:       # %bb.0:
 283 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, ma
 284 ; CHECK-NEXT:    vfnmacc.vv v10, v8, v9
 285 ; CHECK-NEXT:    vmv1r.v v8, v10
 286 ; CHECK-NEXT:    ret
 287   %a.neg = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> splat (i1 -1), i32 %evl) ; -a
 288   %c.neg = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> splat (i1 -1), i32 %evl) ; -c
 289   %fma = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %a.neg, <8 x half> %b, <8 x half> %c.neg, <8 x i1> splat (i1 -1), i32 %evl) ; (-a * b) + (-c)
 290   %res = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> splat (i1 -1), <8 x half> %fma, <8 x half> %c, i32 %evl) ; merge with an all-ones mask; %m is not read
 291   ret <8 x half> %res
 292 }
 293
 294 define <8 x half> @vfnmacc_vf_v8f16(<8 x half> %a, half %b, <8 x half> %c, <8 x i1> %m, i32 zeroext %evl) {
 295 ; CHECK-LABEL: vfnmacc_vf_v8f16:
 296 ; CHECK:       # %bb.0:
 297 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
 298 ; CHECK-NEXT:    vfnmacc.vf v9, fa0, v8, v0.t
 299 ; CHECK-NEXT:    vmv1r.v v8, v9
 300 ; CHECK-NEXT:    ret
 301   %b.ins = insertelement <8 x half> poison, half %b, i32 0 ; scalar %b into lane 0
 302   %b.splat = shufflevector <8 x half> %b.ins, <8 x half> poison, <8 x i32> zeroinitializer ; broadcast lane 0
 303   %a.neg = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> splat (i1 -1), i32 %evl) ; -a
 304   %c.neg = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> splat (i1 -1), i32 %evl) ; -c
 305   %fma = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %a.neg, <8 x half> %b.splat, <8 x half> %c.neg, <8 x i1> splat (i1 -1), i32 %evl) ; (-a * splat(b)) + (-c)
 306   %res = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %m, <8 x half> %fma, <8 x half> %c, i32 %evl) ; merge on %m, %c otherwise
 307   ret <8 x half> %res
 308 }
 309
 310 define <8 x half> @vfnmacc_vf_v8f16_commute(<8 x half> %a, half %b, <8 x half> %c, <8 x i1> %m, i32 zeroext %evl) {
 311 ; CHECK-LABEL: vfnmacc_vf_v8f16_commute:
 312 ; CHECK:       # %bb.0:
 313 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
 314 ; CHECK-NEXT:    vfnmacc.vf v9, fa0, v8, v0.t
 315 ; CHECK-NEXT:    vmv1r.v v8, v9
 316 ; CHECK-NEXT:    ret
 317   %b.ins = insertelement <8 x half> poison, half %b, i32 0 ; scalar %b into lane 0
 318   %b.splat = shufflevector <8 x half> %b.ins, <8 x half> poison, <8 x i32> zeroinitializer ; broadcast lane 0
 319   %a.neg = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> splat (i1 -1), i32 %evl) ; -a
 320   %c.neg = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> splat (i1 -1), i32 %evl) ; -c
 321   %fma = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %b.splat, <8 x half> %a.neg, <8 x half> %c.neg, <8 x i1> splat (i1 -1), i32 %evl) ; (splat(b) * -a) + (-c)
 322   %res = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %m, <8 x half> %fma, <8 x half> %c, i32 %evl) ; merge on %m, %c otherwise
 323   ret <8 x half> %res
 324 }
 325
 326 define <8 x half> @vfnmacc_vf_v8f16_unmasked(<8 x half> %a, half %b, <8 x half> %c, i32 zeroext %evl) {
 327 ; CHECK-LABEL: vfnmacc_vf_v8f16_unmasked:
 328 ; CHECK:       # %bb.0:
 329 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, ma
 330 ; CHECK-NEXT:    vfnmacc.vf v9, fa0, v8
 331 ; CHECK-NEXT:    vmv1r.v v8, v9
 332 ; CHECK-NEXT:    ret
 333   %b.ins = insertelement <8 x half> poison, half %b, i32 0 ; scalar %b into lane 0
 334   %b.splat = shufflevector <8 x half> %b.ins, <8 x half> poison, <8 x i32> zeroinitializer ; broadcast lane 0
 335   %a.neg = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> splat (i1 -1), i32 %evl) ; -a
 336   %c.neg = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> splat (i1 -1), i32 %evl) ; -c
 337   %fma = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %a.neg, <8 x half> %b.splat, <8 x half> %c.neg, <8 x i1> splat (i1 -1), i32 %evl) ; (-a * splat(b)) + (-c)
 338   %res = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> splat (i1 -1), <8 x half> %fma, <8 x half> %c, i32 %evl) ; merge with an all-ones mask
 339   ret <8 x half> %res
 340 }
 341
 342 define <8 x half> @vfnmacc_vv_v8f16_ta(<8 x half> %a, <8 x half> %b, <8 x half> %c, <8 x i1> %m, i32 zeroext %evl) {
 343 ; CHECK-LABEL: vfnmacc_vv_v8f16_ta:
 344 ; CHECK:       # %bb.0:
 345 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
 346 ; CHECK-NEXT:    vfnmacc.vv v10, v8, v9, v0.t
 347 ; CHECK-NEXT:    vmv.v.v v8, v10
 348 ; CHECK-NEXT:    ret
 349   %a.neg = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> splat (i1 -1), i32 %evl) ; -a
 350   %c.neg = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> splat (i1 -1), i32 %evl) ; -c
 351   %fma = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %a.neg, <8 x half> %b, <8 x half> %c.neg, <8 x i1> splat (i1 -1), i32 %evl) ; (-a * b) + (-c)
 352   %res = call <8 x half> @llvm.vp.select.v8f16(<8 x i1> %m, <8 x half> %fma, <8 x half> %c, i32 %evl) ; select on %m, %c otherwise
 353   ret <8 x half> %res
 354 }
 355
 356 define <8 x half> @vfnmacc_vf_v8f16_ta(<8 x half> %a, half %b, <8 x half> %c, <8 x i1> %m, i32 zeroext %evl) {
 357 ; CHECK-LABEL: vfnmacc_vf_v8f16_ta:
 358 ; CHECK:       # %bb.0:
 359 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
 360 ; CHECK-NEXT:    vfnmacc.vf v9, fa0, v8, v0.t
 361 ; CHECK-NEXT:    vmv.v.v v8, v9
 362 ; CHECK-NEXT:    ret
 363   %b.ins = insertelement <8 x half> poison, half %b, i32 0 ; scalar %b into lane 0
 364   %b.splat = shufflevector <8 x half> %b.ins, <8 x half> poison, <8 x i32> zeroinitializer ; broadcast lane 0
 365   %a.neg = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> splat (i1 -1), i32 %evl) ; -a
 366   %c.neg = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> splat (i1 -1), i32 %evl) ; -c
 367   %fma = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %a.neg, <8 x half> %b.splat, <8 x half> %c.neg, <8 x i1> splat (i1 -1), i32 %evl) ; (-a * splat(b)) + (-c)
 368   %res = call <8 x half> @llvm.vp.select.v8f16(<8 x i1> %m, <8 x half> %fma, <8 x half> %c, i32 %evl) ; select on %m, %c otherwise
 369   ret <8 x half> %res
 370 }
 371
 372 define <8 x half> @vfnmacc_vf_v8f16_commute_ta(<8 x half> %a, half %b, <8 x half> %c, <8 x i1> %m, i32 zeroext %evl) {
 373 ; CHECK-LABEL: vfnmacc_vf_v8f16_commute_ta:
 374 ; CHECK:       # %bb.0:
 375 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
 376 ; CHECK-NEXT:    vfnmacc.vf v9, fa0, v8, v0.t
 377 ; CHECK-NEXT:    vmv.v.v v8, v9
 378 ; CHECK-NEXT:    ret
 379   %b.ins = insertelement <8 x half> poison, half %b, i32 0 ; scalar %b into lane 0
 380   %b.splat = shufflevector <8 x half> %b.ins, <8 x half> poison, <8 x i32> zeroinitializer ; broadcast lane 0
 381   %a.neg = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> splat (i1 -1), i32 %evl) ; -a
 382   %c.neg = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> splat (i1 -1), i32 %evl) ; -c
 383   %fma = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %b.splat, <8 x half> %a.neg, <8 x half> %c.neg, <8 x i1> splat (i1 -1), i32 %evl) ; (splat(b) * -a) + (-c)
 384   %res = call <8 x half> @llvm.vp.select.v8f16(<8 x i1> %m, <8 x half> %fma, <8 x half> %c, i32 %evl) ; select on %m, %c otherwise
 385   ret <8 x half> %res
 386 }
 387
 388 declare <16 x half> @llvm.vp.fneg.v16f16(<16 x half>, <16 x i1>, i32) ; (value, mask, evl)
 389 declare <16 x half> @llvm.vp.fma.v16f16(<16 x half>, <16 x half>, <16 x half>, <16 x i1>, i32) ; (three values, mask, evl)
 390 declare <16 x half> @llvm.vp.select.v16f16(<16 x i1>, <16 x half>, <16 x half>, i32) ; used by the *_ta tests
 391 declare <16 x half> @llvm.vp.merge.v16f16(<16 x i1>, <16 x half>, <16 x half>, i32) ; used by the non-_ta tests
 392
 393 define <16 x half> @vfnmacc_vv_v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %c, <16 x i1> %m, i32 zeroext %evl) {
 394 ; CHECK-LABEL: vfnmacc_vv_v16f16:
 395 ; CHECK:       # %bb.0:
 396 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
 397 ; CHECK-NEXT:    vfnmacc.vv v12, v8, v10, v0.t
 398 ; CHECK-NEXT:    vmv2r.v v8, v12
 399 ; CHECK-NEXT:    ret
 400   %a.neg = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> splat (i1 -1), i32 %evl) ; -a
 401   %c.neg = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> splat (i1 -1), i32 %evl) ; -c
 402   %fma = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %a.neg, <16 x half> %b, <16 x half> %c.neg, <16 x i1> splat (i1 -1), i32 %evl) ; (-a * b) + (-c)
 403   %res = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %m, <16 x half> %fma, <16 x half> %c, i32 %evl) ; merge on %m, %c otherwise
 404   ret <16 x half> %res
 405 }
 406
 407 define <16 x half> @vfnmacc_vv_v16f16_unmasked(<16 x half> %a, <16 x half> %b, <16 x half> %c, <16 x i1> %m, i32 zeroext %evl) {
 408 ; CHECK-LABEL: vfnmacc_vv_v16f16_unmasked:
 409 ; CHECK:       # %bb.0:
 410 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, ma
 411 ; CHECK-NEXT:    vfnmacc.vv v12, v8, v10
 412 ; CHECK-NEXT:    vmv2r.v v8, v12
 413 ; CHECK-NEXT:    ret
 414   %a.neg = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> splat (i1 -1), i32 %evl) ; -a
 415   %c.neg = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> splat (i1 -1), i32 %evl) ; -c
 416   %fma = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %a.neg, <16 x half> %b, <16 x half> %c.neg, <16 x i1> splat (i1 -1), i32 %evl) ; (-a * b) + (-c)
 417   %res = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> splat (i1 -1), <16 x half> %fma, <16 x half> %c, i32 %evl) ; merge with an all-ones mask; %m is not read
 418   ret <16 x half> %res
 419 }
 420
 421 define <16 x half> @vfnmacc_vf_v16f16(<16 x half> %a, half %b, <16 x half> %c, <16 x i1> %m, i32 zeroext %evl) {
 422 ; CHECK-LABEL: vfnmacc_vf_v16f16:
 423 ; CHECK:       # %bb.0:
 424 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
 425 ; CHECK-NEXT:    vfnmacc.vf v10, fa0, v8, v0.t
 426 ; CHECK-NEXT:    vmv2r.v v8, v10
 427 ; CHECK-NEXT:    ret
 428   %b.ins = insertelement <16 x half> poison, half %b, i32 0 ; scalar %b into lane 0
 429   %b.splat = shufflevector <16 x half> %b.ins, <16 x half> poison, <16 x i32> zeroinitializer ; broadcast lane 0
 430   %a.neg = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> splat (i1 -1), i32 %evl) ; -a
 431   %c.neg = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> splat (i1 -1), i32 %evl) ; -c
 432   %fma = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %a.neg, <16 x half> %b.splat, <16 x half> %c.neg, <16 x i1> splat (i1 -1), i32 %evl) ; (-a * splat(b)) + (-c)
 433   %res = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %m, <16 x half> %fma, <16 x half> %c, i32 %evl) ; merge on %m, %c otherwise
 434   ret <16 x half> %res
 435 }
 436
 437 define <16 x half> @vfnmacc_vf_v16f16_commute(<16 x half> %a, half %b, <16 x half> %c, <16 x i1> %m, i32 zeroext %evl) {
 438 ; CHECK-LABEL: vfnmacc_vf_v16f16_commute:
 439 ; CHECK:       # %bb.0:
 440 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
 441 ; CHECK-NEXT:    vfnmacc.vf v10, fa0, v8, v0.t
 442 ; CHECK-NEXT:    vmv2r.v v8, v10
 443 ; CHECK-NEXT:    ret
 444   %b.ins = insertelement <16 x half> poison, half %b, i32 0 ; scalar %b into lane 0
 445   %b.splat = shufflevector <16 x half> %b.ins, <16 x half> poison, <16 x i32> zeroinitializer ; broadcast lane 0
 446   %a.neg = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> splat (i1 -1), i32 %evl) ; -a
 447   %c.neg = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> splat (i1 -1), i32 %evl) ; -c
 448   %fma = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %b.splat, <16 x half> %a.neg, <16 x half> %c.neg, <16 x i1> splat (i1 -1), i32 %evl) ; (splat(b) * -a) + (-c)
 449   %res = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %m, <16 x half> %fma, <16 x half> %c, i32 %evl) ; merge on %m, %c otherwise
 450   ret <16 x half> %res
 451 }
 452
 453 define <16 x half> @vfnmacc_vf_v16f16_unmasked(<16 x half> %a, half %b, <16 x half> %c, i32 zeroext %evl) {
 454 ; CHECK-LABEL: vfnmacc_vf_v16f16_unmasked:
 455 ; CHECK:       # %bb.0:
 456 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, ma
 457 ; CHECK-NEXT:    vfnmacc.vf v10, fa0, v8
 458 ; CHECK-NEXT:    vmv2r.v v8, v10
 459 ; CHECK-NEXT:    ret
 460   %b.ins = insertelement <16 x half> poison, half %b, i32 0 ; scalar %b into lane 0
 461   %b.splat = shufflevector <16 x half> %b.ins, <16 x half> poison, <16 x i32> zeroinitializer ; broadcast lane 0
 462   %a.neg = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> splat (i1 -1), i32 %evl) ; -a
 463   %c.neg = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> splat (i1 -1), i32 %evl) ; -c
 464   %fma = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %a.neg, <16 x half> %b.splat, <16 x half> %c.neg, <16 x i1> splat (i1 -1), i32 %evl) ; (-a * splat(b)) + (-c)
 465   %res = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> splat (i1 -1), <16 x half> %fma, <16 x half> %c, i32 %evl) ; merge with an all-ones mask
 466   ret <16 x half> %res
 467 }
 468
 469 define <16 x half> @vfnmacc_vv_v16f16_ta(<16 x half> %a, <16 x half> %b, <16 x half> %c, <16 x i1> %m, i32 zeroext %evl) {
 470 ; CHECK-LABEL: vfnmacc_vv_v16f16_ta:
 471 ; CHECK:       # %bb.0:
 472 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
 473 ; CHECK-NEXT:    vfnmacc.vv v12, v8, v10, v0.t
 474 ; CHECK-NEXT:    vmv.v.v v8, v12
 475 ; CHECK-NEXT:    ret
 476   %a.neg = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> splat (i1 -1), i32 %evl) ; -a
 477   %c.neg = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> splat (i1 -1), i32 %evl) ; -c
 478   %fma = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %a.neg, <16 x half> %b, <16 x half> %c.neg, <16 x i1> splat (i1 -1), i32 %evl) ; (-a * b) + (-c)
 479   %res = call <16 x half> @llvm.vp.select.v16f16(<16 x i1> %m, <16 x half> %fma, <16 x half> %c, i32 %evl) ; select on %m, %c otherwise
 480   ret <16 x half> %res
 481 }
 482
 483 define <16 x half> @vfnmacc_vf_v16f16_ta(<16 x half> %a, half %b, <16 x half> %c, <16 x i1> %m, i32 zeroext %evl) {
 484 ; CHECK-LABEL: vfnmacc_vf_v16f16_ta:
 485 ; CHECK:       # %bb.0:
 486 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
 487 ; CHECK-NEXT:    vfnmacc.vf v10, fa0, v8, v0.t
 488 ; CHECK-NEXT:    vmv.v.v v8, v10
 489 ; CHECK-NEXT:    ret
 490   %b.ins = insertelement <16 x half> poison, half %b, i32 0 ; scalar %b into lane 0
 491   %b.splat = shufflevector <16 x half> %b.ins, <16 x half> poison, <16 x i32> zeroinitializer ; broadcast lane 0
 492   %a.neg = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> splat (i1 -1), i32 %evl) ; -a
 493   %c.neg = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> splat (i1 -1), i32 %evl) ; -c
 494   %fma = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %a.neg, <16 x half> %b.splat, <16 x half> %c.neg, <16 x i1> splat (i1 -1), i32 %evl) ; (-a * splat(b)) + (-c)
 495   %res = call <16 x half> @llvm.vp.select.v16f16(<16 x i1> %m, <16 x half> %fma, <16 x half> %c, i32 %evl) ; select on %m, %c otherwise
 496   ret <16 x half> %res
 497 }
 498
 499 define <16 x half> @vfnmacc_vf_v16f16_commute_ta(<16 x half> %a, half %b, <16 x half> %c, <16 x i1> %m, i32 zeroext %evl) {
 500 ; CHECK-LABEL: vfnmacc_vf_v16f16_commute_ta:
 501 ; CHECK:       # %bb.0:
 502 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
 503 ; CHECK-NEXT:    vfnmacc.vf v10, fa0, v8, v0.t
 504 ; CHECK-NEXT:    vmv.v.v v8, v10
 505 ; CHECK-NEXT:    ret
 506   %b.ins = insertelement <16 x half> poison, half %b, i32 0 ; scalar %b into lane 0
 507   %b.splat = shufflevector <16 x half> %b.ins, <16 x half> poison, <16 x i32> zeroinitializer ; broadcast lane 0
 508   %a.neg = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> splat (i1 -1), i32 %evl) ; -a
 509   %c.neg = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> splat (i1 -1), i32 %evl) ; -c
 510   %fma = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %b.splat, <16 x half> %a.neg, <16 x half> %c.neg, <16 x i1> splat (i1 -1), i32 %evl) ; (splat(b) * -a) + (-c)
 511   %res = call <16 x half> @llvm.vp.select.v16f16(<16 x i1> %m, <16 x half> %fma, <16 x half> %c, i32 %evl) ; select on %m, %c otherwise
 512   ret <16 x half> %res
 513 }
 514
 515 declare <32 x half> @llvm.vp.fneg.v32f16(<32 x half>, <32 x i1>, i32) ; (value, mask, evl)
 516 declare <32 x half> @llvm.vp.fma.v32f16(<32 x half>, <32 x half>, <32 x half>, <32 x i1>, i32) ; (three values, mask, evl)
 517 declare <32 x half> @llvm.vp.select.v32f16(<32 x i1>, <32 x half>, <32 x half>, i32) ; select-form counterpart of vp.merge
 518 declare <32 x half> @llvm.vp.merge.v32f16(<32 x i1>, <32 x half>, <32 x half>, i32) ; used by the non-_ta tests
 519
 ; <32 x half> vector-vector case: vp.fneg of %a and %c feed vp.fma(-a, b, -c)
 ; under an all-true mask; vp.merge then blends the result with %c using %m.
 ; Expected assembly: one masked vfnmacc.vv under a "tu, mu" vsetvli.
 520 define <32 x half> @vfnmacc_vv_v32f16(<32 x half> %a, <32 x half> %b, <32 x half> %c, <32 x i1> %m, i32 zeroext %evl) {
 521 ; CHECK-LABEL: vfnmacc_vv_v32f16:
 522 ; CHECK:       # %bb.0:
 523 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
 524 ; CHECK-NEXT:    vfnmacc.vv v16, v8, v12, v0.t
 525 ; CHECK-NEXT:    vmv4r.v v8, v16
 526 ; CHECK-NEXT:    ret
 527   %nega = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %a, <32 x i1> splat (i1 -1), i32 %evl)
 528   %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> splat (i1 -1), i32 %evl)
 529   %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %nega, <32 x half> %b, <32 x half> %negc, <32 x i1> splat (i1 -1), i32 %evl)
 530   %u = call <32 x half> @llvm.vp.merge.v32f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl)
 531   ret <32 x half> %u
 532 }
 533
 ; <32 x half> vector-vector case where the vp.merge mask is an all-true splat;
 ; the %m argument is not referenced by the body.
 ; Expected assembly: unmasked vfnmacc.vv under a "tu, ma" vsetvli.
 534 define <32 x half> @vfnmacc_vv_v32f16_unmasked(<32 x half> %a, <32 x half> %b, <32 x half> %c, <32 x i1> %m, i32 zeroext %evl) {
 535 ; CHECK-LABEL: vfnmacc_vv_v32f16_unmasked:
 536 ; CHECK:       # %bb.0:
 537 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, ma
 538 ; CHECK-NEXT:    vfnmacc.vv v16, v8, v12
 539 ; CHECK-NEXT:    vmv4r.v v8, v16
 540 ; CHECK-NEXT:    ret
 541   %nega = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %a, <32 x i1> splat (i1 -1), i32 %evl)
 542   %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> splat (i1 -1), i32 %evl)
 543   %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %nega, <32 x half> %b, <32 x half> %negc, <32 x i1> splat (i1 -1), i32 %evl)
 544   %u = call <32 x half> @llvm.vp.merge.v32f16(<32 x i1> splat (i1 -1), <32 x half> %v, <32 x half> %c, i32 %evl)
 545   ret <32 x half> %u
 546 }
 547
 ; <32 x half> vector-scalar case: %b is splatted via insertelement +
 ; shufflevector and passed as the second vp.fma operand; vp.merge blends the
 ; result with %c using %m.
 ; Expected assembly: masked vfnmacc.vf reading fa0 under a "tu, mu" vsetvli.
 548 define <32 x half> @vfnmacc_vf_v32f16(<32 x half> %a, half %b, <32 x half> %c, <32 x i1> %m, i32 zeroext %evl) {
 549 ; CHECK-LABEL: vfnmacc_vf_v32f16:
 550 ; CHECK:       # %bb.0:
 551 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
 552 ; CHECK-NEXT:    vfnmacc.vf v12, fa0, v8, v0.t
 553 ; CHECK-NEXT:    vmv4r.v v8, v12
 554 ; CHECK-NEXT:    ret
 555   %elt.head = insertelement <32 x half> poison, half %b, i32 0
 556   %vb = shufflevector <32 x half> %elt.head, <32 x half> poison, <32 x i32> zeroinitializer
 557   %nega = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %a, <32 x i1> splat (i1 -1), i32 %evl)
 558   %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> splat (i1 -1), i32 %evl)
 559   %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %nega, <32 x half> %vb, <32 x half> %negc, <32 x i1> splat (i1 -1), i32 %evl)
 560   %u = call <32 x half> @llvm.vp.merge.v32f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl)
 561   ret <32 x half> %u
 562 }
 563
 ; <32 x half> vector-scalar case with the vp.fma multiplicands swapped (splat
 ; of %b first, negated %a second); the expected assembly matches the
 ; non-commuted vfnmacc_vf_v32f16 test.
 564 define <32 x half> @vfnmacc_vf_v32f16_commute(<32 x half> %a, half %b, <32 x half> %c, <32 x i1> %m, i32 zeroext %evl) {
 565 ; CHECK-LABEL: vfnmacc_vf_v32f16_commute:
 566 ; CHECK:       # %bb.0:
 567 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
 568 ; CHECK-NEXT:    vfnmacc.vf v12, fa0, v8, v0.t
 569 ; CHECK-NEXT:    vmv4r.v v8, v12
 570 ; CHECK-NEXT:    ret
 571   %elt.head = insertelement <32 x half> poison, half %b, i32 0
 572   %vb = shufflevector <32 x half> %elt.head, <32 x half> poison, <32 x i32> zeroinitializer
 573   %nega = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %a, <32 x i1> splat (i1 -1), i32 %evl)
 574   %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> splat (i1 -1), i32 %evl)
 575   %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %vb, <32 x half> %nega, <32 x half> %negc, <32 x i1> splat (i1 -1), i32 %evl)
 576   %u = call <32 x half> @llvm.vp.merge.v32f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl)
 577   ret <32 x half> %u
 578 }
 579
 ; <32 x half> vector-scalar case with no mask argument: the vp.merge mask is
 ; an all-true splat.
 ; Expected assembly: unmasked vfnmacc.vf reading fa0 under a "tu, ma" vsetvli.
 580 define <32 x half> @vfnmacc_vf_v32f16_unmasked(<32 x half> %a, half %b, <32 x half> %c, i32 zeroext %evl) {
 581 ; CHECK-LABEL: vfnmacc_vf_v32f16_unmasked:
 582 ; CHECK:       # %bb.0:
 583 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, ma
 584 ; CHECK-NEXT:    vfnmacc.vf v12, fa0, v8
 585 ; CHECK-NEXT:    vmv4r.v v8, v12
 586 ; CHECK-NEXT:    ret
 587   %elt.head = insertelement <32 x half> poison, half %b, i32 0
 588   %vb = shufflevector <32 x half> %elt.head, <32 x half> poison, <32 x i32> zeroinitializer
 589   %nega = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %a, <32 x i1> splat (i1 -1), i32 %evl)
 590   %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> splat (i1 -1), i32 %evl)
 591   %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %nega, <32 x half> %vb, <32 x half> %negc, <32 x i1> splat (i1 -1), i32 %evl)
 592   %u = call <32 x half> @llvm.vp.merge.v32f16(<32 x i1> splat (i1 -1), <32 x half> %v, <32 x half> %c, i32 %evl)
 593   ret <32 x half> %u
 594 }
 595
 ; <32 x half> vector-vector case that blends with vp.select (under %m) instead
 ; of vp.merge.
 ; Expected assembly: masked vfnmacc.vv under a "ta, mu" vsetvli, followed by a
 ; vmv.v.v copy into v8.
 596 define <32 x half> @vfnmacc_vv_v32f16_ta(<32 x half> %a, <32 x half> %b, <32 x half> %c, <32 x i1> %m, i32 zeroext %evl) {
 597 ; CHECK-LABEL: vfnmacc_vv_v32f16_ta:
 598 ; CHECK:       # %bb.0:
 599 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 600 ; CHECK-NEXT:    vfnmacc.vv v16, v8, v12, v0.t
 601 ; CHECK-NEXT:    vmv.v.v v8, v16
 602 ; CHECK-NEXT:    ret
 603   %nega = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %a, <32 x i1> splat (i1 -1), i32 %evl)
 604   %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> splat (i1 -1), i32 %evl)
 605   %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %nega, <32 x half> %b, <32 x half> %negc, <32 x i1> splat (i1 -1), i32 %evl)
 606   %u = call <32 x half> @llvm.vp.select.v32f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl)
 607   ret <32 x half> %u
 608 }
 609
 ; <32 x half> vector-scalar case (splat of %b as the second vp.fma operand)
 ; that blends with vp.select under %m instead of vp.merge.
 ; Expected assembly: masked vfnmacc.vf reading fa0 under a "ta, mu" vsetvli,
 ; followed by a vmv.v.v copy into v8.
 610 define <32 x half> @vfnmacc_vf_v32f16_ta(<32 x half> %a, half %b, <32 x half> %c, <32 x i1> %m, i32 zeroext %evl) {
 611 ; CHECK-LABEL: vfnmacc_vf_v32f16_ta:
 612 ; CHECK:       # %bb.0:
 613 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 614 ; CHECK-NEXT:    vfnmacc.vf v12, fa0, v8, v0.t
 615 ; CHECK-NEXT:    vmv.v.v v8, v12
 616 ; CHECK-NEXT:    ret
 617   %elt.head = insertelement <32 x half> poison, half %b, i32 0
 618   %vb = shufflevector <32 x half> %elt.head, <32 x half> poison, <32 x i32> zeroinitializer
 619   %nega = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %a, <32 x i1> splat (i1 -1), i32 %evl)
 620   %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> splat (i1 -1), i32 %evl)
 621   %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %nega, <32 x half> %vb, <32 x half> %negc, <32 x i1> splat (i1 -1), i32 %evl)
 622   %u = call <32 x half> @llvm.vp.select.v32f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl)
 623   ret <32 x half> %u
 624 }
 625
 ; <32 x half> vector-scalar case blended with vp.select, with the vp.fma
 ; multiplicands swapped (splat of %b first, negated %a second); the expected
 ; assembly matches the non-commuted vfnmacc_vf_v32f16_ta test.
 626 define <32 x half> @vfnmacc_vf_v32f16_commute_ta(<32 x half> %a, half %b, <32 x half> %c, <32 x i1> %m, i32 zeroext %evl) {
 627 ; CHECK-LABEL: vfnmacc_vf_v32f16_commute_ta:
 628 ; CHECK:       # %bb.0:
 629 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 630 ; CHECK-NEXT:    vfnmacc.vf v12, fa0, v8, v0.t
 631 ; CHECK-NEXT:    vmv.v.v v8, v12
 632 ; CHECK-NEXT:    ret
 633   %elt.head = insertelement <32 x half> poison, half %b, i32 0
 634   %vb = shufflevector <32 x half> %elt.head, <32 x half> poison, <32 x i32> zeroinitializer
 635   %nega = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %a, <32 x i1> splat (i1 -1), i32 %evl)
 636   %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> splat (i1 -1), i32 %evl)
 637   %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %vb, <32 x half> %nega, <32 x half> %negc, <32 x i1> splat (i1 -1), i32 %evl)
 638   %u = call <32 x half> @llvm.vp.select.v32f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl)
 639   ret <32 x half> %u
 640 }
 641
 ; Declarations of the VP intrinsics (fma, fneg, merge, select) on <2 x float>
 ; that the v2f32 tests below call.
 642 declare <2 x float> @llvm.vp.fma.v2f32(<2 x float>, <2 x float>, <2 x float>, <2 x i1>, i32)
 643 declare <2 x float> @llvm.vp.fneg.v2f32(<2 x float>, <2 x i1>, i32)
 644 declare <2 x float> @llvm.vp.merge.v2f32(<2 x i1>, <2 x float>, <2 x float>, i32)
 645 declare <2 x float> @llvm.vp.select.v2f32(<2 x i1>, <2 x float>, <2 x float>, i32)
 646
 ; <2 x float> vector-vector case: vp.fneg of %a and %c feed vp.fma(-a, b, -c)
 ; under an all-true mask; vp.merge then blends the result with %c using %m.
 ; Expected assembly: one masked vfnmacc.vv under a "tu, mu" vsetvli.
 647 define <2 x float> @vfnmacc_vv_v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c, <2 x i1> %m, i32 zeroext %evl) {
 648 ; CHECK-LABEL: vfnmacc_vv_v2f32:
 649 ; CHECK:       # %bb.0:
 650 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
 651 ; CHECK-NEXT:    vfnmacc.vv v10, v8, v9, v0.t
 652 ; CHECK-NEXT:    vmv1r.v v8, v10
 653 ; CHECK-NEXT:    ret
 654   %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> splat (i1 -1), i32 %evl)
 655   %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> splat (i1 -1), i32 %evl)
 656   %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %nega, <2 x float> %b, <2 x float> %negc, <2 x i1> splat (i1 -1), i32 %evl)
 657   %u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl)
 658   ret <2 x float> %u
 659 }
 660
 ; <2 x float> vector-vector case where the vp.merge mask is an all-true splat;
 ; the %m argument is not referenced by the body.
 ; Expected assembly: unmasked vfnmacc.vv under a "tu, ma" vsetvli.
 661 define <2 x float> @vfnmacc_vv_v2f32_unmasked(<2 x float> %a, <2 x float> %b, <2 x float> %c, <2 x i1> %m, i32 zeroext %evl) {
 662 ; CHECK-LABEL: vfnmacc_vv_v2f32_unmasked:
 663 ; CHECK:       # %bb.0:
 664 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, ma
 665 ; CHECK-NEXT:    vfnmacc.vv v10, v8, v9
 666 ; CHECK-NEXT:    vmv1r.v v8, v10
 667 ; CHECK-NEXT:    ret
 668   %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> splat (i1 -1), i32 %evl)
 669   %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> splat (i1 -1), i32 %evl)
 670   %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %nega, <2 x float> %b, <2 x float> %negc, <2 x i1> splat (i1 -1), i32 %evl)
 671   %u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> splat (i1 -1), <2 x float> %v, <2 x float> %c, i32 %evl)
 672   ret <2 x float> %u
 673 }
 674
 ; <2 x float> vector-scalar case: %b is splatted via insertelement +
 ; shufflevector and passed as the second vp.fma operand; vp.merge blends the
 ; result with %c using %m.
 ; Expected assembly: masked vfnmacc.vf reading fa0 under a "tu, mu" vsetvli.
 675 define <2 x float> @vfnmacc_vf_v2f32(<2 x float> %a, float %b, <2 x float> %c, <2 x i1> %m, i32 zeroext %evl) {
 676 ; CHECK-LABEL: vfnmacc_vf_v2f32:
 677 ; CHECK:       # %bb.0:
 678 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
 679 ; CHECK-NEXT:    vfnmacc.vf v9, fa0, v8, v0.t
 680 ; CHECK-NEXT:    vmv1r.v v8, v9
 681 ; CHECK-NEXT:    ret
 682   %elt.head = insertelement <2 x float> poison, float %b, i32 0
 683   %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer
 684   %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> splat (i1 -1), i32 %evl)
 685   %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> splat (i1 -1), i32 %evl)
 686   %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %nega, <2 x float> %vb, <2 x float> %negc, <2 x i1> splat (i1 -1), i32 %evl)
 687   %u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl)
 688   ret <2 x float> %u
 689 }
 690
 ; <2 x float> vector-scalar case with the vp.fma multiplicands swapped (splat
 ; of %b first, negated %a second); the expected assembly matches the
 ; non-commuted vfnmacc_vf_v2f32 test.
 691 define <2 x float> @vfnmacc_vf_v2f32_commute(<2 x float> %a, float %b, <2 x float> %c, <2 x i1> %m, i32 zeroext %evl) {
 692 ; CHECK-LABEL: vfnmacc_vf_v2f32_commute:
 693 ; CHECK:       # %bb.0:
 694 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
 695 ; CHECK-NEXT:    vfnmacc.vf v9, fa0, v8, v0.t
 696 ; CHECK-NEXT:    vmv1r.v v8, v9
 697 ; CHECK-NEXT:    ret
 698   %elt.head = insertelement <2 x float> poison, float %b, i32 0
 699   %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer
 700   %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> splat (i1 -1), i32 %evl)
 701   %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> splat (i1 -1), i32 %evl)
 702   %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %vb, <2 x float> %nega, <2 x float> %negc, <2 x i1> splat (i1 -1), i32 %evl)
 703   %u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl)
 704   ret <2 x float> %u
 705 }
 706
 ; <2 x float> vector-scalar case with no mask argument: the vp.merge mask is
 ; an all-true splat.
 ; Expected assembly: unmasked vfnmacc.vf reading fa0 under a "tu, ma" vsetvli.
 707 define <2 x float> @vfnmacc_vf_v2f32_unmasked(<2 x float> %a, float %b, <2 x float> %c, i32 zeroext %evl) {
 708 ; CHECK-LABEL: vfnmacc_vf_v2f32_unmasked:
 709 ; CHECK:       # %bb.0:
 710 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, ma
 711 ; CHECK-NEXT:    vfnmacc.vf v9, fa0, v8
 712 ; CHECK-NEXT:    vmv1r.v v8, v9
 713 ; CHECK-NEXT:    ret
 714   %elt.head = insertelement <2 x float> poison, float %b, i32 0
 715   %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer
 716   %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> splat (i1 -1), i32 %evl)
 717   %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> splat (i1 -1), i32 %evl)
 718   %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %nega, <2 x float> %vb, <2 x float> %negc, <2 x i1> splat (i1 -1), i32 %evl)
 719   %u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> splat (i1 -1), <2 x float> %v, <2 x float> %c, i32 %evl)
 720   ret <2 x float> %u
 721 }
 722
 ; <2 x float> vector-vector case that blends with vp.select (under %m) instead
 ; of vp.merge.
 ; Expected assembly: masked vfnmacc.vv under a "ta, mu" vsetvli, followed by a
 ; vmv1r.v copy into v8.
 723 define <2 x float> @vfnmacc_vv_v2f32_ta(<2 x float> %a, <2 x float> %b, <2 x float> %c, <2 x i1> %m, i32 zeroext %evl) {
 724 ; CHECK-LABEL: vfnmacc_vv_v2f32_ta:
 725 ; CHECK:       # %bb.0:
 726 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
 727 ; CHECK-NEXT:    vfnmacc.vv v10, v8, v9, v0.t
 728 ; CHECK-NEXT:    vmv1r.v v8, v10
 729 ; CHECK-NEXT:    ret
 730   %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> splat (i1 -1), i32 %evl)
 731   %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> splat (i1 -1), i32 %evl)
 732   %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %nega, <2 x float> %b, <2 x float> %negc, <2 x i1> splat (i1 -1), i32 %evl)
 733   %u = call <2 x float> @llvm.vp.select.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl)
 734   ret <2 x float> %u
 735 }
 736
 ; <2 x float> vector-scalar case (splat of %b as the second vp.fma operand)
 ; that blends with vp.select under %m instead of vp.merge.
 ; Expected assembly: masked vfnmacc.vf reading fa0 under a "ta, mu" vsetvli,
 ; followed by a vmv1r.v copy into v8.
 737 define <2 x float> @vfnmacc_vf_v2f32_ta(<2 x float> %a, float %b, <2 x float> %c, <2 x i1> %m, i32 zeroext %evl) {
 738 ; CHECK-LABEL: vfnmacc_vf_v2f32_ta:
 739 ; CHECK:       # %bb.0:
 740 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
 741 ; CHECK-NEXT:    vfnmacc.vf v9, fa0, v8, v0.t
 742 ; CHECK-NEXT:    vmv1r.v v8, v9
 743 ; CHECK-NEXT:    ret
 744   %elt.head = insertelement <2 x float> poison, float %b, i32 0
 745   %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer
 746   %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> splat (i1 -1), i32 %evl)
 747   %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> splat (i1 -1), i32 %evl)
 748   %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %nega, <2 x float> %vb, <2 x float> %negc, <2 x i1> splat (i1 -1), i32 %evl)
 749   %u = call <2 x float> @llvm.vp.select.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl)
 750   ret <2 x float> %u
 751 }
 752
 ; <2 x float> vector-scalar case blended with vp.select, with the vp.fma
 ; multiplicands swapped (splat of %b first, negated %a second); the expected
 ; assembly matches the non-commuted vfnmacc_vf_v2f32_ta test.
 753 define <2 x float> @vfnmacc_vf_v2f32_commute_ta(<2 x float> %a, float %b, <2 x float> %c, <2 x i1> %m, i32 zeroext %evl) {
 754 ; CHECK-LABEL: vfnmacc_vf_v2f32_commute_ta:
 755 ; CHECK:       # %bb.0:
 756 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
 757 ; CHECK-NEXT:    vfnmacc.vf v9, fa0, v8, v0.t
 758 ; CHECK-NEXT:    vmv1r.v v8, v9
 759 ; CHECK-NEXT:    ret
 760   %elt.head = insertelement <2 x float> poison, float %b, i32 0
 761   %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer
 762   %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> splat (i1 -1), i32 %evl)
 763   %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> splat (i1 -1), i32 %evl)
 764   %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %vb, <2 x float> %nega, <2 x float> %negc, <2 x i1> splat (i1 -1), i32 %evl)
 765   %u = call <2 x float> @llvm.vp.select.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl)
 766   ret <2 x float> %u
 767 }
 768
 ; Declarations of the VP intrinsics (fma, fneg, merge, select) on <4 x float>
 ; that the v4f32 tests below call.
 769 declare <4 x float> @llvm.vp.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, <4 x i1>, i32)
 770 declare <4 x float> @llvm.vp.fneg.v4f32(<4 x float>, <4 x i1>, i32)
 771 declare <4 x float> @llvm.vp.merge.v4f32(<4 x i1>, <4 x float>, <4 x float>, i32)
 772 declare <4 x float> @llvm.vp.select.v4f32(<4 x i1>, <4 x float>, <4 x float>, i32)
 773
 ; <4 x float> vector-vector case: vp.fneg of %a and %c feed vp.fma(-a, b, -c)
 ; under an all-true mask; vp.merge then blends the result with %c using %m.
 ; Expected assembly: one masked vfnmacc.vv under a "tu, mu" vsetvli.
 774 define <4 x float> @vfnmacc_vv_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x i1> %m, i32 zeroext %evl) {
 775 ; CHECK-LABEL: vfnmacc_vv_v4f32:
 776 ; CHECK:       # %bb.0:
 777 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
 778 ; CHECK-NEXT:    vfnmacc.vv v10, v8, v9, v0.t
 779 ; CHECK-NEXT:    vmv1r.v v8, v10
 780 ; CHECK-NEXT:    ret
 781   %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> splat (i1 -1), i32 %evl)
 782   %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> splat (i1 -1), i32 %evl)
 783   %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %nega, <4 x float> %b, <4 x float> %negc, <4 x i1> splat (i1 -1), i32 %evl)
 784   %u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl)
 785   ret <4 x float> %u
 786 }
 787
 ; <4 x float> vector-vector case where the vp.merge mask is an all-true splat;
 ; the %m argument is not referenced by the body.
 ; Expected assembly: unmasked vfnmacc.vv under a "tu, ma" vsetvli.
 788 define <4 x float> @vfnmacc_vv_v4f32_unmasked(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x i1> %m, i32 zeroext %evl) {
 789 ; CHECK-LABEL: vfnmacc_vv_v4f32_unmasked:
 790 ; CHECK:       # %bb.0:
 791 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, ma
 792 ; CHECK-NEXT:    vfnmacc.vv v10, v8, v9
 793 ; CHECK-NEXT:    vmv1r.v v8, v10
 794 ; CHECK-NEXT:    ret
 795   %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> splat (i1 -1), i32 %evl)
 796   %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> splat (i1 -1), i32 %evl)
 797   %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %nega, <4 x float> %b, <4 x float> %negc, <4 x i1> splat (i1 -1), i32 %evl)
 798   %u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> splat (i1 -1), <4 x float> %v, <4 x float> %c, i32 %evl)
 799   ret <4 x float> %u
 800 }
 801
 ; <4 x float> vector-scalar case: %b is splatted via insertelement +
 ; shufflevector and passed as the second vp.fma operand; vp.merge blends the
 ; result with %c using %m.
 ; Expected assembly: masked vfnmacc.vf reading fa0 under a "tu, mu" vsetvli.
 802 define <4 x float> @vfnmacc_vf_v4f32(<4 x float> %a, float %b, <4 x float> %c, <4 x i1> %m, i32 zeroext %evl) {
 803 ; CHECK-LABEL: vfnmacc_vf_v4f32:
 804 ; CHECK:       # %bb.0:
 805 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
 806 ; CHECK-NEXT:    vfnmacc.vf v9, fa0, v8, v0.t
 807 ; CHECK-NEXT:    vmv1r.v v8, v9
 808 ; CHECK-NEXT:    ret
 809   %elt.head = insertelement <4 x float> poison, float %b, i32 0
 810   %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer
 811   %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> splat (i1 -1), i32 %evl)
 812   %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> splat (i1 -1), i32 %evl)
 813   %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %nega, <4 x float> %vb, <4 x float> %negc, <4 x i1> splat (i1 -1), i32 %evl)
 814   %u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl)
 815   ret <4 x float> %u
 816 }
 817
 ; <4 x float> vector-scalar case with the vp.fma multiplicands swapped (splat
 ; of %b first, negated %a second); the expected assembly matches the
 ; non-commuted vfnmacc_vf_v4f32 test.
 818 define <4 x float> @vfnmacc_vf_v4f32_commute(<4 x float> %a, float %b, <4 x float> %c, <4 x i1> %m, i32 zeroext %evl) {
 819 ; CHECK-LABEL: vfnmacc_vf_v4f32_commute:
 820 ; CHECK:       # %bb.0:
 821 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
 822 ; CHECK-NEXT:    vfnmacc.vf v9, fa0, v8, v0.t
 823 ; CHECK-NEXT:    vmv1r.v v8, v9
 824 ; CHECK-NEXT:    ret
 825   %elt.head = insertelement <4 x float> poison, float %b, i32 0
 826   %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer
 827   %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> splat (i1 -1), i32 %evl)
 828   %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> splat (i1 -1), i32 %evl)
 829   %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %vb, <4 x float> %nega, <4 x float> %negc, <4 x i1> splat (i1 -1), i32 %evl)
 830   %u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl)
 831   ret <4 x float> %u
 832 }
 833
 ; <4 x float> vector-scalar case with no mask argument: the vp.merge mask is
 ; an all-true splat.
 ; Expected assembly: unmasked vfnmacc.vf reading fa0 under a "tu, ma" vsetvli.
 834 define <4 x float> @vfnmacc_vf_v4f32_unmasked(<4 x float> %a, float %b, <4 x float> %c, i32 zeroext %evl) {
 835 ; CHECK-LABEL: vfnmacc_vf_v4f32_unmasked:
 836 ; CHECK:       # %bb.0:
 837 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, ma
 838 ; CHECK-NEXT:    vfnmacc.vf v9, fa0, v8
 839 ; CHECK-NEXT:    vmv1r.v v8, v9
 840 ; CHECK-NEXT:    ret
 841   %elt.head = insertelement <4 x float> poison, float %b, i32 0
 842   %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer
 843   %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> splat (i1 -1), i32 %evl)
 844   %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> splat (i1 -1), i32 %evl)
 845   %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %nega, <4 x float> %vb, <4 x float> %negc, <4 x i1> splat (i1 -1), i32 %evl)
 846   %u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> splat (i1 -1), <4 x float> %v, <4 x float> %c, i32 %evl)
 847   ret <4 x float> %u
 848 }
 849
 ; <4 x float> vector-vector case that blends with vp.select (under %m) instead
 ; of vp.merge.
 ; Expected assembly: masked vfnmacc.vv under a "ta, mu" vsetvli, followed by a
 ; vmv.v.v copy into v8.
 850 define <4 x float> @vfnmacc_vv_v4f32_ta(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x i1> %m, i32 zeroext %evl) {
 851 ; CHECK-LABEL: vfnmacc_vv_v4f32_ta:
 852 ; CHECK:       # %bb.0:
 853 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
 854 ; CHECK-NEXT:    vfnmacc.vv v10, v8, v9, v0.t
 855 ; CHECK-NEXT:    vmv.v.v v8, v10
 856 ; CHECK-NEXT:    ret
 857   %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> splat (i1 -1), i32 %evl)
 858   %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> splat (i1 -1), i32 %evl)
 859   %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %nega, <4 x float> %b, <4 x float> %negc, <4 x i1> splat (i1 -1), i32 %evl)
 860   %u = call <4 x float> @llvm.vp.select.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl)
 861   ret <4 x float> %u
 862 }
 863
 ; <4 x float> vector-scalar case (splat of %b as the second vp.fma operand)
 ; that blends with vp.select under %m instead of vp.merge.
 ; Expected assembly: masked vfnmacc.vf reading fa0 under a "ta, mu" vsetvli,
 ; followed by a vmv.v.v copy into v8.
 864 define <4 x float> @vfnmacc_vf_v4f32_ta(<4 x float> %a, float %b, <4 x float> %c, <4 x i1> %m, i32 zeroext %evl) {
 865 ; CHECK-LABEL: vfnmacc_vf_v4f32_ta:
 866 ; CHECK:       # %bb.0:
 867 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
 868 ; CHECK-NEXT:    vfnmacc.vf v9, fa0, v8, v0.t
 869 ; CHECK-NEXT:    vmv.v.v v8, v9
 870 ; CHECK-NEXT:    ret
 871   %elt.head = insertelement <4 x float> poison, float %b, i32 0
 872   %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer
 873   %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> splat (i1 -1), i32 %evl)
 874   %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> splat (i1 -1), i32 %evl)
 875   %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %nega, <4 x float> %vb, <4 x float> %negc, <4 x i1> splat (i1 -1), i32 %evl)
 876   %u = call <4 x float> @llvm.vp.select.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl)
 877   ret <4 x float> %u
 878 }
 879
 ; <4 x float> vector-scalar case blended with vp.select, with the vp.fma
 ; multiplicands swapped (splat of %b first, negated %a second); the expected
 ; assembly matches the non-commuted vfnmacc_vf_v4f32_ta test.
 880 define <4 x float> @vfnmacc_vf_v4f32_commute_ta(<4 x float> %a, float %b, <4 x float> %c, <4 x i1> %m, i32 zeroext %evl) {
 881 ; CHECK-LABEL: vfnmacc_vf_v4f32_commute_ta:
 882 ; CHECK:       # %bb.0:
 883 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
 884 ; CHECK-NEXT:    vfnmacc.vf v9, fa0, v8, v0.t
 885 ; CHECK-NEXT:    vmv.v.v v8, v9
 886 ; CHECK-NEXT:    ret
 887   %elt.head = insertelement <4 x float> poison, float %b, i32 0
 888   %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer
 889   %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> splat (i1 -1), i32 %evl)
 890   %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> splat (i1 -1), i32 %evl)
 891   %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %vb, <4 x float> %nega, <4 x float> %negc, <4 x i1> splat (i1 -1), i32 %evl)
 892   %u = call <4 x float> @llvm.vp.select.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl)
 893   ret <4 x float> %u
 894 }
 895
 ; Declarations of the VP intrinsics (fma, fneg, merge, select) on <8 x float>
 ; that the v8f32 tests below call.
 896 declare <8 x float> @llvm.vp.fma.v8f32(<8 x float>, <8 x float>, <8 x float>, <8 x i1>, i32)
 897 declare <8 x float> @llvm.vp.fneg.v8f32(<8 x float>, <8 x i1>, i32)
 898 declare <8 x float> @llvm.vp.merge.v8f32(<8 x i1>, <8 x float>, <8 x float>, i32)
 899 declare <8 x float> @llvm.vp.select.v8f32(<8 x i1>, <8 x float>, <8 x float>, i32)
 900
 ; <8 x float> vector-vector case: vp.fneg of %a and %c feed vp.fma(-a, b, -c)
 ; under an all-true mask; vp.merge then blends the result with %c using %m.
 ; Expected assembly: one masked vfnmacc.vv under a "tu, mu" vsetvli.
 901 define <8 x float> @vfnmacc_vv_v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x i1> %m, i32 zeroext %evl) {
 902 ; CHECK-LABEL: vfnmacc_vv_v8f32:
 903 ; CHECK:       # %bb.0:
 904 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
 905 ; CHECK-NEXT:    vfnmacc.vv v12, v8, v10, v0.t
 906 ; CHECK-NEXT:    vmv2r.v v8, v12
 907 ; CHECK-NEXT:    ret
 908   %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> splat (i1 -1), i32 %evl)
 909   %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> splat (i1 -1), i32 %evl)
 910   %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %nega, <8 x float> %b, <8 x float> %negc, <8 x i1> splat (i1 -1), i32 %evl)
 911   %u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl)
 912   ret <8 x float> %u
 913 }
 914
 ; <8 x float> vector-vector case where the vp.merge mask is an all-true splat;
 ; the %m argument is not referenced by the body.
 ; Expected assembly: unmasked vfnmacc.vv under a "tu, ma" vsetvli.
 915 define <8 x float> @vfnmacc_vv_v8f32_unmasked(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x i1> %m, i32 zeroext %evl) {
 916 ; CHECK-LABEL: vfnmacc_vv_v8f32_unmasked:
 917 ; CHECK:       # %bb.0:
 918 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, ma
 919 ; CHECK-NEXT:    vfnmacc.vv v12, v8, v10
 920 ; CHECK-NEXT:    vmv2r.v v8, v12
 921 ; CHECK-NEXT:    ret
 922   %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> splat (i1 -1), i32 %evl)
 923   %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> splat (i1 -1), i32 %evl)
 924   %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %nega, <8 x float> %b, <8 x float> %negc, <8 x i1> splat (i1 -1), i32 %evl)
 925   %u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> splat (i1 -1), <8 x float> %v, <8 x float> %c, i32 %evl)
 926   ret <8 x float> %u
 927 }
 928
 ; <8 x float> vector-scalar case: %b is splatted via insertelement +
 ; shufflevector and passed as the second vp.fma operand; vp.merge blends the
 ; result with %c using %m.
 ; Expected assembly: masked vfnmacc.vf reading fa0 under a "tu, mu" vsetvli.
 929 define <8 x float> @vfnmacc_vf_v8f32(<8 x float> %a, float %b, <8 x float> %c, <8 x i1> %m, i32 zeroext %evl) {
 930 ; CHECK-LABEL: vfnmacc_vf_v8f32:
 931 ; CHECK:       # %bb.0:
 932 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
 933 ; CHECK-NEXT:    vfnmacc.vf v10, fa0, v8, v0.t
 934 ; CHECK-NEXT:    vmv2r.v v8, v10
 935 ; CHECK-NEXT:    ret
 936   %elt.head = insertelement <8 x float> poison, float %b, i32 0
 937   %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer
 938   %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> splat (i1 -1), i32 %evl)
 939   %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> splat (i1 -1), i32 %evl)
 940   %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %nega, <8 x float> %vb, <8 x float> %negc, <8 x i1> splat (i1 -1), i32 %evl)
 941   %u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl)
 942   ret <8 x float> %u
 943 }
 944
 ; <8 x float> vector-scalar case with the vp.fma multiplicands swapped (splat
 ; of %b first, negated %a second); the expected assembly matches the
 ; non-commuted vfnmacc_vf_v8f32 test.
 945 define <8 x float> @vfnmacc_vf_v8f32_commute(<8 x float> %a, float %b, <8 x float> %c, <8 x i1> %m, i32 zeroext %evl) {
 946 ; CHECK-LABEL: vfnmacc_vf_v8f32_commute:
 947 ; CHECK:       # %bb.0:
 948 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
 949 ; CHECK-NEXT:    vfnmacc.vf v10, fa0, v8, v0.t
 950 ; CHECK-NEXT:    vmv2r.v v8, v10
 951 ; CHECK-NEXT:    ret
 952   %elt.head = insertelement <8 x float> poison, float %b, i32 0
 953   %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer
 954   %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> splat (i1 -1), i32 %evl)
 955   %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> splat (i1 -1), i32 %evl)
 956   %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %vb, <8 x float> %nega, <8 x float> %negc, <8 x i1> splat (i1 -1), i32 %evl)
 957   %u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl)
 958   ret <8 x float> %u
 959 }
 960
 ; <8 x float> vector-scalar case with no mask argument: the vp.merge mask is
 ; an all-true splat.
 ; Expected assembly: unmasked vfnmacc.vf reading fa0 under a "tu, ma" vsetvli.
 961 define <8 x float> @vfnmacc_vf_v8f32_unmasked(<8 x float> %a, float %b, <8 x float> %c, i32 zeroext %evl) {
 962 ; CHECK-LABEL: vfnmacc_vf_v8f32_unmasked:
 963 ; CHECK:       # %bb.0:
 964 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, ma
 965 ; CHECK-NEXT:    vfnmacc.vf v10, fa0, v8
 966 ; CHECK-NEXT:    vmv2r.v v8, v10
 967 ; CHECK-NEXT:    ret
 968   %elt.head = insertelement <8 x float> poison, float %b, i32 0
 969   %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer
 970   %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> splat (i1 -1), i32 %evl)
 971   %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> splat (i1 -1), i32 %evl)
 972   %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %nega, <8 x float> %vb, <8 x float> %negc, <8 x i1> splat (i1 -1), i32 %evl)
 973   %u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> splat (i1 -1), <8 x float> %v, <8 x float> %c, i32 %evl)
 974   ret <8 x float> %u
 975 }
 976
 ; <8 x float> vector-vector case that blends with vp.select (under %m) instead
 ; of vp.merge.
 ; Expected assembly: masked vfnmacc.vv under a "ta, mu" vsetvli, followed by a
 ; vmv.v.v copy into v8.
 977 define <8 x float> @vfnmacc_vv_v8f32_ta(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x i1> %m, i32 zeroext %evl) {
 978 ; CHECK-LABEL: vfnmacc_vv_v8f32_ta:
 979 ; CHECK:       # %bb.0:
 980 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
 981 ; CHECK-NEXT:    vfnmacc.vv v12, v8, v10, v0.t
 982 ; CHECK-NEXT:    vmv.v.v v8, v12
 983 ; CHECK-NEXT:    ret
 984   %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> splat (i1 -1), i32 %evl)
 985   %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> splat (i1 -1), i32 %evl)
 986   %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %nega, <8 x float> %b, <8 x float> %negc, <8 x i1> splat (i1 -1), i32 %evl)
 987   %u = call <8 x float> @llvm.vp.select.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl)
 988   ret <8 x float> %u
 989 }
 990
 ; <8 x float> vector-scalar case (splat of %b as the second vp.fma operand)
 ; that blends with vp.select under %m instead of vp.merge.
 ; Expected assembly: masked vfnmacc.vf reading fa0 under a "ta, mu" vsetvli,
 ; followed by a vmv.v.v copy into v8.
 991 define <8 x float> @vfnmacc_vf_v8f32_ta(<8 x float> %a, float %b, <8 x float> %c, <8 x i1> %m, i32 zeroext %evl) {
 992 ; CHECK-LABEL: vfnmacc_vf_v8f32_ta:
 993 ; CHECK:       # %bb.0:
 994 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
 995 ; CHECK-NEXT:    vfnmacc.vf v10, fa0, v8, v0.t
 996 ; CHECK-NEXT:    vmv.v.v v8, v10
 997 ; CHECK-NEXT:    ret
 998   %elt.head = insertelement <8 x float> poison, float %b, i32 0
 999   %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer
1000   %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> splat (i1 -1), i32 %evl)
1001   %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> splat (i1 -1), i32 %evl)
1002   %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %nega, <8 x float> %vb, <8 x float> %negc, <8 x i1> splat (i1 -1), i32 %evl)
1003   %u = call <8 x float> @llvm.vp.select.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl)
1004   ret <8 x float> %u
1005 }
1006
; <8 x float> vector-scalar case blended with vp.select, with the vp.fma
; multiplicands swapped (splat of %b first, negated %a second); the expected
; assembly matches the non-commuted vfnmacc_vf_v8f32_ta test.
1007 define <8 x float> @vfnmacc_vf_v8f32_commute_ta(<8 x float> %a, float %b, <8 x float> %c, <8 x i1> %m, i32 zeroext %evl) {
1008 ; CHECK-LABEL: vfnmacc_vf_v8f32_commute_ta:
1009 ; CHECK:       # %bb.0:
1010 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
1011 ; CHECK-NEXT:    vfnmacc.vf v10, fa0, v8, v0.t
1012 ; CHECK-NEXT:    vmv.v.v v8, v10
1013 ; CHECK-NEXT:    ret
1014   %elt.head = insertelement <8 x float> poison, float %b, i32 0
1015   %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer
1016   %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> splat (i1 -1), i32 %evl)
1017   %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> splat (i1 -1), i32 %evl)
1018   %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %vb, <8 x float> %nega, <8 x float> %negc, <8 x i1> splat (i1 -1), i32 %evl)
1019   %u = call <8 x float> @llvm.vp.select.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl)
1020   ret <8 x float> %u
1021 }
1022
; Declarations of the VP intrinsics (fma, fneg, merge, select) on <16 x float>
; that the v16f32 tests below call.
1023 declare <16 x float> @llvm.vp.fma.v16f32(<16 x float>, <16 x float>, <16 x float>, <16 x i1>, i32)
1024 declare <16 x float> @llvm.vp.fneg.v16f32(<16 x float>, <16 x i1>, i32)
1025 declare <16 x float> @llvm.vp.merge.v16f32(<16 x i1>, <16 x float>, <16 x float>, i32)
1026 declare <16 x float> @llvm.vp.select.v16f32(<16 x i1>, <16 x float>, <16 x float>, i32)
1027
; <16 x float> vector-vector case, masked: fma(fneg %a, %b, fneg %c) is computed
; with an all-true mask, then vp.merge under %m picks that result over %c.
; Expected assembly: one masked vfnmacc.vv accumulating into the register that
; holds %c (v16), under an e32/m4 "tu, mu" vsetvli, then a whole-register copy.
1028 define <16 x float> @vfnmacc_vv_v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x i1> %m, i32 zeroext %evl) {
1029 ; CHECK-LABEL: vfnmacc_vv_v16f32:
1030 ; CHECK:       # %bb.0:
1031 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
1032 ; CHECK-NEXT:    vfnmacc.vv v16, v8, v12, v0.t
1033 ; CHECK-NEXT:    vmv4r.v v8, v16
1034 ; CHECK-NEXT:    ret
1035   %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> splat (i1 -1), i32 %evl)
1036   %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> splat (i1 -1), i32 %evl)
1037   %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %nega, <16 x float> %b, <16 x float> %negc, <16 x i1> splat (i1 -1), i32 %evl)
1038   %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl)
1039   ret <16 x float> %u
1040 }
1041
; <16 x float> vector-vector case, unmasked: the vp.merge mask is all-true, so
; the %m parameter is declared but never referenced in the body.
; Expected assembly: vfnmacc.vv with no v0.t operand under a "tu, ma" vsetvli.
1042 define <16 x float> @vfnmacc_vv_v16f32_unmasked(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x i1> %m, i32 zeroext %evl) {
1043 ; CHECK-LABEL: vfnmacc_vv_v16f32_unmasked:
1044 ; CHECK:       # %bb.0:
1045 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, ma
1046 ; CHECK-NEXT:    vfnmacc.vv v16, v8, v12
1047 ; CHECK-NEXT:    vmv4r.v v8, v16
1048 ; CHECK-NEXT:    ret
1049   %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> splat (i1 -1), i32 %evl)
1050   %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> splat (i1 -1), i32 %evl)
1051   %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %nega, <16 x float> %b, <16 x float> %negc, <16 x i1> splat (i1 -1), i32 %evl)
1052   %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> splat (i1 -1), <16 x float> %v, <16 x float> %c, i32 %evl)
1053   ret <16 x float> %u
1054 }
1055
; <16 x float> vector-scalar case, masked: scalar %b is splatted with
; insertelement + shufflevector and used as the second fma multiplicand; the
; result is merged with %c under %m via vp.merge.
; Expected assembly: one masked vfnmacc.vf reading fa0, under "tu, mu".
1056 define <16 x float> @vfnmacc_vf_v16f32(<16 x float> %a, float %b, <16 x float> %c, <16 x i1> %m, i32 zeroext %evl) {
1057 ; CHECK-LABEL: vfnmacc_vf_v16f32:
1058 ; CHECK:       # %bb.0:
1059 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
1060 ; CHECK-NEXT:    vfnmacc.vf v12, fa0, v8, v0.t
1061 ; CHECK-NEXT:    vmv4r.v v8, v12
1062 ; CHECK-NEXT:    ret
1063   %elt.head = insertelement <16 x float> poison, float %b, i32 0
1064   %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer
1065   %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> splat (i1 -1), i32 %evl)
1066   %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> splat (i1 -1), i32 %evl)
1067   %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %nega, <16 x float> %vb, <16 x float> %negc, <16 x i1> splat (i1 -1), i32 %evl)
1068   %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl)
1069   ret <16 x float> %u
1070 }
1071
; <16 x float> vector-scalar case, masked, commuted: the splat of %b is the
; FIRST fma multiplicand and the negated %a the second.
; Expected assembly: one masked vfnmacc.vf reading fa0, under "tu, mu".
1072 define <16 x float> @vfnmacc_vf_v16f32_commute(<16 x float> %a, float %b, <16 x float> %c, <16 x i1> %m, i32 zeroext %evl) {
1073 ; CHECK-LABEL: vfnmacc_vf_v16f32_commute:
1074 ; CHECK:       # %bb.0:
1075 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
1076 ; CHECK-NEXT:    vfnmacc.vf v12, fa0, v8, v0.t
1077 ; CHECK-NEXT:    vmv4r.v v8, v12
1078 ; CHECK-NEXT:    ret
1079   %elt.head = insertelement <16 x float> poison, float %b, i32 0
1080   %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer
1081   %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> splat (i1 -1), i32 %evl)
1082   %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> splat (i1 -1), i32 %evl)
1083   %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %vb, <16 x float> %nega, <16 x float> %negc, <16 x i1> splat (i1 -1), i32 %evl)
1084   %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl)
1085   ret <16 x float> %u
1086 }
1087
; <16 x float> vector-scalar case, unmasked: there is no mask parameter and the
; vp.merge uses an all-true mask.
; Expected assembly: vfnmacc.vf with no v0.t operand under a "tu, ma" vsetvli.
1088 define <16 x float> @vfnmacc_vf_v16f32_unmasked(<16 x float> %a, float %b, <16 x float> %c, i32 zeroext %evl) {
1089 ; CHECK-LABEL: vfnmacc_vf_v16f32_unmasked:
1090 ; CHECK:       # %bb.0:
1091 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, ma
1092 ; CHECK-NEXT:    vfnmacc.vf v12, fa0, v8
1093 ; CHECK-NEXT:    vmv4r.v v8, v12
1094 ; CHECK-NEXT:    ret
1095   %elt.head = insertelement <16 x float> poison, float %b, i32 0
1096   %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer
1097   %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> splat (i1 -1), i32 %evl)
1098   %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> splat (i1 -1), i32 %evl)
1099   %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %nega, <16 x float> %vb, <16 x float> %negc, <16 x i1> splat (i1 -1), i32 %evl)
1100   %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> splat (i1 -1), <16 x float> %v, <16 x float> %c, i32 %evl)
1101   ret <16 x float> %u
1102 }
1103
; <16 x float> vector-vector case, vp.select variant: same fneg/fma chain, but
; the final blend with %c under %m uses vp.select instead of vp.merge.
; Expected assembly: masked vfnmacc.vv under a "ta, mu" vsetvli, and the result
; is copied with vmv.v.v rather than a whole-register move.
1104 define <16 x float> @vfnmacc_vv_v16f32_ta(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x i1> %m, i32 zeroext %evl) {
1105 ; CHECK-LABEL: vfnmacc_vv_v16f32_ta:
1106 ; CHECK:       # %bb.0:
1107 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
1108 ; CHECK-NEXT:    vfnmacc.vv v16, v8, v12, v0.t
1109 ; CHECK-NEXT:    vmv.v.v v8, v16
1110 ; CHECK-NEXT:    ret
1111   %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> splat (i1 -1), i32 %evl)
1112   %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> splat (i1 -1), i32 %evl)
1113   %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %nega, <16 x float> %b, <16 x float> %negc, <16 x i1> splat (i1 -1), i32 %evl)
1114   %u = call <16 x float> @llvm.vp.select.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl)
1115   ret <16 x float> %u
1116 }
1117
; <16 x float> vector-scalar case, vp.select variant: splat of %b is the second
; fma multiplicand; the result is blended with %c under %m via vp.select.
; Expected assembly: masked vfnmacc.vf reading fa0 under "ta, mu", then vmv.v.v.
1118 define <16 x float> @vfnmacc_vf_v16f32_ta(<16 x float> %a, float %b, <16 x float> %c, <16 x i1> %m, i32 zeroext %evl) {
1119 ; CHECK-LABEL: vfnmacc_vf_v16f32_ta:
1120 ; CHECK:       # %bb.0:
1121 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
1122 ; CHECK-NEXT:    vfnmacc.vf v12, fa0, v8, v0.t
1123 ; CHECK-NEXT:    vmv.v.v v8, v12
1124 ; CHECK-NEXT:    ret
1125   %elt.head = insertelement <16 x float> poison, float %b, i32 0
1126   %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer
1127   %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> splat (i1 -1), i32 %evl)
1128   %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> splat (i1 -1), i32 %evl)
1129   %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %nega, <16 x float> %vb, <16 x float> %negc, <16 x i1> splat (i1 -1), i32 %evl)
1130   %u = call <16 x float> @llvm.vp.select.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl)
1131   ret <16 x float> %u
1132 }
1133
; <16 x float> vector-scalar case, commuted, vp.select variant: the splat of %b
; is the FIRST fma multiplicand; the blend with %c under %m uses vp.select.
; Expected assembly: masked vfnmacc.vf reading fa0 under "ta, mu", then vmv.v.v.
1134 define <16 x float> @vfnmacc_vf_v16f32_commute_ta(<16 x float> %a, float %b, <16 x float> %c, <16 x i1> %m, i32 zeroext %evl) {
1135 ; CHECK-LABEL: vfnmacc_vf_v16f32_commute_ta:
1136 ; CHECK:       # %bb.0:
1137 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
1138 ; CHECK-NEXT:    vfnmacc.vf v12, fa0, v8, v0.t
1139 ; CHECK-NEXT:    vmv.v.v v8, v12
1140 ; CHECK-NEXT:    ret
1141   %elt.head = insertelement <16 x float> poison, float %b, i32 0
1142   %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer
1143   %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> splat (i1 -1), i32 %evl)
1144   %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> splat (i1 -1), i32 %evl)
1145   %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %vb, <16 x float> %nega, <16 x float> %negc, <16 x i1> splat (i1 -1), i32 %evl)
1146   %u = call <16 x float> @llvm.vp.select.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl)
1147   ret <16 x float> %u
1148 }
1149
; VP intrinsic declarations for <2 x double>: fma, fneg, merge and select.
; Each takes a <2 x i1> mask and an i32 explicit vector length.
1150 declare <2 x double> @llvm.vp.fma.v2f64(<2 x double>, <2 x double>, <2 x double>, <2 x i1>, i32)
1151 declare <2 x double> @llvm.vp.fneg.v2f64(<2 x double>, <2 x i1>, i32)
1152 declare <2 x double> @llvm.vp.merge.v2f64(<2 x i1>, <2 x double>, <2 x double>, i32)
1153 declare <2 x double> @llvm.vp.select.v2f64(<2 x i1>, <2 x double>, <2 x double>, i32)
1154
; <2 x double> vector-vector case, masked: fma(fneg %a, %b, fneg %c) is computed
; with an all-true mask, then vp.merge under %m picks that result over %c.
; Expected assembly: one masked vfnmacc.vv under an e64/m1 "tu, mu" vsetvli,
; then a whole-register copy into v8.
1155 define <2 x double> @vfnmacc_vv_v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x i1> %m, i32 zeroext %evl) {
1156 ; CHECK-LABEL: vfnmacc_vv_v2f64:
1157 ; CHECK:       # %bb.0:
1158 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
1159 ; CHECK-NEXT:    vfnmacc.vv v10, v8, v9, v0.t
1160 ; CHECK-NEXT:    vmv1r.v v8, v10
1161 ; CHECK-NEXT:    ret
1162   %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> splat (i1 -1), i32 %evl)
1163   %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> splat (i1 -1), i32 %evl)
1164   %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %nega, <2 x double> %b, <2 x double> %negc, <2 x i1> splat (i1 -1), i32 %evl)
1165   %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl)
1166   ret <2 x double> %u
1167 }
1168
; <2 x double> vector-vector case, unmasked: the vp.merge mask is all-true, so
; the %m parameter is declared but never referenced in the body.
; Expected assembly: vfnmacc.vv with no v0.t operand under a "tu, ma" vsetvli.
1169 define <2 x double> @vfnmacc_vv_v2f64_unmasked(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x i1> %m, i32 zeroext %evl) {
1170 ; CHECK-LABEL: vfnmacc_vv_v2f64_unmasked:
1171 ; CHECK:       # %bb.0:
1172 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, ma
1173 ; CHECK-NEXT:    vfnmacc.vv v10, v8, v9
1174 ; CHECK-NEXT:    vmv1r.v v8, v10
1175 ; CHECK-NEXT:    ret
1176   %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> splat (i1 -1), i32 %evl)
1177   %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> splat (i1 -1), i32 %evl)
1178   %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %nega, <2 x double> %b, <2 x double> %negc, <2 x i1> splat (i1 -1), i32 %evl)
1179   %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> splat (i1 -1), <2 x double> %v, <2 x double> %c, i32 %evl)
1180   ret <2 x double> %u
1181 }
1182
; <2 x double> vector-scalar case, masked: scalar %b is splatted with
; insertelement + shufflevector and used as the second fma multiplicand; the
; result is merged with %c under %m via vp.merge.
; Expected assembly: one masked vfnmacc.vf reading fa0, under "tu, mu".
1183 define <2 x double> @vfnmacc_vf_v2f64(<2 x double> %a, double %b, <2 x double> %c, <2 x i1> %m, i32 zeroext %evl) {
1184 ; CHECK-LABEL: vfnmacc_vf_v2f64:
1185 ; CHECK:       # %bb.0:
1186 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
1187 ; CHECK-NEXT:    vfnmacc.vf v9, fa0, v8, v0.t
1188 ; CHECK-NEXT:    vmv1r.v v8, v9
1189 ; CHECK-NEXT:    ret
1190   %elt.head = insertelement <2 x double> poison, double %b, i32 0
1191   %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer
1192   %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> splat (i1 -1), i32 %evl)
1193   %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> splat (i1 -1), i32 %evl)
1194   %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %nega, <2 x double> %vb, <2 x double> %negc, <2 x i1> splat (i1 -1), i32 %evl)
1195   %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl)
1196   ret <2 x double> %u
1197 }
1198
; <2 x double> vector-scalar case, masked, commuted: the splat of %b is the
; FIRST fma multiplicand and the negated %a the second.
; Expected assembly: one masked vfnmacc.vf reading fa0, under "tu, mu".
1199 define <2 x double> @vfnmacc_vf_v2f64_commute(<2 x double> %a, double %b, <2 x double> %c, <2 x i1> %m, i32 zeroext %evl) {
1200 ; CHECK-LABEL: vfnmacc_vf_v2f64_commute:
1201 ; CHECK:       # %bb.0:
1202 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
1203 ; CHECK-NEXT:    vfnmacc.vf v9, fa0, v8, v0.t
1204 ; CHECK-NEXT:    vmv1r.v v8, v9
1205 ; CHECK-NEXT:    ret
1206   %elt.head = insertelement <2 x double> poison, double %b, i32 0
1207   %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer
1208   %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> splat (i1 -1), i32 %evl)
1209   %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> splat (i1 -1), i32 %evl)
1210   %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %vb, <2 x double> %nega, <2 x double> %negc, <2 x i1> splat (i1 -1), i32 %evl)
1211   %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl)
1212   ret <2 x double> %u
1213 }
1214
; <2 x double> vector-scalar case, unmasked: there is no mask parameter and the
; vp.merge uses an all-true mask.
; Expected assembly: vfnmacc.vf with no v0.t operand under a "tu, ma" vsetvli.
1215 define <2 x double> @vfnmacc_vf_v2f64_unmasked(<2 x double> %a, double %b, <2 x double> %c, i32 zeroext %evl) {
1216 ; CHECK-LABEL: vfnmacc_vf_v2f64_unmasked:
1217 ; CHECK:       # %bb.0:
1218 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, ma
1219 ; CHECK-NEXT:    vfnmacc.vf v9, fa0, v8
1220 ; CHECK-NEXT:    vmv1r.v v8, v9
1221 ; CHECK-NEXT:    ret
1222   %elt.head = insertelement <2 x double> poison, double %b, i32 0
1223   %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer
1224   %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> splat (i1 -1), i32 %evl)
1225   %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> splat (i1 -1), i32 %evl)
1226   %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %nega, <2 x double> %vb, <2 x double> %negc, <2 x i1> splat (i1 -1), i32 %evl)
1227   %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> splat (i1 -1), <2 x double> %v, <2 x double> %c, i32 %evl)
1228   ret <2 x double> %u
1229 }
1230
; <2 x double> vector-vector case, vp.select variant: same fneg/fma chain, but
; the final blend with %c under %m uses vp.select instead of vp.merge.
; Expected assembly: masked vfnmacc.vv under a "ta, mu" vsetvli, and the result
; is copied with vmv.v.v rather than a whole-register move.
1231 define <2 x double> @vfnmacc_vv_v2f64_ta(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x i1> %m, i32 zeroext %evl) {
1232 ; CHECK-LABEL: vfnmacc_vv_v2f64_ta:
1233 ; CHECK:       # %bb.0:
1234 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
1235 ; CHECK-NEXT:    vfnmacc.vv v10, v8, v9, v0.t
1236 ; CHECK-NEXT:    vmv.v.v v8, v10
1237 ; CHECK-NEXT:    ret
1238   %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> splat (i1 -1), i32 %evl)
1239   %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> splat (i1 -1), i32 %evl)
1240   %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %nega, <2 x double> %b, <2 x double> %negc, <2 x i1> splat (i1 -1), i32 %evl)
1241   %u = call <2 x double> @llvm.vp.select.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl)
1242   ret <2 x double> %u
1243 }
1244
; <2 x double> vector-scalar case, vp.select variant: splat of %b is the second
; fma multiplicand; the result is blended with %c under %m via vp.select.
; Expected assembly: masked vfnmacc.vf reading fa0 under "ta, mu", then vmv.v.v.
1245 define <2 x double> @vfnmacc_vf_v2f64_ta(<2 x double> %a, double %b, <2 x double> %c, <2 x i1> %m, i32 zeroext %evl) {
1246 ; CHECK-LABEL: vfnmacc_vf_v2f64_ta:
1247 ; CHECK:       # %bb.0:
1248 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
1249 ; CHECK-NEXT:    vfnmacc.vf v9, fa0, v8, v0.t
1250 ; CHECK-NEXT:    vmv.v.v v8, v9
1251 ; CHECK-NEXT:    ret
1252   %elt.head = insertelement <2 x double> poison, double %b, i32 0
1253   %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer
1254   %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> splat (i1 -1), i32 %evl)
1255   %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> splat (i1 -1), i32 %evl)
1256   %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %nega, <2 x double> %vb, <2 x double> %negc, <2 x i1> splat (i1 -1), i32 %evl)
1257   %u = call <2 x double> @llvm.vp.select.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl)
1258   ret <2 x double> %u
1259 }
1260
; <2 x double> vector-scalar case, commuted, vp.select variant: the splat of %b
; is the FIRST fma multiplicand; the blend with %c under %m uses vp.select.
; Expected assembly: masked vfnmacc.vf reading fa0 under "ta, mu", then vmv.v.v.
1261 define <2 x double> @vfnmacc_vf_v2f64_commute_ta(<2 x double> %a, double %b, <2 x double> %c, <2 x i1> %m, i32 zeroext %evl) {
1262 ; CHECK-LABEL: vfnmacc_vf_v2f64_commute_ta:
1263 ; CHECK:       # %bb.0:
1264 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
1265 ; CHECK-NEXT:    vfnmacc.vf v9, fa0, v8, v0.t
1266 ; CHECK-NEXT:    vmv.v.v v8, v9
1267 ; CHECK-NEXT:    ret
1268   %elt.head = insertelement <2 x double> poison, double %b, i32 0
1269   %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer
1270   %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> splat (i1 -1), i32 %evl)
1271   %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> splat (i1 -1), i32 %evl)
1272   %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %vb, <2 x double> %nega, <2 x double> %negc, <2 x i1> splat (i1 -1), i32 %evl)
1273   %u = call <2 x double> @llvm.vp.select.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl)
1274   ret <2 x double> %u
1275 }
1276
; VP intrinsic declarations for <4 x double>: fma, fneg, merge and select.
; Each takes a <4 x i1> mask and an i32 explicit vector length.
1277 declare <4 x double> @llvm.vp.fma.v4f64(<4 x double>, <4 x double>, <4 x double>, <4 x i1>, i32)
1278 declare <4 x double> @llvm.vp.fneg.v4f64(<4 x double>, <4 x i1>, i32)
1279 declare <4 x double> @llvm.vp.merge.v4f64(<4 x i1>, <4 x double>, <4 x double>, i32)
1280 declare <4 x double> @llvm.vp.select.v4f64(<4 x i1>, <4 x double>, <4 x double>, i32)
1281
; <4 x double> vector-vector case, masked: fma(fneg %a, %b, fneg %c) is computed
; with an all-true mask, then vp.merge under %m picks that result over %c.
; Expected assembly: one masked vfnmacc.vv under an e64/m2 "tu, mu" vsetvli,
; then a whole-register copy into v8.
1282 define <4 x double> @vfnmacc_vv_v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x i1> %m, i32 zeroext %evl) {
1283 ; CHECK-LABEL: vfnmacc_vv_v4f64:
1284 ; CHECK:       # %bb.0:
1285 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
1286 ; CHECK-NEXT:    vfnmacc.vv v12, v8, v10, v0.t
1287 ; CHECK-NEXT:    vmv2r.v v8, v12
1288 ; CHECK-NEXT:    ret
1289   %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> splat (i1 -1), i32 %evl)
1290   %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> splat (i1 -1), i32 %evl)
1291   %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %nega, <4 x double> %b, <4 x double> %negc, <4 x i1> splat (i1 -1), i32 %evl)
1292   %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl)
1293   ret <4 x double> %u
1294 }
1295
; <4 x double> vector-vector case, unmasked: the vp.merge mask is all-true, so
; the %m parameter is declared but never referenced in the body.
; Expected assembly: vfnmacc.vv with no v0.t operand under a "tu, ma" vsetvli.
1296 define <4 x double> @vfnmacc_vv_v4f64_unmasked(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x i1> %m, i32 zeroext %evl) {
1297 ; CHECK-LABEL: vfnmacc_vv_v4f64_unmasked:
1298 ; CHECK:       # %bb.0:
1299 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, ma
1300 ; CHECK-NEXT:    vfnmacc.vv v12, v8, v10
1301 ; CHECK-NEXT:    vmv2r.v v8, v12
1302 ; CHECK-NEXT:    ret
1303   %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> splat (i1 -1), i32 %evl)
1304   %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> splat (i1 -1), i32 %evl)
1305   %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %nega, <4 x double> %b, <4 x double> %negc, <4 x i1> splat (i1 -1), i32 %evl)
1306   %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> splat (i1 -1), <4 x double> %v, <4 x double> %c, i32 %evl)
1307   ret <4 x double> %u
1308 }
1309
; <4 x double> vector-scalar case, masked: scalar %b is splatted with
; insertelement + shufflevector and used as the second fma multiplicand; the
; result is merged with %c under %m via vp.merge.
; Expected assembly: one masked vfnmacc.vf reading fa0, under "tu, mu".
1310 define <4 x double> @vfnmacc_vf_v4f64(<4 x double> %a, double %b, <4 x double> %c, <4 x i1> %m, i32 zeroext %evl) {
1311 ; CHECK-LABEL: vfnmacc_vf_v4f64:
1312 ; CHECK:       # %bb.0:
1313 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
1314 ; CHECK-NEXT:    vfnmacc.vf v10, fa0, v8, v0.t
1315 ; CHECK-NEXT:    vmv2r.v v8, v10
1316 ; CHECK-NEXT:    ret
1317   %elt.head = insertelement <4 x double> poison, double %b, i32 0
1318   %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer
1319   %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> splat (i1 -1), i32 %evl)
1320   %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> splat (i1 -1), i32 %evl)
1321   %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %nega, <4 x double> %vb, <4 x double> %negc, <4 x i1> splat (i1 -1), i32 %evl)
1322   %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl)
1323   ret <4 x double> %u
1324 }
1325
; <4 x double> vector-scalar case, masked, commuted: the splat of %b is the
; FIRST fma multiplicand and the negated %a the second.
; Expected assembly: one masked vfnmacc.vf reading fa0, under "tu, mu".
1326 define <4 x double> @vfnmacc_vf_v4f64_commute(<4 x double> %a, double %b, <4 x double> %c, <4 x i1> %m, i32 zeroext %evl) {
1327 ; CHECK-LABEL: vfnmacc_vf_v4f64_commute:
1328 ; CHECK:       # %bb.0:
1329 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
1330 ; CHECK-NEXT:    vfnmacc.vf v10, fa0, v8, v0.t
1331 ; CHECK-NEXT:    vmv2r.v v8, v10
1332 ; CHECK-NEXT:    ret
1333   %elt.head = insertelement <4 x double> poison, double %b, i32 0
1334   %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer
1335   %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> splat (i1 -1), i32 %evl)
1336   %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> splat (i1 -1), i32 %evl)
1337   %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %vb, <4 x double> %nega, <4 x double> %negc, <4 x i1> splat (i1 -1), i32 %evl)
1338   %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl)
1339   ret <4 x double> %u
1340 }
1341
; <4 x double> vector-scalar case, unmasked: there is no mask parameter and the
; vp.merge uses an all-true mask.
; Expected assembly: vfnmacc.vf with no v0.t operand under a "tu, ma" vsetvli.
1342 define <4 x double> @vfnmacc_vf_v4f64_unmasked(<4 x double> %a, double %b, <4 x double> %c, i32 zeroext %evl) {
1343 ; CHECK-LABEL: vfnmacc_vf_v4f64_unmasked:
1344 ; CHECK:       # %bb.0:
1345 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, ma
1346 ; CHECK-NEXT:    vfnmacc.vf v10, fa0, v8
1347 ; CHECK-NEXT:    vmv2r.v v8, v10
1348 ; CHECK-NEXT:    ret
1349   %elt.head = insertelement <4 x double> poison, double %b, i32 0
1350   %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer
1351   %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> splat (i1 -1), i32 %evl)
1352   %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> splat (i1 -1), i32 %evl)
1353   %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %nega, <4 x double> %vb, <4 x double> %negc, <4 x i1> splat (i1 -1), i32 %evl)
1354   %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> splat (i1 -1), <4 x double> %v, <4 x double> %c, i32 %evl)
1355   ret <4 x double> %u
1356 }
1357
; <4 x double> vector-vector case, vp.select variant: same fneg/fma chain, but
; the final blend with %c under %m uses vp.select instead of vp.merge.
; Expected assembly: masked vfnmacc.vv under a "ta, mu" vsetvli, and the result
; is copied with vmv.v.v rather than a whole-register move.
1358 define <4 x double> @vfnmacc_vv_v4f64_ta(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x i1> %m, i32 zeroext %evl) {
1359 ; CHECK-LABEL: vfnmacc_vv_v4f64_ta:
1360 ; CHECK:       # %bb.0:
1361 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
1362 ; CHECK-NEXT:    vfnmacc.vv v12, v8, v10, v0.t
1363 ; CHECK-NEXT:    vmv.v.v v8, v12
1364 ; CHECK-NEXT:    ret
1365   %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> splat (i1 -1), i32 %evl)
1366   %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> splat (i1 -1), i32 %evl)
1367   %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %nega, <4 x double> %b, <4 x double> %negc, <4 x i1> splat (i1 -1), i32 %evl)
1368   %u = call <4 x double> @llvm.vp.select.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl)
1369   ret <4 x double> %u
1370 }
1371
; <4 x double> vector-scalar case, vp.select variant: splat of %b is the second
; fma multiplicand; the result is blended with %c under %m via vp.select.
; Expected assembly: masked vfnmacc.vf reading fa0 under "ta, mu", then vmv.v.v.
1372 define <4 x double> @vfnmacc_vf_v4f64_ta(<4 x double> %a, double %b, <4 x double> %c, <4 x i1> %m, i32 zeroext %evl) {
1373 ; CHECK-LABEL: vfnmacc_vf_v4f64_ta:
1374 ; CHECK:       # %bb.0:
1375 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
1376 ; CHECK-NEXT:    vfnmacc.vf v10, fa0, v8, v0.t
1377 ; CHECK-NEXT:    vmv.v.v v8, v10
1378 ; CHECK-NEXT:    ret
1379   %elt.head = insertelement <4 x double> poison, double %b, i32 0
1380   %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer
1381   %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> splat (i1 -1), i32 %evl)
1382   %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> splat (i1 -1), i32 %evl)
1383   %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %nega, <4 x double> %vb, <4 x double> %negc, <4 x i1> splat (i1 -1), i32 %evl)
1384   %u = call <4 x double> @llvm.vp.select.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl)
1385   ret <4 x double> %u
1386 }
1387
; <4 x double> vector-scalar case, commuted, vp.select variant: the splat of %b
; is the FIRST fma multiplicand; the blend with %c under %m uses vp.select.
; Expected assembly: masked vfnmacc.vf reading fa0 under "ta, mu", then vmv.v.v.
1388 define <4 x double> @vfnmacc_vf_v4f64_commute_ta(<4 x double> %a, double %b, <4 x double> %c, <4 x i1> %m, i32 zeroext %evl) {
1389 ; CHECK-LABEL: vfnmacc_vf_v4f64_commute_ta:
1390 ; CHECK:       # %bb.0:
1391 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
1392 ; CHECK-NEXT:    vfnmacc.vf v10, fa0, v8, v0.t
1393 ; CHECK-NEXT:    vmv.v.v v8, v10
1394 ; CHECK-NEXT:    ret
1395   %elt.head = insertelement <4 x double> poison, double %b, i32 0
1396   %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer
1397   %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> splat (i1 -1), i32 %evl)
1398   %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> splat (i1 -1), i32 %evl)
1399   %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %vb, <4 x double> %nega, <4 x double> %negc, <4 x i1> splat (i1 -1), i32 %evl)
1400   %u = call <4 x double> @llvm.vp.select.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl)
1401   ret <4 x double> %u
1402 }
1403
; VP intrinsic declarations for <8 x double>: fma, fneg, merge and select.
; Each takes an <8 x i1> mask and an i32 explicit vector length.
1404 declare <8 x double> @llvm.vp.fma.v8f64(<8 x double>, <8 x double>, <8 x double>, <8 x i1>, i32)
1405 declare <8 x double> @llvm.vp.fneg.v8f64(<8 x double>, <8 x i1>, i32)
1406 declare <8 x double> @llvm.vp.merge.v8f64(<8 x i1>, <8 x double>, <8 x double>, i32)
1407 declare <8 x double> @llvm.vp.select.v8f64(<8 x i1>, <8 x double>, <8 x double>, i32)
1408
; <8 x double> vector-vector case, masked: fma(fneg %a, %b, fneg %c) is computed
; with an all-true mask, then vp.merge under %m picks that result over %c.
; Expected assembly: one masked vfnmacc.vv under an e64/m4 "tu, mu" vsetvli,
; then a whole-register copy into v8.
1409 define <8 x double> @vfnmacc_vv_v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x i1> %m, i32 zeroext %evl) {
1410 ; CHECK-LABEL: vfnmacc_vv_v8f64:
1411 ; CHECK:       # %bb.0:
1412 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
1413 ; CHECK-NEXT:    vfnmacc.vv v16, v8, v12, v0.t
1414 ; CHECK-NEXT:    vmv4r.v v8, v16
1415 ; CHECK-NEXT:    ret
1416   %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> splat (i1 -1), i32 %evl)
1417   %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> splat (i1 -1), i32 %evl)
1418   %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %nega, <8 x double> %b, <8 x double> %negc, <8 x i1> splat (i1 -1), i32 %evl)
1419   %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl)
1420   ret <8 x double> %u
1421 }
1422
; <8 x double> vector-vector case, unmasked: the vp.merge mask is all-true, so
; the %m parameter is declared but never referenced in the body.
; Expected assembly: vfnmacc.vv with no v0.t operand under a "tu, ma" vsetvli.
1423 define <8 x double> @vfnmacc_vv_v8f64_unmasked(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x i1> %m, i32 zeroext %evl) {
1424 ; CHECK-LABEL: vfnmacc_vv_v8f64_unmasked:
1425 ; CHECK:       # %bb.0:
1426 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, ma
1427 ; CHECK-NEXT:    vfnmacc.vv v16, v8, v12
1428 ; CHECK-NEXT:    vmv4r.v v8, v16
1429 ; CHECK-NEXT:    ret
1430   %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> splat (i1 -1), i32 %evl)
1431   %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> splat (i1 -1), i32 %evl)
1432   %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %nega, <8 x double> %b, <8 x double> %negc, <8 x i1> splat (i1 -1), i32 %evl)
1433   %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> splat (i1 -1), <8 x double> %v, <8 x double> %c, i32 %evl)
1434   ret <8 x double> %u
1435 }
1436
; <8 x double> vector-scalar case, masked: scalar %b is splatted with
; insertelement + shufflevector and used as the second fma multiplicand; the
; result is merged with %c under %m via vp.merge.
; Expected assembly: one masked vfnmacc.vf reading fa0, under "tu, mu".
1437 define <8 x double> @vfnmacc_vf_v8f64(<8 x double> %a, double %b, <8 x double> %c, <8 x i1> %m, i32 zeroext %evl) {
1438 ; CHECK-LABEL: vfnmacc_vf_v8f64:
1439 ; CHECK:       # %bb.0:
1440 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
1441 ; CHECK-NEXT:    vfnmacc.vf v12, fa0, v8, v0.t
1442 ; CHECK-NEXT:    vmv4r.v v8, v12
1443 ; CHECK-NEXT:    ret
1444   %elt.head = insertelement <8 x double> poison, double %b, i32 0
1445   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
1446   %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> splat (i1 -1), i32 %evl)
1447   %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> splat (i1 -1), i32 %evl)
1448   %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %nega, <8 x double> %vb, <8 x double> %negc, <8 x i1> splat (i1 -1), i32 %evl)
1449   %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl)
1450   ret <8 x double> %u
1451 }
1452
; <8 x double> vector-scalar case, masked, commuted: the splat of %b is the
; FIRST fma multiplicand and the negated %a the second.
; Expected assembly: one masked vfnmacc.vf reading fa0, under "tu, mu".
1453 define <8 x double> @vfnmacc_vf_v8f64_commute(<8 x double> %a, double %b, <8 x double> %c, <8 x i1> %m, i32 zeroext %evl) {
1454 ; CHECK-LABEL: vfnmacc_vf_v8f64_commute:
1455 ; CHECK:       # %bb.0:
1456 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
1457 ; CHECK-NEXT:    vfnmacc.vf v12, fa0, v8, v0.t
1458 ; CHECK-NEXT:    vmv4r.v v8, v12
1459 ; CHECK-NEXT:    ret
1460   %elt.head = insertelement <8 x double> poison, double %b, i32 0
1461   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
1462   %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> splat (i1 -1), i32 %evl)
1463   %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> splat (i1 -1), i32 %evl)
1464   %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %vb, <8 x double> %nega, <8 x double> %negc, <8 x i1> splat (i1 -1), i32 %evl)
1465   %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl)
1466   ret <8 x double> %u
1467 }
1468
; <8 x double> vector-scalar case, unmasked: there is no mask parameter and the
; vp.merge uses an all-true mask.
; Expected assembly: vfnmacc.vf with no v0.t operand under a "tu, ma" vsetvli.
1469 define <8 x double> @vfnmacc_vf_v8f64_unmasked(<8 x double> %a, double %b, <8 x double> %c, i32 zeroext %evl) {
1470 ; CHECK-LABEL: vfnmacc_vf_v8f64_unmasked:
1471 ; CHECK:       # %bb.0:
1472 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, ma
1473 ; CHECK-NEXT:    vfnmacc.vf v12, fa0, v8
1474 ; CHECK-NEXT:    vmv4r.v v8, v12
1475 ; CHECK-NEXT:    ret
1476   %elt.head = insertelement <8 x double> poison, double %b, i32 0
1477   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
1478   %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> splat (i1 -1), i32 %evl)
1479   %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> splat (i1 -1), i32 %evl)
1480   %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %nega, <8 x double> %vb, <8 x double> %negc, <8 x i1> splat (i1 -1), i32 %evl)
1481   %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> splat (i1 -1), <8 x double> %v, <8 x double> %c, i32 %evl)
1482   ret <8 x double> %u
1483 }
1484
; <8 x double> vector-vector case, vp.select variant: same fneg/fma chain, but
; the final blend with %c under %m uses vp.select instead of vp.merge.
; Expected assembly: masked vfnmacc.vv under a "ta, mu" vsetvli, and the result
; is copied with vmv.v.v rather than a whole-register move.
1485 define <8 x double> @vfnmacc_vv_v8f64_ta(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x i1> %m, i32 zeroext %evl) {
1486 ; CHECK-LABEL: vfnmacc_vv_v8f64_ta:
1487 ; CHECK:       # %bb.0:
1488 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
1489 ; CHECK-NEXT:    vfnmacc.vv v16, v8, v12, v0.t
1490 ; CHECK-NEXT:    vmv.v.v v8, v16
1491 ; CHECK-NEXT:    ret
1492   %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> splat (i1 -1), i32 %evl)
1493   %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> splat (i1 -1), i32 %evl)
1494   %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %nega, <8 x double> %b, <8 x double> %negc, <8 x i1> splat (i1 -1), i32 %evl)
1495   %u = call <8 x double> @llvm.vp.select.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl)
1496   ret <8 x double> %u
1497 }
1498
; <8 x double> vector-scalar case, vp.select variant: splat of %b is the second
; fma multiplicand; the result is blended with %c under %m via vp.select.
; Expected assembly: masked vfnmacc.vf reading fa0 under "ta, mu", then vmv.v.v.
1499 define <8 x double> @vfnmacc_vf_v8f64_ta(<8 x double> %a, double %b, <8 x double> %c, <8 x i1> %m, i32 zeroext %evl) {
1500 ; CHECK-LABEL: vfnmacc_vf_v8f64_ta:
1501 ; CHECK:       # %bb.0:
1502 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
1503 ; CHECK-NEXT:    vfnmacc.vf v12, fa0, v8, v0.t
1504 ; CHECK-NEXT:    vmv.v.v v8, v12
1505 ; CHECK-NEXT:    ret
1506   %elt.head = insertelement <8 x double> poison, double %b, i32 0
1507   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
1508   %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> splat (i1 -1), i32 %evl)
1509   %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> splat (i1 -1), i32 %evl)
1510   %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %nega, <8 x double> %vb, <8 x double> %negc, <8 x i1> splat (i1 -1), i32 %evl)
1511   %u = call <8 x double> @llvm.vp.select.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl)
1512   ret <8 x double> %u
1513 }
1514
; <8 x double> vector-scalar case, commuted, vp.select variant: the splat of %b
; is the FIRST fma multiplicand; the blend with %c under %m uses vp.select.
; Expected assembly: masked vfnmacc.vf reading fa0 under "ta, mu", then vmv.v.v.
1515 define <8 x double> @vfnmacc_vf_v8f64_commute_ta(<8 x double> %a, double %b, <8 x double> %c, <8 x i1> %m, i32 zeroext %evl) {
1516 ; CHECK-LABEL: vfnmacc_vf_v8f64_commute_ta:
1517 ; CHECK:       # %bb.0:
1518 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
1519 ; CHECK-NEXT:    vfnmacc.vf v12, fa0, v8, v0.t
1520 ; CHECK-NEXT:    vmv.v.v v8, v12
1521 ; CHECK-NEXT:    ret
1522   %elt.head = insertelement <8 x double> poison, double %b, i32 0
1523   %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
1524   %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> splat (i1 -1), i32 %evl)
1525   %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> splat (i1 -1), i32 %evl)
1526   %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %vb, <8 x double> %nega, <8 x double> %negc, <8 x i1> splat (i1 -1), i32 %evl)
1527   %u = call <8 x double> @llvm.vp.select.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl)
1528   ret <8 x double> %u
1529 }