; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
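
; These tests check that a vp.fpext of the multiplicands feeding a vp.fma is
; folded into the widening FMA instructions (vfwmacc.vv / vfwmacc.vf) under
; ZVFH, while ZVFHMIN, which only provides f16<->f32 conversions, instead
; emits vfwcvt.f.f.v conversions followed by a non-widening vfmadd/vfmacc.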

declare <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x i1>, i32)
declare <vscale x 1 x float> @llvm.vp.fneg.nxv1f32(<vscale x 1 x float>, <vscale x 1 x i1>, i32)
declare <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32)
declare <vscale x 1 x float> @llvm.vp.merge.nxv1f32(<vscale x 1 x i1>, <vscale x 1 x float>, <vscale x 1 x float>, i32)

define <vscale x 1 x float> @vfmacc_vv_nxv1f32(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv1f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfwmacc.vv v10, v8, v9, v0.t
; ZVFH-NEXT:    vmv1r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv1f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v10, v0.t
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> %m, i32 %evl)
  %bext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %b, <vscale x 1 x i1> %m, i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %aext, <vscale x 1 x float> %bext, <vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x float> %v
}

define <vscale x 1 x float> @vfmacc_vv_nxv1f32_unmasked(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x float> %c, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv1f32_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfwmacc.vv v10, v8, v9
; ZVFH-NEXT:    vmv1r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv1f32_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v10
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %aext, <vscale x 1 x float> %bext, <vscale x 1 x float> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 1 x float> %v
}

define <vscale x 1 x float> @vfmacc_vv_nxv1f32_tu(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv1f32_tu:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
; ZVFH-NEXT:    vfwmacc.vv v10, v8, v9, v0.t
; ZVFH-NEXT:    vmv1r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv1f32_tu:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, tu, mu
; ZVFHMIN-NEXT:    vfmacc.vv v10, v11, v8, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v8, v10
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %aext, <vscale x 1 x float> %bext, <vscale x 1 x float> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x float> @llvm.vp.merge.nxv1f32(<vscale x 1 x i1> %m, <vscale x 1 x float> %v, <vscale x 1 x float> %c, i32 %evl)
  ret <vscale x 1 x float> %u
}

; FIXME: Support this case?
define <vscale x 1 x float> @vfmacc_vv_nxv1f32_masked__tu(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv1f32_masked__tu:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
; ZVFH-NEXT:    vfwmacc.vv v10, v8, v9, v0.t
; ZVFH-NEXT:    vmv1r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv1f32_masked__tu:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v10, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, tu, ma
; ZVFHMIN-NEXT:    vmerge.vvm v10, v10, v8, v0
; ZVFHMIN-NEXT:    vmv1r.v v8, v10
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> %m, i32 %evl)
  %bext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %b, <vscale x 1 x i1> %m, i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %aext, <vscale x 1 x float> %bext, <vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 %evl)
  %u = call <vscale x 1 x float> @llvm.vp.merge.nxv1f32(<vscale x 1 x i1> %m, <vscale x 1 x float> %v, <vscale x 1 x float> %c, i32 %evl)
  ret <vscale x 1 x float> %u
}

define <vscale x 1 x float> @vfmacc_vv_nxv1f32_unmasked_tu(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x float> %c, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv1f32_unmasked_tu:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, tu, ma
; ZVFH-NEXT:    vfwmacc.vv v10, v8, v9
; ZVFH-NEXT:    vmv1r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv1f32_unmasked_tu:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, tu, ma
; ZVFHMIN-NEXT:    vfmacc.vv v10, v11, v8
; ZVFHMIN-NEXT:    vmv1r.v v8, v10
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %aext, <vscale x 1 x float> %bext, <vscale x 1 x float> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x float> @llvm.vp.merge.nxv1f32(<vscale x 1 x i1> splat (i1 -1), <vscale x 1 x float> %v, <vscale x 1 x float> %c, i32 %evl)
  ret <vscale x 1 x float> %u
}

define <vscale x 1 x float> @vfmacc_vf_nxv1f32(<vscale x 1 x half> %va, half %b, <vscale x 1 x float> %vc, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv1f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfwmacc.vf v9, fa0, v8, v0.t
; ZVFH-NEXT:    vmv1r.v v8, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v10, a1
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v9, v0.t
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
  %vaext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl)
  %vbext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %vb, <vscale x 1 x i1> %m, i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %vaext, <vscale x 1 x float> %vbext, <vscale x 1 x float> %vc, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x float> %v
}

define <vscale x 1 x float> @vfmacc_vf_nxv1f32_commute(<vscale x 1 x half> %va, half %b, <vscale x 1 x float> %vc, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv1f32_commute:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfwmacc.vf v9, fa0, v8, v0.t
; ZVFH-NEXT:    vmv1r.v v8, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_commute:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v11, a1
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v10, v8, v9, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v8, v10
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
  %vaext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl)
  %vbext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %vb, <vscale x 1 x i1> %m, i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %vbext, <vscale x 1 x float> %vaext, <vscale x 1 x float> %vc, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x float> %v
}

define <vscale x 1 x float> @vfmacc_vf_nxv1f32_unmasked(<vscale x 1 x half> %va, half %b, <vscale x 1 x float> %vc, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv1f32_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfwmacc.vf v9, fa0, v8
; ZVFH-NEXT:    vmv1r.v v8, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v10, a1
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v9
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
  %vaext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %vbext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %vaext, <vscale x 1 x float> %vbext, <vscale x 1 x float> %vc, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 1 x float> %v
}

define <vscale x 1 x float> @vfmacc_vf_nxv1f32_tu(<vscale x 1 x half> %va, half %b, <vscale x 1 x float> %vc, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv1f32_tu:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
; ZVFH-NEXT:    vfwmacc.vf v9, fa0, v8, v0.t
; ZVFH-NEXT:    vmv1r.v v8, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_tu:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v10, a1
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, tu, mu
; ZVFHMIN-NEXT:    vfmacc.vv v9, v11, v8, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v8, v9
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
  %vaext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %vbext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %vaext, <vscale x 1 x float> %vbext, <vscale x 1 x float> %vc, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x float> @llvm.vp.merge.nxv1f32(<vscale x 1 x i1> %m, <vscale x 1 x float> %v, <vscale x 1 x float> %vc, i32 %evl)
  ret <vscale x 1 x float> %u
}

define <vscale x 1 x float> @vfmacc_vf_nxv1f32_commute_tu(<vscale x 1 x half> %va, half %b, <vscale x 1 x float> %vc, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv1f32_commute_tu:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
; ZVFH-NEXT:    vfwmacc.vf v9, fa0, v8, v0.t
; ZVFH-NEXT:    vmv1r.v v8, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_commute_tu:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v10, a1
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, tu, mu
; ZVFHMIN-NEXT:    vfmacc.vv v9, v8, v11, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v8, v9
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
  %vaext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %vbext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %vbext, <vscale x 1 x float> %vaext, <vscale x 1 x float> %vc, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x float> @llvm.vp.merge.nxv1f32(<vscale x 1 x i1> %m, <vscale x 1 x float> %v, <vscale x 1 x float> %vc, i32 %evl)
  ret <vscale x 1 x float> %u
}

define <vscale x 1 x float> @vfmacc_vf_nxv1f32_unmasked_tu(<vscale x 1 x half> %va, half %b, <vscale x 1 x float> %vc, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv1f32_unmasked_tu:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, tu, ma
; ZVFH-NEXT:    vfwmacc.vf v9, fa0, v8
; ZVFH-NEXT:    vmv1r.v v8, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_unmasked_tu:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v10, a1
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, tu, ma
; ZVFHMIN-NEXT:    vfmacc.vv v9, v11, v8
; ZVFHMIN-NEXT:    vmv1r.v v8, v9
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
  %vaext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %vbext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %vaext, <vscale x 1 x float> %vbext, <vscale x 1 x float> %vc, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x float> @llvm.vp.merge.nxv1f32(<vscale x 1 x i1> splat (i1 -1), <vscale x 1 x float> %v, <vscale x 1 x float> %vc, i32 %evl)
  ret <vscale x 1 x float> %u
}

declare <vscale x 2 x float> @llvm.vp.fma.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x i1>, i32)
declare <vscale x 2 x float> @llvm.vp.fneg.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
declare <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>, <vscale x 2 x float>, i32)
declare <vscale x 2 x float> @llvm.vp.select.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>, <vscale x 2 x float>, i32)
declare <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half>, <vscale x 2 x i1>, i32)

define <vscale x 2 x float> @vfmacc_vv_nxv2f32(<vscale x 2 x half> %a, <vscale x 2 x half> %b, <vscale x 2 x float> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv2f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT:    vfwmacc.vv v10, v8, v9, v0.t
; ZVFH-NEXT:    vmv1r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv2f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v10, v0.t
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x i1> %m, i32 %evl)
  %bext = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %b, <vscale x 2 x i1> %m, i32 %evl)
  %v = call <vscale x 2 x float> @llvm.vp.fma.nxv2f32(<vscale x 2 x float> %aext, <vscale x 2 x float> %bext, <vscale x 2 x float> %c, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x float> %v
}

define <vscale x 2 x float> @vfmacc_vv_nxv2f32_unmasked(<vscale x 2 x half> %a, <vscale x 2 x half> %b, <vscale x 2 x float> %c, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv2f32_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT:    vfwmacc.vv v10, v8, v9
; ZVFH-NEXT:    vmv1r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv2f32_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v10
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 2 x float> @llvm.vp.fma.nxv2f32(<vscale x 2 x float> %aext, <vscale x 2 x float> %bext, <vscale x 2 x float> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 2 x float> %v
}

define <vscale x 2 x float> @vfmacc_vf_nxv2f32(<vscale x 2 x half> %va, half %b, <vscale x 2 x float> %vc, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv2f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT:    vfwmacc.vf v9, fa0, v8, v0.t
; ZVFH-NEXT:    vmv1r.v v8, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv2f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v10, a1
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v9, v0.t
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 2 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 2 x half> %elt.head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
  %vaext = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 %evl)
  %vbext = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %vb, <vscale x 2 x i1> %m, i32 %evl)
  %v = call <vscale x 2 x float> @llvm.vp.fma.nxv2f32(<vscale x 2 x float> %vaext, <vscale x 2 x float> %vbext, <vscale x 2 x float> %vc, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x float> %v
}

define <vscale x 2 x float> @vfmacc_vf_nxv2f32_unmasked(<vscale x 2 x half> %va, half %b, <vscale x 2 x float> %vc, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv2f32_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT:    vfwmacc.vf v9, fa0, v8
; ZVFH-NEXT:    vmv1r.v v8, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv2f32_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v10, a1
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v9
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 2 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 2 x half> %elt.head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
  %vaext = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %vbext = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 2 x float> @llvm.vp.fma.nxv2f32(<vscale x 2 x float> %vaext, <vscale x 2 x float> %vbext, <vscale x 2 x float> %vc, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 2 x float> %v
}

declare <vscale x 4 x float> @llvm.vp.fma.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, i32)
declare <vscale x 4 x float> @llvm.vp.fneg.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32)
declare <vscale x 4 x float> @llvm.vp.merge.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, i32)
declare <vscale x 4 x float> @llvm.vp.select.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, i32)
declare <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half>, <vscale x 4 x i1>, i32)

define <vscale x 4 x float> @vfmacc_vv_nxv4f32(<vscale x 4 x half> %a, <vscale x 4 x half> %b, <vscale x 4 x float> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv4f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT:    vfwmacc.vv v10, v8, v9, v0.t
; ZVFH-NEXT:    vmv2r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv4f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v12, v14, v10, v0.t
; ZVFHMIN-NEXT:    vmv.v.v v8, v12
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x i1> %m, i32 %evl)
  %bext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %b, <vscale x 4 x i1> %m, i32 %evl)
  %v = call <vscale x 4 x float> @llvm.vp.fma.nxv4f32(<vscale x 4 x float> %aext, <vscale x 4 x float> %bext, <vscale x 4 x float> %c, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x float> %v
}

define <vscale x 4 x float> @vfmacc_vv_nxv4f32_unmasked(<vscale x 4 x half> %a, <vscale x 4 x half> %b, <vscale x 4 x float> %c, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv4f32_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT:    vfwmacc.vv v10, v8, v9
; ZVFH-NEXT:    vmv2r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv4f32_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v12, v14, v10
; ZVFHMIN-NEXT:    vmv.v.v v8, v12
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 4 x float> @llvm.vp.fma.nxv4f32(<vscale x 4 x float> %aext, <vscale x 4 x float> %bext, <vscale x 4 x float> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 4 x float> %v
}

define <vscale x 4 x float> @vfmacc_vf_nxv4f32(<vscale x 4 x half> %va, half %b, <vscale x 4 x float> %vc, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv4f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT:    vfwmacc.vf v10, fa0, v8, v0.t
; ZVFH-NEXT:    vmv2r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv4f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v12, a1
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v14, v10, v0.t
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
  %vaext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 %evl)
  %vbext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %vb, <vscale x 4 x i1> %m, i32 %evl)
  %v = call <vscale x 4 x float> @llvm.vp.fma.nxv4f32(<vscale x 4 x float> %vaext, <vscale x 4 x float> %vbext, <vscale x 4 x float> %vc, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x float> %v
}

define <vscale x 4 x float> @vfmacc_vf_nxv4f32_unmasked(<vscale x 4 x half> %va, half %b, <vscale x 4 x float> %vc, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv4f32_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT:    vfwmacc.vf v10, fa0, v8
; ZVFH-NEXT:    vmv2r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv4f32_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v12, a1
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v14, v10
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
  %vaext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %vbext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 4 x float> @llvm.vp.fma.nxv4f32(<vscale x 4 x float> %vaext, <vscale x 4 x float> %vbext, <vscale x 4 x float> %vc, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 4 x float> %v
}

declare <vscale x 8 x float> @llvm.vp.fma.nxv8f32(<vscale x 8 x float>, <vscale x 8 x float>, <vscale x 8 x float>, <vscale x 8 x i1>, i32)
declare <vscale x 8 x float> @llvm.vp.fneg.nxv8f32(<vscale x 8 x float>, <vscale x 8 x i1>, i32)
declare <vscale x 8 x float> @llvm.vp.merge.nxv8f32(<vscale x 8 x i1>, <vscale x 8 x float>, <vscale x 8 x float>, i32)
declare <vscale x 8 x float> @llvm.vp.select.nxv8f32(<vscale x 8 x i1>, <vscale x 8 x float>, <vscale x 8 x float>, i32)
declare <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, i32)

define <vscale x 8 x float> @vfmacc_vv_nxv8f32(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x float> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv8f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT:    vfwmacc.vv v12, v8, v10, v0.t
; ZVFH-NEXT:    vmv4r.v v8, v12
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv8f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v16, v20, v12, v0.t
; ZVFHMIN-NEXT:    vmv.v.v v8, v16
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> %m, i32 %evl)
  %bext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %b, <vscale x 8 x i1> %m, i32 %evl)
  %v = call <vscale x 8 x float> @llvm.vp.fma.nxv8f32(<vscale x 8 x float> %aext, <vscale x 8 x float> %bext, <vscale x 8 x float> %c, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x float> %v
}

define <vscale x 8 x float> @vfmacc_vv_nxv8f32_unmasked(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x float> %c, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv8f32_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT:    vfwmacc.vv v12, v8, v10
; ZVFH-NEXT:    vmv4r.v v8, v12
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv8f32_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v16, v20, v12
; ZVFHMIN-NEXT:    vmv.v.v v8, v16
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 8 x float> @llvm.vp.fma.nxv8f32(<vscale x 8 x float> %aext, <vscale x 8 x float> %bext, <vscale x 8 x float> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 8 x float> %v
}

define <vscale x 8 x float> @vfmacc_vf_nxv8f32(<vscale x 8 x half> %va, half %b, <vscale x 8 x float> %vc, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv8f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT:    vfwmacc.vf v12, fa0, v8, v0.t
; ZVFH-NEXT:    vmv4r.v v8, v12
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv8f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v16, a1
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v20, v12, v0.t
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
  %vaext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 %evl)
  %vbext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %vb, <vscale x 8 x i1> %m, i32 %evl)
  %v = call <vscale x 8 x float> @llvm.vp.fma.nxv8f32(<vscale x 8 x float> %vaext, <vscale x 8 x float> %vbext, <vscale x 8 x float> %vc, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x float> %v
}

define <vscale x 8 x float> @vfmacc_vf_nxv8f32_unmasked(<vscale x 8 x half> %va, half %b, <vscale x 8 x float> %vc, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv8f32_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT:    vfwmacc.vf v12, fa0, v8
; ZVFH-NEXT:    vmv4r.v v8, v12
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv8f32_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v16, a1
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v20, v12
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
  %vaext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %vbext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 8 x float> @llvm.vp.fma.nxv8f32(<vscale x 8 x float> %vaext, <vscale x 8 x float> %vbext, <vscale x 8 x float> %vc, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 8 x float> %v
}

declare <vscale x 16 x float> @llvm.vp.fma.nxv16f32(<vscale x 16 x float>, <vscale x 16 x float>, <vscale x 16 x float>, <vscale x 16 x i1>, i32)
declare <vscale x 16 x float> @llvm.vp.fneg.nxv16f32(<vscale x 16 x float>, <vscale x 16 x i1>, i32)
declare <vscale x 16 x float> @llvm.vp.merge.nxv16f32(<vscale x 16 x i1>, <vscale x 16 x float>, <vscale x 16 x float>, i32)
declare <vscale x 16 x float> @llvm.vp.select.nxv16f32(<vscale x 16 x i1>, <vscale x 16 x float>, <vscale x 16 x float>, i32)
declare <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half>, <vscale x 16 x i1>, i32)

define <vscale x 16 x float> @vfmacc_vv_nxv16f32(<vscale x 16 x half> %a, <vscale x 16 x half> %b, <vscale x 16 x float> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv16f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT:    vfwmacc.vv v16, v8, v12, v0.t
; ZVFH-NEXT:    vmv8r.v v8, v16
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv16f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    addi sp, sp, -16
; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT:    csrr a1, vlenb
; ZVFHMIN-NEXT:    slli a1, a1, 3
; ZVFHMIN-NEXT:    sub sp, sp, a1
; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; ZVFHMIN-NEXT:    addi a1, sp, 16
; ZVFHMIN-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
; ZVFHMIN-NEXT:    addi a0, sp, 16
; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v24, v16, v8, v0.t
; ZVFHMIN-NEXT:    vmv.v.v v8, v24
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 3
; ZVFHMIN-NEXT:    add sp, sp, a0
; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
; ZVFHMIN-NEXT:    addi sp, sp, 16
; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x i1> %m, i32 %evl)
  %bext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %b, <vscale x 16 x i1> %m, i32 %evl)
  %v = call <vscale x 16 x float> @llvm.vp.fma.nxv16f32(<vscale x 16 x float> %aext, <vscale x 16 x float> %bext, <vscale x 16 x float> %c, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x float> %v
}

define <vscale x 16 x float> @vfmacc_vv_nxv16f32_unmasked(<vscale x 16 x half> %a, <vscale x 16 x half> %b, <vscale x 16 x float> %c, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv16f32_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT:    vfwmacc.vv v16, v8, v12
; ZVFH-NEXT:    vmv8r.v v8, v16
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv16f32_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v24, v0, v16
; ZVFHMIN-NEXT:    vmv.v.v v8, v24
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %b, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 16 x float> @llvm.vp.fma.nxv16f32(<vscale x 16 x float> %aext, <vscale x 16 x float> %bext, <vscale x 16 x float> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 16 x float> %v
}

define <vscale x 16 x float> @vfmacc_vf_nxv16f32(<vscale x 16 x half> %va, half %b, <vscale x 16 x float> %vc, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv16f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT:    vfwmacc.vf v16, fa0, v8, v0.t
; ZVFH-NEXT:    vmv8r.v v8, v16
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv16f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v4, a1
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v4, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v24, v16, v0.t
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 16 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 16 x half> %elt.head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
  %vaext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 %evl)
  %vbext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %vb, <vscale x 16 x i1> %m, i32 %evl)
  %v = call <vscale x 16 x float> @llvm.vp.fma.nxv16f32(<vscale x 16 x float> %vaext, <vscale x 16 x float> %vbext, <vscale x 16 x float> %vc, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x float> %v
}

define <vscale x 16 x float> @vfmacc_vf_nxv16f32_unmasked(<vscale x 16 x half> %va, half %b, <vscale x 16 x float> %vc, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv16f32_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT:    vfwmacc.vf v16, fa0, v8
; ZVFH-NEXT:    vmv8r.v v8, v16
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv16f32_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v24, a1
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v24
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v0, v16
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 16 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 16 x half> %elt.head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
  %vaext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %vbext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %vb, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 16 x float> @llvm.vp.fma.nxv16f32(<vscale x 16 x float> %vaext, <vscale x 16 x float> %vbext, <vscale x 16 x float> %vc, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 16 x float> %v
}

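; For f32 -> f64 the widening FMA is available regardless of the half-precision
; configuration, so the tests below share the common CHECK prefix.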
declare <vscale x 1 x double> @llvm.vp.fma.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>, <vscale x 1 x double>, <vscale x 1 x i1>, i32)
declare <vscale x 1 x double> @llvm.vp.fneg.nxv1f64(<vscale x 1 x double>, <vscale x 1 x i1>, i32)
declare <vscale x 1 x double> @llvm.vp.merge.nxv1f64(<vscale x 1 x i1>, <vscale x 1 x double>, <vscale x 1 x double>, i32)
declare <vscale x 1 x double> @llvm.vp.select.nxv1f64(<vscale x 1 x i1>, <vscale x 1 x double>, <vscale x 1 x double>, i32)
declare <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float>, <vscale x 1 x i1>, i32)

define <vscale x 1 x double> @vfmacc_vv_nxv1f64(<vscale x 1 x float> %a, <vscale x 1 x float> %b, <vscale x 1 x double> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfwmacc.vv v10, v8, v9, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %aext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> %a, <vscale x 1 x i1> %m, i32 %evl)
  %bext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> %b, <vscale x 1 x i1> %m, i32 %evl)
  %v = call <vscale x 1 x double> @llvm.vp.fma.nxv1f64(<vscale x 1 x double> %aext, <vscale x 1 x double> %bext, <vscale x 1 x double> %c, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x double> %v
}

define <vscale x 1 x double> @vfmacc_vv_nxv1f64_unmasked(<vscale x 1 x float> %a, <vscale x 1 x float> %b, <vscale x 1 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv1f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfwmacc.vv v10, v8, v9
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %aext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> %a, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 1 x double> @llvm.vp.fma.nxv1f64(<vscale x 1 x double> %aext, <vscale x 1 x double> %bext, <vscale x 1 x double> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 1 x double> %v
}

define <vscale x 1 x double> @vfmacc_vf_nxv1f64(<vscale x 1 x float> %va, float %b, <vscale x 1 x double> %vc, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vf_nxv1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfwmacc.vf v9, fa0, v8, v0.t
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x float> poison, float %b, i32 0
  %vb = shufflevector <vscale x 1 x float> %elt.head, <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer
  %vaext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> %m, i32 %evl)
  %vbext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> %vb, <vscale x 1 x i1> %m, i32 %evl)
  %v = call <vscale x 1 x double> @llvm.vp.fma.nxv1f64(<vscale x 1 x double> %vaext, <vscale x 1 x double> %vbext, <vscale x 1 x double> %vc, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x double> %v
}

define <vscale x 1 x double> @vfmacc_vf_nxv1f64_unmasked(<vscale x 1 x float> %va, float %b, <vscale x 1 x double> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vf_nxv1f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfwmacc.vf v9, fa0, v8
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x float> poison, float %b, i32 0
  %vb = shufflevector <vscale x 1 x float> %elt.head, <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer
  %vaext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %vbext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 1 x double> @llvm.vp.fma.nxv1f64(<vscale x 1 x double> %vaext, <vscale x 1 x double> %vbext, <vscale x 1 x double> %vc, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 1 x double> %v
}

declare <vscale x 2 x double> @llvm.vp.fma.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, i32)
declare <vscale x 2 x double> @llvm.vp.fneg.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)
declare <vscale x 2 x double> @llvm.vp.merge.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, i32)
declare <vscale x 2 x double> @llvm.vp.select.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, i32)
declare <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)

define <vscale x 2 x double> @vfmacc_vv_nxv2f64(<vscale x 2 x float> %a, <vscale x 2 x float> %b, <vscale x 2 x double> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfwmacc.vv v10, v8, v9, v0.t
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %aext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x i1> %m, i32 %evl)
  %bext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %b, <vscale x 2 x i1> %m, i32 %evl)
  %v = call <vscale x 2 x double> @llvm.vp.fma.nxv2f64(<vscale x 2 x double> %aext, <vscale x 2 x double> %bext, <vscale x 2 x double> %c, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x double> %v
}

define <vscale x 2 x double> @vfmacc_vv_nxv2f64_unmasked(<vscale x 2 x float> %a, <vscale x 2 x float> %b, <vscale x 2 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv2f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfwmacc.vv v10, v8, v9
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %aext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 2 x double> @llvm.vp.fma.nxv2f64(<vscale x 2 x double> %aext, <vscale x 2 x double> %bext, <vscale x 2 x double> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 2 x double> %v
}

define <vscale x 2 x double> @vfmacc_vf_nxv2f64(<vscale x 2 x float> %va, float %b, <vscale x 2 x double> %vc, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vf_nxv2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfwmacc.vf v10, fa0, v8, v0.t
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 2 x float> poison, float %b, i32 0
  %vb = shufflevector <vscale x 2 x float> %elt.head, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
  %vaext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 %evl)
  %vbext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %vb, <vscale x 2 x i1> %m, i32 %evl)
  %v = call <vscale x 2 x double> @llvm.vp.fma.nxv2f64(<vscale x 2 x double> %vaext, <vscale x 2 x double> %vbext, <vscale x 2 x double> %vc, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x double> %v
}

define <vscale x 2 x double> @vfmacc_vf_nxv2f64_unmasked(<vscale x 2 x float> %va, float %b, <vscale x 2 x double> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vf_nxv2f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfwmacc.vf v10, fa0, v8
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 2 x float> poison, float %b, i32 0
  %vb = shufflevector <vscale x 2 x float> %elt.head, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
  %vaext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %vbext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 2 x double> @llvm.vp.fma.nxv2f64(<vscale x 2 x double> %vaext, <vscale x 2 x double> %vbext, <vscale x 2 x double> %vc, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 2 x double> %v
}

declare <vscale x 4 x double> @llvm.vp.fma.nxv4f64(<vscale x 4 x double>, <vscale x 4 x double>, <vscale x 4 x double>, <vscale x 4 x i1>, i32)
declare <vscale x 4 x double> @llvm.vp.fneg.nxv4f64(<vscale x 4 x double>, <vscale x 4 x i1>, i32)
declare <vscale x 4 x double> @llvm.vp.merge.nxv4f64(<vscale x 4 x i1>, <vscale x 4 x double>, <vscale x 4 x double>, i32)
declare <vscale x 4 x double> @llvm.vp.select.nxv4f64(<vscale x 4 x i1>, <vscale x 4 x double>, <vscale x 4 x double>, i32)
declare <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32)

define <vscale x 4 x double> @vfmacc_vv_nxv4f64(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x double> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfwmacc.vv v12, v8, v10, v0.t
; CHECK-NEXT:    vmv4r.v v8, v12
; CHECK-NEXT:    ret
  %aext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x i1> %m, i32 %evl)
  %bext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> %b, <vscale x 4 x i1> %m, i32 %evl)
  %v = call <vscale x 4 x double> @llvm.vp.fma.nxv4f64(<vscale x 4 x double> %aext, <vscale x 4 x double> %bext, <vscale x 4 x double> %c, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x double> %v
}

define <vscale x 4 x double> @vfmacc_vv_nxv4f64_unmasked(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv4f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfwmacc.vv v12, v8, v10
; CHECK-NEXT:    vmv4r.v v8, v12
; CHECK-NEXT:    ret
  %aext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 4 x double> @llvm.vp.fma.nxv4f64(<vscale x 4 x double> %aext, <vscale x 4 x double> %bext, <vscale x 4 x double> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 4 x double> %v
}

define <vscale x 4 x double> @vfmacc_vf_nxv4f64(<vscale x 4 x float> %va, float %b, <vscale x 4 x double> %vc, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vf_nxv4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfwmacc.vf v12, fa0, v8, v0.t
; CHECK-NEXT:    vmv4r.v v8, v12
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 4 x float> poison, float %b, i32 0
  %vb = shufflevector <vscale x 4 x float> %elt.head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
  %vaext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 %evl)
  %vbext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> %vb, <vscale x 4 x i1> %m, i32 %evl)
  %v = call <vscale x 4 x double> @llvm.vp.fma.nxv4f64(<vscale x 4 x double> %vaext, <vscale x 4 x double> %vbext, <vscale x 4 x double> %vc, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x double> %v
}

define <vscale x 4 x double> @vfmacc_vf_nxv4f64_unmasked(<vscale x 4 x float> %va, float %b, <vscale x 4 x double> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vf_nxv4f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfwmacc.vf v12, fa0, v8
; CHECK-NEXT:    vmv4r.v v8, v12
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 4 x float> poison, float %b, i32 0
  %vb = shufflevector <vscale x 4 x float> %elt.head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
  %vaext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %vbext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 4 x double> @llvm.vp.fma.nxv4f64(<vscale x 4 x double> %vaext, <vscale x 4 x double> %vbext, <vscale x 4 x double> %vc, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 4 x double> %v
}

declare <vscale x 8 x double> @llvm.vp.fma.nxv8f64(<vscale x 8 x double>, <vscale x 8 x double>, <vscale x 8 x double>, <vscale x 8 x i1>, i32)
declare <vscale x 8 x double> @llvm.vp.fneg.nxv8f64(<vscale x 8 x double>, <vscale x 8 x i1>, i32)
declare <vscale x 8 x double> @llvm.vp.merge.nxv8f64(<vscale x 8 x i1>, <vscale x 8 x double>, <vscale x 8 x double>, i32)
declare <vscale x 8 x double> @llvm.vp.select.nxv8f64(<vscale x 8 x i1>, <vscale x 8 x double>, <vscale x 8 x double>, i32)
declare <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float>, <vscale x 8 x i1>, i32)

define <vscale x 8 x double> @vfmacc_vv_nxv8f64(<vscale x 8 x float> %a, <vscale x 8 x float> %b, <vscale x 8 x double> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vfwmacc.vv v16, v8, v12, v0.t
; CHECK-NEXT:    vmv8r.v v8, v16
; CHECK-NEXT:    ret
  %aext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> %a, <vscale x 8 x i1> %m, i32 %evl)
  %bext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> %b, <vscale x 8 x i1> %m, i32 %evl)
  %v = call <vscale x 8 x double> @llvm.vp.fma.nxv8f64(<vscale x 8 x double> %aext, <vscale x 8 x double> %bext, <vscale x 8 x double> %c, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @vfmacc_vv_nxv8f64_unmasked(<vscale x 8 x float> %a, <vscale x 8 x float> %b, <vscale x 8 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv8f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vfwmacc.vv v16, v8, v12
; CHECK-NEXT:    vmv8r.v v8, v16
; CHECK-NEXT:    ret
  %aext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> %a, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 8 x double> @llvm.vp.fma.nxv8f64(<vscale x 8 x double> %aext, <vscale x 8 x double> %bext, <vscale x 8 x double> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @vfmacc_vf_nxv8f64(<vscale x 8 x float> %va, float %b, <vscale x 8 x double> %vc, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vf_nxv8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vfwmacc.vf v16, fa0, v8, v0.t
; CHECK-NEXT:    vmv8r.v v8, v16
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 8 x float> poison, float %b, i32 0
  %vb = shufflevector <vscale x 8 x float> %elt.head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
  %vaext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 %evl)
  %vbext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> %vb, <vscale x 8 x i1> %m, i32 %evl)
  %v = call <vscale x 8 x double> @llvm.vp.fma.nxv8f64(<vscale x 8 x double> %vaext, <vscale x 8 x double> %vbext, <vscale x 8 x double> %vc, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @vfmacc_vf_nxv8f64_unmasked(<vscale x 8 x float> %va, float %b, <vscale x 8 x double> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vf_nxv8f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vfwmacc.vf v16, fa0, v8
; CHECK-NEXT:    vmv8r.v v8, v16
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 8 x float> poison, float %b, i32 0
  %vb = shufflevector <vscale x 8 x float> %elt.head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
  %vaext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %vbext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 8 x double> @llvm.vp.fma.nxv8f64(<vscale x 8 x double> %vaext, <vscale x 8 x double> %vbext, <vscale x 8 x double> %vc, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 8 x double> %v
}

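; Widening two steps (f16 -> f64) still uses one widening FMA: the operands
; are first extended to f32 with vfwcvt.f.f.v, then the f32 -> f64 vfwmacc.vv
; performs the final widening multiply-accumulate.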
declare <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32)

define <vscale x 1 x double> @vfmacc_vv_nxv1f64_nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x double> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv1f64_nxv1f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vfwcvt.f.f.v v11, v8, v0.t
; CHECK-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfwmacc.vv v10, v11, v8, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %aext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> %m, i32 %evl)
  %bext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f16(<vscale x 1 x half> %b, <vscale x 1 x i1> %m, i32 %evl)
  %v = call <vscale x 1 x double> @llvm.vp.fma.nxv1f64(<vscale x 1 x double> %aext, <vscale x 1 x double> %bext, <vscale x 1 x double> %c, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x double> %v
}

define <vscale x 1 x double> @vfmacc_vv_nxv1f64_nxv1f16_unmasked(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv1f64_nxv1f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT: vfwcvt.f.f.v v11, v8
; CHECK-NEXT: vfwcvt.f.f.v v8, v9
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT: vfwmacc.vv v10, v11, v8
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret
  %aext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f16(<vscale x 1 x half> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 1 x double> @llvm.vp.fma.nxv1f64(<vscale x 1 x double> %aext, <vscale x 1 x double> %bext, <vscale x 1 x double> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 1 x double> %v
}

declare <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f16(<vscale x 2 x half>, <vscale x 2 x i1>, i32)

define <vscale x 2 x double> @vfmacc_vv_nxv2f64_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, <vscale x 2 x double> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv2f64_nxv2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT: vfwcvt.f.f.v v12, v8, v0.t
; CHECK-NEXT: vfwcvt.f.f.v v8, v9, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vfwmacc.vv v10, v12, v8, v0.t
; CHECK-NEXT: vmv2r.v v8, v10
; CHECK-NEXT: ret
  %aext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x i1> %m, i32 %evl)
  %bext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f16(<vscale x 2 x half> %b, <vscale x 2 x i1> %m, i32 %evl)
  %v = call <vscale x 2 x double> @llvm.vp.fma.nxv2f64(<vscale x 2 x double> %aext, <vscale x 2 x double> %bext, <vscale x 2 x double> %c, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x double> %v
}

define <vscale x 2 x double> @vfmacc_vv_nxv2f64_nxv2f16_unmasked(<vscale x 2 x half> %a, <vscale x 2 x half> %b, <vscale x 2 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv2f64_nxv2f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT: vfwcvt.f.f.v v12, v8
; CHECK-NEXT: vfwcvt.f.f.v v8, v9
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vfwmacc.vv v10, v12, v8
; CHECK-NEXT: vmv2r.v v8, v10
; CHECK-NEXT: ret
  %aext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f16(<vscale x 2 x half> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 2 x double> @llvm.vp.fma.nxv2f64(<vscale x 2 x double> %aext, <vscale x 2 x double> %bext, <vscale x 2 x double> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 2 x double> %v
}

declare <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f16(<vscale x 4 x half>, <vscale x 4 x i1>, i32)

define <vscale x 4 x double> @vfmacc_vv_nxv4f64_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, <vscale x 4 x double> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv4f64_nxv4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT: vfwcvt.f.f.v v10, v8, v0.t
; CHECK-NEXT: vfwcvt.f.f.v v16, v9, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vfwmacc.vv v12, v10, v16, v0.t
; CHECK-NEXT: vmv4r.v v8, v12
; CHECK-NEXT: ret
  %aext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x i1> %m, i32 %evl)
  %bext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f16(<vscale x 4 x half> %b, <vscale x 4 x i1> %m, i32 %evl)
  %v = call <vscale x 4 x double> @llvm.vp.fma.nxv4f64(<vscale x 4 x double> %aext, <vscale x 4 x double> %bext, <vscale x 4 x double> %c, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x double> %v
}

define <vscale x 4 x double> @vfmacc_vv_nxv4f64_nxv4f16_unmasked(<vscale x 4 x half> %a, <vscale x 4 x half> %b, <vscale x 4 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv4f64_nxv4f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT: vfwcvt.f.f.v v10, v8
; CHECK-NEXT: vfwcvt.f.f.v v16, v9
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vfwmacc.vv v12, v10, v16
; CHECK-NEXT: vmv4r.v v8, v12
; CHECK-NEXT: ret
  %aext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f16(<vscale x 4 x half> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 4 x double> @llvm.vp.fma.nxv4f64(<vscale x 4 x double> %aext, <vscale x 4 x double> %bext, <vscale x 4 x double> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 4 x double> %v
}

declare <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, i32)

define <vscale x 8 x double> @vfmacc_vv_nxv8f64_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x double> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv8f64_nxv8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT: vfwcvt.f.f.v v12, v8, v0.t
; CHECK-NEXT: vfwcvt.f.f.v v24, v10, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT: vfwmacc.vv v16, v12, v24, v0.t
; CHECK-NEXT: vmv8r.v v8, v16
; CHECK-NEXT: ret
  %aext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> %m, i32 %evl)
  %bext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f16(<vscale x 8 x half> %b, <vscale x 8 x i1> %m, i32 %evl)
  %v = call <vscale x 8 x double> @llvm.vp.fma.nxv8f64(<vscale x 8 x double> %aext, <vscale x 8 x double> %bext, <vscale x 8 x double> %c, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @vfmacc_vv_nxv8f64_nxv8f16_unmasked(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv8f64_nxv8f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT: vfwcvt.f.f.v v12, v8
; CHECK-NEXT: vfwcvt.f.f.v v24, v10
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT: vfwmacc.vv v16, v12, v24
; CHECK-NEXT: vmv8r.v v8, v16
; CHECK-NEXT: ret
  %aext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f16(<vscale x 8 x half> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 8 x double> @llvm.vp.fma.nxv8f64(<vscale x 8 x double> %aext, <vscale x 8 x double> %bext, <vscale x 8 x double> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 8 x double> %v
}

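; In the squared tests the same extended value feeds both multiplicands. With
; Zvfh a single vfwmacc.vv can use v8 for both inputs; with only Zvfhmin there
; is no f16 multiply-add, so the operand is converted once and a plain
; vfmadd.vv at e32 is expected instead.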
define <vscale x 1 x float> @vfmacc_squared_nxv1f32(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_squared_nxv1f32:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT: vfwmacc.vv v10, v8, v8, v0.t
; ZVFH-NEXT: vmv1r.v v8, v10
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmacc_squared_nxv1f32:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v9, v9, v10, v0.t
; ZVFHMIN-NEXT: vmv1r.v v8, v9
; ZVFHMIN-NEXT: ret
  %aext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> %m, i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %aext, <vscale x 1 x float> %aext, <vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x float> %v
}

define <vscale x 1 x float> @vfmacc_squared_nxv1f32_unmasked(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x float> %c, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_squared_nxv1f32_unmasked:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT: vfwmacc.vv v10, v8, v8
; ZVFH-NEXT: vmv1r.v v8, v10
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmacc_squared_nxv1f32_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v9, v9, v10
; ZVFHMIN-NEXT: vmv1r.v v8, v9
; ZVFHMIN-NEXT: ret
  %aext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %aext, <vscale x 1 x float> %aext, <vscale x 1 x float> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 1 x float> %v
}