llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \
   3 ; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
   4 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \
   5 ; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
   6
   7 declare <vscale x 1 x i8> @llvm.vp.mul.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i1>, i32)
   8 declare <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i1>, i32)
   9 declare <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1>, <vscale x 1 x i8>, <vscale x 1 x i8>, i32)
  10 declare <vscale x 1 x i8> @llvm.vp.select.nxv1i8(<vscale x 1 x i1>, <vscale x 1 x i8>, <vscale x 1 x i8>, i32)
  11
  12 define <vscale x 1 x i8> @vmadd_vv_nxv1i8(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b, <vscale x 1 x i8> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
  13 ; CHECK-LABEL: vmadd_vv_nxv1i8:
  14 ; CHECK:       # %bb.0:
  15 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
  16 ; CHECK-NEXT:    vmadd.vv v9, v8, v10
  17 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, tu, ma
  18 ; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
  19 ; CHECK-NEXT:    ret
     ; Builds (%a * %b) + %c with vp.mul/vp.add under an all-ones mask, then
     ; vp.merge under %m picks that result or %a. The checks above expect an
     ; unmasked vmadd.vv (ta) followed by vmerge.vvm under a tu vsetvli.
  20   %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
  21   %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
  22   %x = call <vscale x 1 x i8> @llvm.vp.mul.nxv1i8(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b, <vscale x 1 x i1> %allones, i32 %evl)
  23   %y = call <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8> %x, <vscale x 1 x i8> %c, <vscale x 1 x i1> %allones, i32 %evl)
  24   %u = call <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1> %m, <vscale x 1 x i8> %y, <vscale x 1 x i8> %a, i32 %evl)
  25   ret <vscale x 1 x i8> %u
  26 }
  27
  28 define <vscale x 1 x i8> @vmadd_vv_nxv1i8_unmasked(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b, <vscale x 1 x i8> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
  29 ; CHECK-LABEL: vmadd_vv_nxv1i8_unmasked:
  30 ; CHECK:       # %bb.0:
  31 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
  32 ; CHECK-NEXT:    vmadd.vv v9, v8, v10
  33 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, tu, ma
  34 ; CHECK-NEXT:    vmv.v.v v8, v9
  35 ; CHECK-NEXT:    ret
     ; Builds (%a * %b) + %c with vp.mul/vp.add, then vp.merge with the all-ones
     ; mask and %a as the other operand (%m is not used). The checks above
     ; expect vmadd.vv (ta) followed by vmv.v.v under a tu vsetvli.
  36   %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
  37   %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
  38   %x = call <vscale x 1 x i8> @llvm.vp.mul.nxv1i8(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b, <vscale x 1 x i1> %allones, i32 %evl)
  39   %y = call <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8> %x, <vscale x 1 x i8> %c, <vscale x 1 x i1> %allones, i32 %evl)
  40   %u = call <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1> %allones, <vscale x 1 x i8> %y, <vscale x 1 x i8> %a, i32 %evl)
  41   ret <vscale x 1 x i8> %u
  42 }
  43
  44 define <vscale x 1 x i8> @vmadd_vx_nxv1i8(<vscale x 1 x i8> %a, i8 %b, <vscale x 1 x i8> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
  45 ; CHECK-LABEL: vmadd_vx_nxv1i8:
  46 ; CHECK:       # %bb.0:
  47 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, tu, mu
  48 ; CHECK-NEXT:    vmadd.vx v8, a0, v9, v0.t
  49 ; CHECK-NEXT:    ret
     ; Scalar %b is splatted into %vb; (%a * %vb) + %c is then merged with %a
     ; under %m via vp.merge. The checks above expect the whole chain to fold
     ; into one masked vmadd.vx under a tu, mu vsetvli.
  50   %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
  51   %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
  52   %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
  53   %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
  54   %x = call <vscale x 1 x i8> @llvm.vp.mul.nxv1i8(<vscale x 1 x i8> %a, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %allones, i32 %evl)
  55   %y = call <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8> %x, <vscale x 1 x i8> %c, <vscale x 1 x i1> %allones, i32 %evl)
  56   %u = call <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1> %m, <vscale x 1 x i8> %y, <vscale x 1 x i8> %a, i32 %evl)
  57   ret <vscale x 1 x i8> %u
  58 }
  59
  60 define <vscale x 1 x i8> @vmadd_vx_nxv1i8_unmasked(<vscale x 1 x i8> %a, i8 %b, <vscale x 1 x i8> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
  61 ; CHECK-LABEL: vmadd_vx_nxv1i8_unmasked:
  62 ; CHECK:       # %bb.0:
  63 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, tu, ma
  64 ; CHECK-NEXT:    vmadd.vx v8, a0, v9
  65 ; CHECK-NEXT:    ret
     ; Scalar %b is splatted into %vb; (%a * %vb) + %c is then merged with %a
     ; via vp.merge using the all-ones mask (%m is not used). The checks above
     ; expect a single unmasked vmadd.vx under a tu, ma vsetvli.
  66   %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
  67   %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
  68   %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
  69   %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
  70   %x = call <vscale x 1 x i8> @llvm.vp.mul.nxv1i8(<vscale x 1 x i8> %a, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %allones, i32 %evl)
  71   %y = call <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8> %x, <vscale x 1 x i8> %c, <vscale x 1 x i1> %allones, i32 %evl)
  72   %u = call <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1> %allones, <vscale x 1 x i8> %y, <vscale x 1 x i8> %a, i32 %evl)
  73   ret <vscale x 1 x i8> %u
  74 }
  75
  76 define <vscale x 1 x i8> @vmadd_vv_nxv1i8_ta(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b, <vscale x 1 x i8> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
  77 ; CHECK-LABEL: vmadd_vv_nxv1i8_ta:
  78 ; CHECK:       # %bb.0:
  79 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
  80 ; CHECK-NEXT:    vmadd.vv v9, v8, v10
  81 ; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
  82 ; CHECK-NEXT:    ret
     ; Builds (%a * %b) + %c with vp.mul/vp.add, then uses vp.select (not
     ; vp.merge) under %m to pick that result or %a. The checks above expect
     ; vmadd.vv and vmerge.vvm under a single ta, ma vsetvli.
  83   %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
  84   %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
  85   %x = call <vscale x 1 x i8> @llvm.vp.mul.nxv1i8(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b, <vscale x 1 x i1> %allones, i32 %evl)
  86   %y = call <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8> %x, <vscale x 1 x i8> %c, <vscale x 1 x i1> %allones, i32 %evl)
  87   %u = call <vscale x 1 x i8> @llvm.vp.select.nxv1i8(<vscale x 1 x i1> %m, <vscale x 1 x i8> %y, <vscale x 1 x i8> %a, i32 %evl)
  88   ret <vscale x 1 x i8> %u
  89 }
  90
  91 define <vscale x 1 x i8> @vmadd_vx_nxv1i8_ta(<vscale x 1 x i8> %a, i8 %b, <vscale x 1 x i8> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
  92 ; CHECK-LABEL: vmadd_vx_nxv1i8_ta:
  93 ; CHECK:       # %bb.0:
  94 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
  95 ; CHECK-NEXT:    vmacc.vx v9, a0, v8
  96 ; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
  97 ; CHECK-NEXT:    ret
     ; Scalar %b is splatted into %vb; vp.select under %m then picks between
     ; (%a * %vb) + %c and %a. The checks above expect vmacc.vx followed by
     ; vmerge.vvm under a single ta, ma vsetvli.
  98   %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
  99   %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
 100   %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
 101   %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
 102   %x = call <vscale x 1 x i8> @llvm.vp.mul.nxv1i8(<vscale x 1 x i8> %a, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %allones, i32 %evl)
 103   %y = call <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8> %x, <vscale x 1 x i8> %c, <vscale x 1 x i1> %allones, i32 %evl)
 104   %u = call <vscale x 1 x i8> @llvm.vp.select.nxv1i8(<vscale x 1 x i1> %m, <vscale x 1 x i8> %y, <vscale x 1 x i8> %a, i32 %evl)
 105   ret <vscale x 1 x i8> %u
 106 }
 107
 108 declare <vscale x 2 x i8> @llvm.vp.mul.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>, <vscale x 2 x i1>, i32)
 109 declare <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>, <vscale x 2 x i1>, i32)
 110 declare <vscale x 2 x i8> @llvm.vp.merge.nxv2i8(<vscale x 2 x i1>, <vscale x 2 x i8>, <vscale x 2 x i8>, i32)
 111 declare <vscale x 2 x i8> @llvm.vp.select.nxv2i8(<vscale x 2 x i1>, <vscale x 2 x i8>, <vscale x 2 x i8>, i32)
 112
 113 define <vscale x 2 x i8> @vmadd_vv_nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b, <vscale x 2 x i8> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
 114 ; CHECK-LABEL: vmadd_vv_nxv2i8:
 115 ; CHECK:       # %bb.0:
 116 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
 117 ; CHECK-NEXT:    vmadd.vv v9, v8, v10
 118 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, tu, ma
 119 ; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
 120 ; CHECK-NEXT:    ret
     ; Builds (%a * %b) + %c with vp.mul/vp.add under an all-ones mask, then
     ; vp.merge under %m picks that result or %a. The checks above expect an
     ; unmasked vmadd.vv (ta) followed by vmerge.vvm under a tu vsetvli.
 121   %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
 122   %allones = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
 123   %x = call <vscale x 2 x i8> @llvm.vp.mul.nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b, <vscale x 2 x i1> %allones, i32 %evl)
 124   %y = call <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i8> %c, <vscale x 2 x i1> %allones, i32 %evl)
 125   %u = call <vscale x 2 x i8> @llvm.vp.merge.nxv2i8(<vscale x 2 x i1> %m, <vscale x 2 x i8> %y, <vscale x 2 x i8> %a, i32 %evl)
 126   ret <vscale x 2 x i8> %u
 127 }
 128
 129 define <vscale x 2 x i8> @vmadd_vv_nxv2i8_unmasked(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b, <vscale x 2 x i8> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
 130 ; CHECK-LABEL: vmadd_vv_nxv2i8_unmasked:
 131 ; CHECK:       # %bb.0:
 132 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
 133 ; CHECK-NEXT:    vmadd.vv v9, v8, v10
 134 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, tu, ma
 135 ; CHECK-NEXT:    vmv.v.v v8, v9
 136 ; CHECK-NEXT:    ret
     ; Builds (%a * %b) + %c with vp.mul/vp.add, then vp.merge with the all-ones
     ; mask and %a as the other operand (%m is not used). The checks above
     ; expect vmadd.vv (ta) followed by vmv.v.v under a tu vsetvli.
 137   %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
 138   %allones = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
 139   %x = call <vscale x 2 x i8> @llvm.vp.mul.nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b, <vscale x 2 x i1> %allones, i32 %evl)
 140   %y = call <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i8> %c, <vscale x 2 x i1> %allones, i32 %evl)
 141   %u = call <vscale x 2 x i8> @llvm.vp.merge.nxv2i8(<vscale x 2 x i1> %allones, <vscale x 2 x i8> %y, <vscale x 2 x i8> %a, i32 %evl)
 142   ret <vscale x 2 x i8> %u
 143 }
 144
 145 define <vscale x 2 x i8> @vmadd_vx_nxv2i8(<vscale x 2 x i8> %a, i8 %b, <vscale x 2 x i8> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
 146 ; CHECK-LABEL: vmadd_vx_nxv2i8:
 147 ; CHECK:       # %bb.0:
 148 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, tu, mu
 149 ; CHECK-NEXT:    vmadd.vx v8, a0, v9, v0.t
 150 ; CHECK-NEXT:    ret
     ; Scalar %b is splatted into %vb; (%a * %vb) + %c is then merged with %a
     ; under %m via vp.merge. The checks above expect the whole chain to fold
     ; into one masked vmadd.vx under a tu, mu vsetvli.
 151   %elt.head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
 152   %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
 153   %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
 154   %allones = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
 155   %x = call <vscale x 2 x i8> @llvm.vp.mul.nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %allones, i32 %evl)
 156   %y = call <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i8> %c, <vscale x 2 x i1> %allones, i32 %evl)
 157   %u = call <vscale x 2 x i8> @llvm.vp.merge.nxv2i8(<vscale x 2 x i1> %m, <vscale x 2 x i8> %y, <vscale x 2 x i8> %a, i32 %evl)
 158   ret <vscale x 2 x i8> %u
 159 }
 160
 161 define <vscale x 2 x i8> @vmadd_vx_nxv2i8_unmasked(<vscale x 2 x i8> %a, i8 %b, <vscale x 2 x i8> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
 162 ; CHECK-LABEL: vmadd_vx_nxv2i8_unmasked:
 163 ; CHECK:       # %bb.0:
 164 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, tu, ma
 165 ; CHECK-NEXT:    vmadd.vx v8, a0, v9
 166 ; CHECK-NEXT:    ret
     ; Scalar %b is splatted into %vb; (%a * %vb) + %c is then merged with %a
     ; via vp.merge using the all-ones mask (%m is not used). The checks above
     ; expect a single unmasked vmadd.vx under a tu, ma vsetvli.
 167   %elt.head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
 168   %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
 169   %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
 170   %allones = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
 171   %x = call <vscale x 2 x i8> @llvm.vp.mul.nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %allones, i32 %evl)
 172   %y = call <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i8> %c, <vscale x 2 x i1> %allones, i32 %evl)
 173   %u = call <vscale x 2 x i8> @llvm.vp.merge.nxv2i8(<vscale x 2 x i1> %allones, <vscale x 2 x i8> %y, <vscale x 2 x i8> %a, i32 %evl)
 174   ret <vscale x 2 x i8> %u
 175 }
 176
 177 define <vscale x 2 x i8> @vmadd_vv_nxv2i8_ta(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b, <vscale x 2 x i8> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
 178 ; CHECK-LABEL: vmadd_vv_nxv2i8_ta:
 179 ; CHECK:       # %bb.0:
 180 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
 181 ; CHECK-NEXT:    vmadd.vv v9, v8, v10
 182 ; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
 183 ; CHECK-NEXT:    ret
     ; Builds (%a * %b) + %c with vp.mul/vp.add, then uses vp.select (not
     ; vp.merge) under %m to pick that result or %a. The checks above expect
     ; vmadd.vv and vmerge.vvm under a single ta, ma vsetvli.
 184   %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
 185   %allones = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
 186   %x = call <vscale x 2 x i8> @llvm.vp.mul.nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b, <vscale x 2 x i1> %allones, i32 %evl)
 187   %y = call <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i8> %c, <vscale x 2 x i1> %allones, i32 %evl)
 188   %u = call <vscale x 2 x i8> @llvm.vp.select.nxv2i8(<vscale x 2 x i1> %m, <vscale x 2 x i8> %y, <vscale x 2 x i8> %a, i32 %evl)
 189   ret <vscale x 2 x i8> %u
 190 }
 191
 192 define <vscale x 2 x i8> @vmadd_vx_nxv2i8_ta(<vscale x 2 x i8> %a, i8 %b, <vscale x 2 x i8> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
 193 ; CHECK-LABEL: vmadd_vx_nxv2i8_ta:
 194 ; CHECK:       # %bb.0:
 195 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
 196 ; CHECK-NEXT:    vmacc.vx v9, a0, v8
 197 ; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
 198 ; CHECK-NEXT:    ret
     ; Scalar %b is splatted into %vb; vp.select under %m then picks between
     ; (%a * %vb) + %c and %a. The checks above expect vmacc.vx followed by
     ; vmerge.vvm under a single ta, ma vsetvli.
 199   %elt.head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
 200   %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
 201   %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
 202   %allones = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
 203   %x = call <vscale x 2 x i8> @llvm.vp.mul.nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %allones, i32 %evl)
 204   %y = call <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i8> %c, <vscale x 2 x i1> %allones, i32 %evl)
 205   %u = call <vscale x 2 x i8> @llvm.vp.select.nxv2i8(<vscale x 2 x i1> %m, <vscale x 2 x i8> %y, <vscale x 2 x i8> %a, i32 %evl)
 206   ret <vscale x 2 x i8> %u
 207 }
 208
 209 declare <vscale x 4 x i8> @llvm.vp.mul.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>, <vscale x 4 x i1>, i32)
 210 declare <vscale x 4 x i8> @llvm.vp.add.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>, <vscale x 4 x i1>, i32)
 211 declare <vscale x 4 x i8> @llvm.vp.merge.nxv4i8(<vscale x 4 x i1>, <vscale x 4 x i8>, <vscale x 4 x i8>, i32)
 212 declare <vscale x 4 x i8> @llvm.vp.select.nxv4i8(<vscale x 4 x i1>, <vscale x 4 x i8>, <vscale x 4 x i8>, i32)
 213
 214 define <vscale x 4 x i8> @vmadd_vv_nxv4i8(<vscale x 4 x i8> %a, <vscale x 4 x i8> %b, <vscale x 4 x i8> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
 215 ; CHECK-LABEL: vmadd_vv_nxv4i8:
 216 ; CHECK:       # %bb.0:
 217 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
 218 ; CHECK-NEXT:    vmadd.vv v9, v8, v10
 219 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, tu, ma
 220 ; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
 221 ; CHECK-NEXT:    ret
     ; Builds (%a * %b) + %c with vp.mul/vp.add under an all-ones mask, then
     ; vp.merge under %m picks that result or %a. The checks above expect an
     ; unmasked vmadd.vv (ta) followed by vmerge.vvm under a tu vsetvli.
 222   %splat = insertelement <vscale x 4 x i1> poison, i1 -1, i32 0
 223   %allones = shufflevector <vscale x 4 x i1> %splat, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
 224   %x = call <vscale x 4 x i8> @llvm.vp.mul.nxv4i8(<vscale x 4 x i8> %a, <vscale x 4 x i8> %b, <vscale x 4 x i1> %allones, i32 %evl)
 225   %y = call <vscale x 4 x i8> @llvm.vp.add.nxv4i8(<vscale x 4 x i8> %x, <vscale x 4 x i8> %c, <vscale x 4 x i1> %allones, i32 %evl)
 226   %u = call <vscale x 4 x i8> @llvm.vp.merge.nxv4i8(<vscale x 4 x i1> %m, <vscale x 4 x i8> %y, <vscale x 4 x i8> %a, i32 %evl)
 227   ret <vscale x 4 x i8> %u
 228 }
 229
 230 define <vscale x 4 x i8> @vmadd_vv_nxv4i8_unmasked(<vscale x 4 x i8> %a, <vscale x 4 x i8> %b, <vscale x 4 x i8> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
 231 ; CHECK-LABEL: vmadd_vv_nxv4i8_unmasked:
 232 ; CHECK:       # %bb.0:
 233 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
 234 ; CHECK-NEXT:    vmadd.vv v9, v8, v10
 235 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, tu, ma
 236 ; CHECK-NEXT:    vmv.v.v v8, v9
 237 ; CHECK-NEXT:    ret
     ; Builds (%a * %b) + %c with vp.mul/vp.add, then vp.merge with the all-ones
     ; mask and %a as the other operand (%m is not used). The checks above
     ; expect vmadd.vv (ta) followed by vmv.v.v under a tu vsetvli.
 238   %splat = insertelement <vscale x 4 x i1> poison, i1 -1, i32 0
 239   %allones = shufflevector <vscale x 4 x i1> %splat, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
 240   %x = call <vscale x 4 x i8> @llvm.vp.mul.nxv4i8(<vscale x 4 x i8> %a, <vscale x 4 x i8> %b, <vscale x 4 x i1> %allones, i32 %evl)
 241   %y = call <vscale x 4 x i8> @llvm.vp.add.nxv4i8(<vscale x 4 x i8> %x, <vscale x 4 x i8> %c, <vscale x 4 x i1> %allones, i32 %evl)
 242   %u = call <vscale x 4 x i8> @llvm.vp.merge.nxv4i8(<vscale x 4 x i1> %allones, <vscale x 4 x i8> %y, <vscale x 4 x i8> %a, i32 %evl)
 243   ret <vscale x 4 x i8> %u
 244 }
 245
 246 define <vscale x 4 x i8> @vmadd_vx_nxv4i8(<vscale x 4 x i8> %a, i8 %b, <vscale x 4 x i8> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
 247 ; CHECK-LABEL: vmadd_vx_nxv4i8:
 248 ; CHECK:       # %bb.0:
 249 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, tu, mu
 250 ; CHECK-NEXT:    vmadd.vx v8, a0, v9, v0.t
 251 ; CHECK-NEXT:    ret
     ; Scalar %b is splatted into %vb; (%a * %vb) + %c is then merged with %a
     ; under %m via vp.merge. The checks above expect the whole chain to fold
     ; into one masked vmadd.vx under a tu, mu vsetvli.
 252   %elt.head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0
 253   %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
 254   %splat = insertelement <vscale x 4 x i1> poison, i1 -1, i32 0
 255   %allones = shufflevector <vscale x 4 x i1> %splat, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
 256   %x = call <vscale x 4 x i8> @llvm.vp.mul.nxv4i8(<vscale x 4 x i8> %a, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %allones, i32 %evl)
 257   %y = call <vscale x 4 x i8> @llvm.vp.add.nxv4i8(<vscale x 4 x i8> %x, <vscale x 4 x i8> %c, <vscale x 4 x i1> %allones, i32 %evl)
 258   %u = call <vscale x 4 x i8> @llvm.vp.merge.nxv4i8(<vscale x 4 x i1> %m, <vscale x 4 x i8> %y, <vscale x 4 x i8> %a, i32 %evl)
 259   ret <vscale x 4 x i8> %u
 260 }
 261
 262 define <vscale x 4 x i8> @vmadd_vx_nxv4i8_unmasked(<vscale x 4 x i8> %a, i8 %b, <vscale x 4 x i8> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
 263 ; CHECK-LABEL: vmadd_vx_nxv4i8_unmasked:
 264 ; CHECK:       # %bb.0:
 265 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, tu, ma
 266 ; CHECK-NEXT:    vmadd.vx v8, a0, v9
 267 ; CHECK-NEXT:    ret
     ; Scalar %b is splatted into %vb; (%a * %vb) + %c is then merged with %a
     ; via vp.merge using the all-ones mask (%m is not used). The checks above
     ; expect a single unmasked vmadd.vx under a tu, ma vsetvli.
 268   %elt.head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0
 269   %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
 270   %splat = insertelement <vscale x 4 x i1> poison, i1 -1, i32 0
 271   %allones = shufflevector <vscale x 4 x i1> %splat, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
 272   %x = call <vscale x 4 x i8> @llvm.vp.mul.nxv4i8(<vscale x 4 x i8> %a, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %allones, i32 %evl)
 273   %y = call <vscale x 4 x i8> @llvm.vp.add.nxv4i8(<vscale x 4 x i8> %x, <vscale x 4 x i8> %c, <vscale x 4 x i1> %allones, i32 %evl)
 274   %u = call <vscale x 4 x i8> @llvm.vp.merge.nxv4i8(<vscale x 4 x i1> %allones, <vscale x 4 x i8> %y, <vscale x 4 x i8> %a, i32 %evl)
 275   ret <vscale x 4 x i8> %u
 276 }
 277
 278 define <vscale x 4 x i8> @vmadd_vv_nxv4i8_ta(<vscale x 4 x i8> %a, <vscale x 4 x i8> %b, <vscale x 4 x i8> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
 279 ; CHECK-LABEL: vmadd_vv_nxv4i8_ta:
 280 ; CHECK:       # %bb.0:
 281 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
 282 ; CHECK-NEXT:    vmadd.vv v9, v8, v10
 283 ; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
 284 ; CHECK-NEXT:    ret
     ; Builds (%a * %b) + %c with vp.mul/vp.add, then uses vp.select (not
     ; vp.merge) under %m to pick that result or %a. The checks above expect
     ; vmadd.vv and vmerge.vvm under a single ta, ma vsetvli.
 285   %splat = insertelement <vscale x 4 x i1> poison, i1 -1, i32 0
 286   %allones = shufflevector <vscale x 4 x i1> %splat, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
 287   %x = call <vscale x 4 x i8> @llvm.vp.mul.nxv4i8(<vscale x 4 x i8> %a, <vscale x 4 x i8> %b, <vscale x 4 x i1> %allones, i32 %evl)
 288   %y = call <vscale x 4 x i8> @llvm.vp.add.nxv4i8(<vscale x 4 x i8> %x, <vscale x 4 x i8> %c, <vscale x 4 x i1> %allones, i32 %evl)
 289   %u = call <vscale x 4 x i8> @llvm.vp.select.nxv4i8(<vscale x 4 x i1> %m, <vscale x 4 x i8> %y, <vscale x 4 x i8> %a, i32 %evl)
 290   ret <vscale x 4 x i8> %u
 291 }
 292
 293 define <vscale x 4 x i8> @vmadd_vx_nxv4i8_ta(<vscale x 4 x i8> %a, i8 %b, <vscale x 4 x i8> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
 294 ; CHECK-LABEL: vmadd_vx_nxv4i8_ta:
 295 ; CHECK:       # %bb.0:
 296 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
 297 ; CHECK-NEXT:    vmacc.vx v9, a0, v8
 298 ; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
 299 ; CHECK-NEXT:    ret
     ; Scalar %b is splatted into %vb; vp.select under %m then picks between
     ; (%a * %vb) + %c and %a. The checks above expect vmacc.vx followed by
     ; vmerge.vvm under a single ta, ma vsetvli.
 300   %elt.head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0
 301   %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
 302   %splat = insertelement <vscale x 4 x i1> poison, i1 -1, i32 0
 303   %allones = shufflevector <vscale x 4 x i1> %splat, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
 304   %x = call <vscale x 4 x i8> @llvm.vp.mul.nxv4i8(<vscale x 4 x i8> %a, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %allones, i32 %evl)
 305   %y = call <vscale x 4 x i8> @llvm.vp.add.nxv4i8(<vscale x 4 x i8> %x, <vscale x 4 x i8> %c, <vscale x 4 x i1> %allones, i32 %evl)
 306   %u = call <vscale x 4 x i8> @llvm.vp.select.nxv4i8(<vscale x 4 x i1> %m, <vscale x 4 x i8> %y, <vscale x 4 x i8> %a, i32 %evl)
 307   ret <vscale x 4 x i8> %u
 308 }
 309
 310 declare <vscale x 8 x i8> @llvm.vp.mul.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i1>, i32)
 311 declare <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i1>, i32)
 312 declare <vscale x 8 x i8> @llvm.vp.merge.nxv8i8(<vscale x 8 x i1>, <vscale x 8 x i8>, <vscale x 8 x i8>, i32)
 313 declare <vscale x 8 x i8> @llvm.vp.select.nxv8i8(<vscale x 8 x i1>, <vscale x 8 x i8>, <vscale x 8 x i8>, i32)
 314
 315 define <vscale x 8 x i8> @vmadd_vv_nxv8i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b, <vscale x 8 x i8> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
 316 ; CHECK-LABEL: vmadd_vv_nxv8i8:
 317 ; CHECK:       # %bb.0:
 318 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
 319 ; CHECK-NEXT:    vmadd.vv v9, v8, v10
 320 ; CHECK-NEXT:    vsetvli zero, zero, e8, m1, tu, ma
 321 ; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
 322 ; CHECK-NEXT:    ret
     ; Builds (%a * %b) + %c with vp.mul/vp.add under an all-ones mask, then
     ; vp.merge under %m picks that result or %a. The checks above expect an
     ; unmasked vmadd.vv (ta) followed by vmerge.vvm under a tu vsetvli.
 323   %splat = insertelement <vscale x 8 x i1> poison, i1 -1, i32 0
 324   %allones = shufflevector <vscale x 8 x i1> %splat, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
 325   %x = call <vscale x 8 x i8> @llvm.vp.mul.nxv8i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b, <vscale x 8 x i1> %allones, i32 %evl)
 326   %y = call <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %c, <vscale x 8 x i1> %allones, i32 %evl)
 327   %u = call <vscale x 8 x i8> @llvm.vp.merge.nxv8i8(<vscale x 8 x i1> %m, <vscale x 8 x i8> %y, <vscale x 8 x i8> %a, i32 %evl)
 328   ret <vscale x 8 x i8> %u
 329 }
 330
 331 define <vscale x 8 x i8> @vmadd_vv_nxv8i8_unmasked(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b, <vscale x 8 x i8> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
 332 ; CHECK-LABEL: vmadd_vv_nxv8i8_unmasked:
 333 ; CHECK:       # %bb.0:
 334 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
 335 ; CHECK-NEXT:    vmadd.vv v9, v8, v10
 336 ; CHECK-NEXT:    vsetvli zero, zero, e8, m1, tu, ma
 337 ; CHECK-NEXT:    vmv.v.v v8, v9
 338 ; CHECK-NEXT:    ret
     ; Builds (%a * %b) + %c with vp.mul/vp.add, then vp.merge with the all-ones
     ; mask and %a as the other operand (%m is not used). The checks above
     ; expect vmadd.vv (ta) followed by vmv.v.v under a tu vsetvli.
 339   %splat = insertelement <vscale x 8 x i1> poison, i1 -1, i32 0
 340   %allones = shufflevector <vscale x 8 x i1> %splat, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
 341   %x = call <vscale x 8 x i8> @llvm.vp.mul.nxv8i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b, <vscale x 8 x i1> %allones, i32 %evl)
 342   %y = call <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %c, <vscale x 8 x i1> %allones, i32 %evl)
 343   %u = call <vscale x 8 x i8> @llvm.vp.merge.nxv8i8(<vscale x 8 x i1> %allones, <vscale x 8 x i8> %y, <vscale x 8 x i8> %a, i32 %evl)
 344   ret <vscale x 8 x i8> %u
 345 }
 346
 347 define <vscale x 8 x i8> @vmadd_vx_nxv8i8(<vscale x 8 x i8> %a, i8 %b, <vscale x 8 x i8> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
 348 ; CHECK-LABEL: vmadd_vx_nxv8i8:
 349 ; CHECK:       # %bb.0:
 350 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, tu, mu
 351 ; CHECK-NEXT:    vmadd.vx v8, a0, v9, v0.t
 352 ; CHECK-NEXT:    ret
     ; Scalar %b is splatted into %vb; (%a * %vb) + %c is then merged with %a
     ; under %m via vp.merge. The checks above expect the whole chain to fold
     ; into one masked vmadd.vx under a tu, mu vsetvli.
 353   %elt.head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
 354   %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
 355   %splat = insertelement <vscale x 8 x i1> poison, i1 -1, i32 0
 356   %allones = shufflevector <vscale x 8 x i1> %splat, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
 357   %x = call <vscale x 8 x i8> @llvm.vp.mul.nxv8i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %allones, i32 %evl)
 358   %y = call <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %c, <vscale x 8 x i1> %allones, i32 %evl)
 359   %u = call <vscale x 8 x i8> @llvm.vp.merge.nxv8i8(<vscale x 8 x i1> %m, <vscale x 8 x i8> %y, <vscale x 8 x i8> %a, i32 %evl)
 360   ret <vscale x 8 x i8> %u
 361 }
 362
 363 define <vscale x 8 x i8> @vmadd_vx_nxv8i8_unmasked(<vscale x 8 x i8> %a, i8 %b, <vscale x 8 x i8> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
 364 ; CHECK-LABEL: vmadd_vx_nxv8i8_unmasked:
 365 ; CHECK:       # %bb.0:
 366 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, tu, ma
 367 ; CHECK-NEXT:    vmadd.vx v8, a0, v9
 368 ; CHECK-NEXT:    ret
     ; Scalar %b is splatted into %vb; (%a * %vb) + %c is then merged with %a
     ; via vp.merge using the all-ones mask (%m is not used). The checks above
     ; expect a single unmasked vmadd.vx under a tu, ma vsetvli.
 369   %elt.head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
 370   %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
 371   %splat = insertelement <vscale x 8 x i1> poison, i1 -1, i32 0
 372   %allones = shufflevector <vscale x 8 x i1> %splat, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
 373   %x = call <vscale x 8 x i8> @llvm.vp.mul.nxv8i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %allones, i32 %evl)
 374   %y = call <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %c, <vscale x 8 x i1> %allones, i32 %evl)
 375   %u = call <vscale x 8 x i8> @llvm.vp.merge.nxv8i8(<vscale x 8 x i1> %allones, <vscale x 8 x i8> %y, <vscale x 8 x i8> %a, i32 %evl)
 376   ret <vscale x 8 x i8> %u
 377 }
 378
 379 define <vscale x 8 x i8> @vmadd_vv_nxv8i8_ta(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b, <vscale x 8 x i8> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
 380 ; CHECK-LABEL: vmadd_vv_nxv8i8_ta:
 381 ; CHECK:       # %bb.0:
 382 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
 383 ; CHECK-NEXT:    vmadd.vv v9, v8, v10
 384 ; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
 385 ; CHECK-NEXT:    ret
     ; Builds (%a * %b) + %c with vp.mul/vp.add, then uses vp.select (not
     ; vp.merge) under %m to pick that result or %a. The checks above expect
     ; vmadd.vv and vmerge.vvm under a single ta, ma vsetvli.
 386   %splat = insertelement <vscale x 8 x i1> poison, i1 -1, i32 0
 387   %allones = shufflevector <vscale x 8 x i1> %splat, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
 388   %x = call <vscale x 8 x i8> @llvm.vp.mul.nxv8i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b, <vscale x 8 x i1> %allones, i32 %evl)
 389   %y = call <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %c, <vscale x 8 x i1> %allones, i32 %evl)
 390   %u = call <vscale x 8 x i8> @llvm.vp.select.nxv8i8(<vscale x 8 x i1> %m, <vscale x 8 x i8> %y, <vscale x 8 x i8> %a, i32 %evl)
 391   ret <vscale x 8 x i8> %u
 392 }
 393
 394 define <vscale x 8 x i8> @vmadd_vx_nxv8i8_ta(<vscale x 8 x i8> %a, i8 %b, <vscale x 8 x i8> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
 395 ; CHECK-LABEL: vmadd_vx_nxv8i8_ta:
 396 ; CHECK:       # %bb.0:
 397 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
 398 ; CHECK-NEXT:    vmacc.vx v9, a0, v8
 399 ; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
 400 ; CHECK-NEXT:    ret
     ; Scalar %b is splatted into %vb; vp.select under %m then picks between
     ; (%a * %vb) + %c and %a. The checks above expect vmacc.vx followed by
     ; vmerge.vvm under a single ta, ma vsetvli.
 401   %elt.head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
 402   %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
 403   %splat = insertelement <vscale x 8 x i1> poison, i1 -1, i32 0
 404   %allones = shufflevector <vscale x 8 x i1> %splat, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
 405   %x = call <vscale x 8 x i8> @llvm.vp.mul.nxv8i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %allones, i32 %evl)
 406   %y = call <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %c, <vscale x 8 x i1> %allones, i32 %evl)
 407   %u = call <vscale x 8 x i8> @llvm.vp.select.nxv8i8(<vscale x 8 x i1> %m, <vscale x 8 x i8> %y, <vscale x 8 x i8> %a, i32 %evl)
 408   ret <vscale x 8 x i8> %u
 409 }
 410
 411 declare <vscale x 16 x i8> @llvm.vp.mul.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i32)
 412 declare <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i32)
 413 declare <vscale x 16 x i8> @llvm.vp.merge.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32)
 414 declare <vscale x 16 x i8> @llvm.vp.select.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32)
 415
 416 define <vscale x 16 x i8> @vmadd_vv_nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
 417 ; CHECK-LABEL: vmadd_vv_nxv16i8:
 418 ; CHECK:       # %bb.0:
 419 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
 420 ; CHECK-NEXT:    vmadd.vv v10, v8, v12
 421 ; CHECK-NEXT:    vsetvli zero, zero, e8, m2, tu, ma
 422 ; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
 423 ; CHECK-NEXT:    ret
     ; Builds (%a * %b) + %c with vp.mul/vp.add under an all-ones mask, then
     ; vp.merge under %m picks that result or %a. The checks above expect an
     ; unmasked vmadd.vv (ta) followed by vmerge.vvm under a tu vsetvli.
 424   %splat = insertelement <vscale x 16 x i1> poison, i1 -1, i32 0
 425   %allones = shufflevector <vscale x 16 x i1> %splat, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
 426   %x = call <vscale x 16 x i8> @llvm.vp.mul.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i1> %allones, i32 %evl)
 427   %y = call <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> %c, <vscale x 16 x i1> %allones, i32 %evl)
 428   %u = call <vscale x 16 x i8> @llvm.vp.merge.nxv16i8(<vscale x 16 x i1> %m, <vscale x 16 x i8> %y, <vscale x 16 x i8> %a, i32 %evl)
 429   ret <vscale x 16 x i8> %u
 430 }
 431
 432 define <vscale x 16 x i8> @vmadd_vv_nxv16i8_unmasked(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
 433 ; CHECK-LABEL: vmadd_vv_nxv16i8_unmasked:
 434 ; CHECK:       # %bb.0:
 435 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
 436 ; CHECK-NEXT:    vmadd.vv v10, v8, v12
 437 ; CHECK-NEXT:    vsetvli zero, zero, e8, m2, tu, ma
 438 ; CHECK-NEXT:    vmv.v.v v8, v10
 439 ; CHECK-NEXT:    ret
     ; Builds (%a * %b) + %c with vp.mul/vp.add, then vp.merge with the all-ones
     ; mask and %a as the other operand (%m is not used). The checks above
     ; expect vmadd.vv (ta) followed by vmv.v.v under a tu vsetvli.
 440   %splat = insertelement <vscale x 16 x i1> poison, i1 -1, i32 0
 441   %allones = shufflevector <vscale x 16 x i1> %splat, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
 442   %x = call <vscale x 16 x i8> @llvm.vp.mul.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i1> %allones, i32 %evl)
 443   %y = call <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> %c, <vscale x 16 x i1> %allones, i32 %evl)
 444   %u = call <vscale x 16 x i8> @llvm.vp.merge.nxv16i8(<vscale x 16 x i1> %allones, <vscale x 16 x i8> %y, <vscale x 16 x i8> %a, i32 %evl)
 445   ret <vscale x 16 x i8> %u
 446 }
 447
 448 define <vscale x 16 x i8> @vmadd_vx_nxv16i8(<vscale x 16 x i8> %a, i8 %b, <vscale x 16 x i8> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
 449 ; CHECK-LABEL: vmadd_vx_nxv16i8:
 450 ; CHECK:       # %bb.0:
 451 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, tu, mu
 452 ; CHECK-NEXT:    vmadd.vx v8, a0, v10, v0.t
 453 ; CHECK-NEXT:    ret
 454   %elt.head = insertelement <vscale x 16 x i8> poison, i8 %b, i32 0
 455   %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
 456   %splat = insertelement <vscale x 16 x i1> poison, i1 -1, i32 0
 457   %allones = shufflevector <vscale x 16 x i1> %splat, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
 458   %x = call <vscale x 16 x i8> @llvm.vp.mul.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %allones, i32 %evl)
 459   %y = call <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> %c, <vscale x 16 x i1> %allones, i32 %evl)
 460   %u = call <vscale x 16 x i8> @llvm.vp.merge.nxv16i8(<vscale x 16 x i1> %m, <vscale x 16 x i8> %y, <vscale x 16 x i8> %a, i32 %evl)
 461   ret <vscale x 16 x i8> %u
 462 }
 463
 464 define <vscale x 16 x i8> @vmadd_vx_nxv16i8_unmasked(<vscale x 16 x i8> %a, i8 %b, <vscale x 16 x i8> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
 465 ; CHECK-LABEL: vmadd_vx_nxv16i8_unmasked:
 466 ; CHECK:       # %bb.0:
 467 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, tu, ma
 468 ; CHECK-NEXT:    vmadd.vx v8, a0, v10
 469 ; CHECK-NEXT:    ret
 470   %elt.head = insertelement <vscale x 16 x i8> poison, i8 %b, i32 0
 471   %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
 472   %splat = insertelement <vscale x 16 x i1> poison, i1 -1, i32 0
 473   %allones = shufflevector <vscale x 16 x i1> %splat, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
 474   %x = call <vscale x 16 x i8> @llvm.vp.mul.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %allones, i32 %evl)
 475   %y = call <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> %c, <vscale x 16 x i1> %allones, i32 %evl)
 476   %u = call <vscale x 16 x i8> @llvm.vp.merge.nxv16i8(<vscale x 16 x i1> %allones, <vscale x 16 x i8> %y, <vscale x 16 x i8> %a, i32 %evl)
 477   ret <vscale x 16 x i8> %u
 478 }
 479
 480 define <vscale x 16 x i8> @vmadd_vv_nxv16i8_ta(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
 481 ; CHECK-LABEL: vmadd_vv_nxv16i8_ta:
 482 ; CHECK:       # %bb.0:
 483 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
 484 ; CHECK-NEXT:    vmadd.vv v10, v8, v12
 485 ; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
 486 ; CHECK-NEXT:    ret
 487   %splat = insertelement <vscale x 16 x i1> poison, i1 -1, i32 0
 488   %allones = shufflevector <vscale x 16 x i1> %splat, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
 489   %x = call <vscale x 16 x i8> @llvm.vp.mul.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i1> %allones, i32 %evl)
 490   %y = call <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> %c, <vscale x 16 x i1> %allones, i32 %evl)
 491   %u = call <vscale x 16 x i8> @llvm.vp.select.nxv16i8(<vscale x 16 x i1> %m, <vscale x 16 x i8> %y, <vscale x 16 x i8> %a, i32 %evl)
 492   ret <vscale x 16 x i8> %u
 493 }
 494
 495 define <vscale x 16 x i8> @vmadd_vx_nxv16i8_ta(<vscale x 16 x i8> %a, i8 %b, <vscale x 16 x i8> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
 496 ; CHECK-LABEL: vmadd_vx_nxv16i8_ta:
 497 ; CHECK:       # %bb.0:
 498 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
 499 ; CHECK-NEXT:    vmacc.vx v10, a0, v8
 500 ; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
 501 ; CHECK-NEXT:    ret
 502   %elt.head = insertelement <vscale x 16 x i8> poison, i8 %b, i32 0
 503   %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
 504   %splat = insertelement <vscale x 16 x i1> poison, i1 -1, i32 0
 505   %allones = shufflevector <vscale x 16 x i1> %splat, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
 506   %x = call <vscale x 16 x i8> @llvm.vp.mul.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %allones, i32 %evl)
 507   %y = call <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> %c, <vscale x 16 x i1> %allones, i32 %evl)
 508   %u = call <vscale x 16 x i8> @llvm.vp.select.nxv16i8(<vscale x 16 x i1> %m, <vscale x 16 x i8> %y, <vscale x 16 x i8> %a, i32 %evl)
 509   ret <vscale x 16 x i8> %u
 510 }
 511
 512 declare <vscale x 32 x i8> @llvm.vp.mul.nxv32i8(<vscale x 32 x i8>, <vscale x 32 x i8>, <vscale x 32 x i1>, i32)
 513 declare <vscale x 32 x i8> @llvm.vp.add.nxv32i8(<vscale x 32 x i8>, <vscale x 32 x i8>, <vscale x 32 x i1>, i32)
 514 declare <vscale x 32 x i8> @llvm.vp.merge.nxv32i8(<vscale x 32 x i1>, <vscale x 32 x i8>, <vscale x 32 x i8>, i32)
 515 declare <vscale x 32 x i8> @llvm.vp.select.nxv32i8(<vscale x 32 x i1>, <vscale x 32 x i8>, <vscale x 32 x i8>, i32)
 516
 517 define <vscale x 32 x i8> @vmadd_vv_nxv32i8(<vscale x 32 x i8> %a, <vscale x 32 x i8> %b, <vscale x 32 x i8> %c,  <vscale x 32 x i1> %m, i32 zeroext %evl) {
 518 ; CHECK-LABEL: vmadd_vv_nxv32i8:
 519 ; CHECK:       # %bb.0:
 520 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
 521 ; CHECK-NEXT:    vmadd.vv v12, v8, v16
 522 ; CHECK-NEXT:    vsetvli zero, zero, e8, m4, tu, ma
 523 ; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
 524 ; CHECK-NEXT:    ret
 525   %splat = insertelement <vscale x 32 x i1> poison, i1 -1, i32 0
 526   %allones = shufflevector <vscale x 32 x i1> %splat, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
 527   %x = call <vscale x 32 x i8> @llvm.vp.mul.nxv32i8(<vscale x 32 x i8> %a, <vscale x 32 x i8> %b, <vscale x 32 x i1> %allones, i32 %evl)
 528   %y = call <vscale x 32 x i8> @llvm.vp.add.nxv32i8(<vscale x 32 x i8> %x, <vscale x 32 x i8> %c, <vscale x 32 x i1> %allones, i32 %evl)
 529   %u = call <vscale x 32 x i8> @llvm.vp.merge.nxv32i8(<vscale x 32 x i1> %m, <vscale x 32 x i8> %y, <vscale x 32 x i8> %a, i32 %evl)
 530   ret <vscale x 32 x i8> %u
 531 }
 532
 533 define <vscale x 32 x i8> @vmadd_vv_nxv32i8_unmasked(<vscale x 32 x i8> %a, <vscale x 32 x i8> %b, <vscale x 32 x i8> %c,  <vscale x 32 x i1> %m, i32 zeroext %evl) {
 534 ; CHECK-LABEL: vmadd_vv_nxv32i8_unmasked:
 535 ; CHECK:       # %bb.0:
 536 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
 537 ; CHECK-NEXT:    vmadd.vv v12, v8, v16
 538 ; CHECK-NEXT:    vsetvli zero, zero, e8, m4, tu, ma
 539 ; CHECK-NEXT:    vmv.v.v v8, v12
 540 ; CHECK-NEXT:    ret
 541   %splat = insertelement <vscale x 32 x i1> poison, i1 -1, i32 0
 542   %allones = shufflevector <vscale x 32 x i1> %splat, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
 543   %x = call <vscale x 32 x i8> @llvm.vp.mul.nxv32i8(<vscale x 32 x i8> %a, <vscale x 32 x i8> %b, <vscale x 32 x i1> %allones, i32 %evl)
 544   %y = call <vscale x 32 x i8> @llvm.vp.add.nxv32i8(<vscale x 32 x i8> %x, <vscale x 32 x i8> %c, <vscale x 32 x i1> %allones, i32 %evl)
 545   %u = call <vscale x 32 x i8> @llvm.vp.merge.nxv32i8(<vscale x 32 x i1> %allones, <vscale x 32 x i8> %y, <vscale x 32 x i8> %a, i32 %evl)
 546   ret <vscale x 32 x i8> %u
 547 }
 548
 549 define <vscale x 32 x i8> @vmadd_vx_nxv32i8(<vscale x 32 x i8> %a, i8 %b, <vscale x 32 x i8> %c,  <vscale x 32 x i1> %m, i32 zeroext %evl) {
 550 ; CHECK-LABEL: vmadd_vx_nxv32i8:
 551 ; CHECK:       # %bb.0:
 552 ; CHECK-NEXT:    vsetvli zero, a1, e8, m4, tu, mu
 553 ; CHECK-NEXT:    vmadd.vx v8, a0, v12, v0.t
 554 ; CHECK-NEXT:    ret
 555   %elt.head = insertelement <vscale x 32 x i8> poison, i8 %b, i32 0
 556   %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
 557   %splat = insertelement <vscale x 32 x i1> poison, i1 -1, i32 0
 558   %allones = shufflevector <vscale x 32 x i1> %splat, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
 559   %x = call <vscale x 32 x i8> @llvm.vp.mul.nxv32i8(<vscale x 32 x i8> %a, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %allones, i32 %evl)
 560   %y = call <vscale x 32 x i8> @llvm.vp.add.nxv32i8(<vscale x 32 x i8> %x, <vscale x 32 x i8> %c, <vscale x 32 x i1> %allones, i32 %evl)
 561   %u = call <vscale x 32 x i8> @llvm.vp.merge.nxv32i8(<vscale x 32 x i1> %m, <vscale x 32 x i8> %y, <vscale x 32 x i8> %a, i32 %evl)
 562   ret <vscale x 32 x i8> %u
 563 }
 564
 565 define <vscale x 32 x i8> @vmadd_vx_nxv32i8_unmasked(<vscale x 32 x i8> %a, i8 %b, <vscale x 32 x i8> %c,  <vscale x 32 x i1> %m, i32 zeroext %evl) {
 566 ; CHECK-LABEL: vmadd_vx_nxv32i8_unmasked:
 567 ; CHECK:       # %bb.0:
 568 ; CHECK-NEXT:    vsetvli zero, a1, e8, m4, tu, ma
 569 ; CHECK-NEXT:    vmadd.vx v8, a0, v12
 570 ; CHECK-NEXT:    ret
 571   %elt.head = insertelement <vscale x 32 x i8> poison, i8 %b, i32 0
 572   %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
 573   %splat = insertelement <vscale x 32 x i1> poison, i1 -1, i32 0
 574   %allones = shufflevector <vscale x 32 x i1> %splat, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
 575   %x = call <vscale x 32 x i8> @llvm.vp.mul.nxv32i8(<vscale x 32 x i8> %a, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %allones, i32 %evl)
 576   %y = call <vscale x 32 x i8> @llvm.vp.add.nxv32i8(<vscale x 32 x i8> %x, <vscale x 32 x i8> %c, <vscale x 32 x i1> %allones, i32 %evl)
 577   %u = call <vscale x 32 x i8> @llvm.vp.merge.nxv32i8(<vscale x 32 x i1> %allones, <vscale x 32 x i8> %y, <vscale x 32 x i8> %a, i32 %evl)
 578   ret <vscale x 32 x i8> %u
 579 }
 580
 581 define <vscale x 32 x i8> @vmadd_vv_nxv32i8_ta(<vscale x 32 x i8> %a, <vscale x 32 x i8> %b, <vscale x 32 x i8> %c,  <vscale x 32 x i1> %m, i32 zeroext %evl) {
 582 ; CHECK-LABEL: vmadd_vv_nxv32i8_ta:
 583 ; CHECK:       # %bb.0:
 584 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
 585 ; CHECK-NEXT:    vmadd.vv v12, v8, v16
 586 ; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
 587 ; CHECK-NEXT:    ret
 588   %splat = insertelement <vscale x 32 x i1> poison, i1 -1, i32 0
 589   %allones = shufflevector <vscale x 32 x i1> %splat, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
 590   %x = call <vscale x 32 x i8> @llvm.vp.mul.nxv32i8(<vscale x 32 x i8> %a, <vscale x 32 x i8> %b, <vscale x 32 x i1> %allones, i32 %evl)
 591   %y = call <vscale x 32 x i8> @llvm.vp.add.nxv32i8(<vscale x 32 x i8> %x, <vscale x 32 x i8> %c, <vscale x 32 x i1> %allones, i32 %evl)
 592   %u = call <vscale x 32 x i8> @llvm.vp.select.nxv32i8(<vscale x 32 x i1> %m, <vscale x 32 x i8> %y, <vscale x 32 x i8> %a, i32 %evl)
 593   ret <vscale x 32 x i8> %u
 594 }
 595
 596 define <vscale x 32 x i8> @vmadd_vx_nxv32i8_ta(<vscale x 32 x i8> %a, i8 %b, <vscale x 32 x i8> %c,  <vscale x 32 x i1> %m, i32 zeroext %evl) {
 597 ; CHECK-LABEL: vmadd_vx_nxv32i8_ta:
 598 ; CHECK:       # %bb.0:
 599 ; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
 600 ; CHECK-NEXT:    vmacc.vx v12, a0, v8
 601 ; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
 602 ; CHECK-NEXT:    ret
 603   %elt.head = insertelement <vscale x 32 x i8> poison, i8 %b, i32 0
 604   %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
 605   %splat = insertelement <vscale x 32 x i1> poison, i1 -1, i32 0
 606   %allones = shufflevector <vscale x 32 x i1> %splat, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
 607   %x = call <vscale x 32 x i8> @llvm.vp.mul.nxv32i8(<vscale x 32 x i8> %a, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %allones, i32 %evl)
 608   %y = call <vscale x 32 x i8> @llvm.vp.add.nxv32i8(<vscale x 32 x i8> %x, <vscale x 32 x i8> %c, <vscale x 32 x i1> %allones, i32 %evl)
 609   %u = call <vscale x 32 x i8> @llvm.vp.select.nxv32i8(<vscale x 32 x i1> %m, <vscale x 32 x i8> %y, <vscale x 32 x i8> %a, i32 %evl)
 610   ret <vscale x 32 x i8> %u
 611 }
 612
 613 declare <vscale x 64 x i8> @llvm.vp.mul.nxv64i8(<vscale x 64 x i8>, <vscale x 64 x i8>, <vscale x 64 x i1>, i32)
 614 declare <vscale x 64 x i8> @llvm.vp.add.nxv64i8(<vscale x 64 x i8>, <vscale x 64 x i8>, <vscale x 64 x i1>, i32)
 615 declare <vscale x 64 x i8> @llvm.vp.merge.nxv64i8(<vscale x 64 x i1>, <vscale x 64 x i8>, <vscale x 64 x i8>, i32)
 616 declare <vscale x 64 x i8> @llvm.vp.select.nxv64i8(<vscale x 64 x i1>, <vscale x 64 x i8>, <vscale x 64 x i8>, i32)
 617
 618 define <vscale x 64 x i8> @vmadd_vv_nxv64i8(<vscale x 64 x i8> %a, <vscale x 64 x i8> %b, <vscale x 64 x i8> %c,  <vscale x 64 x i1> %m, i32 zeroext %evl) {
 619 ; CHECK-LABEL: vmadd_vv_nxv64i8:
 620 ; CHECK:       # %bb.0:
 621 ; CHECK-NEXT:    vl8r.v v24, (a0)
 622 ; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
 623 ; CHECK-NEXT:    vmacc.vv v24, v8, v16
 624 ; CHECK-NEXT:    vsetvli zero, zero, e8, m8, tu, ma
 625 ; CHECK-NEXT:    vmerge.vvm v8, v8, v24, v0
 626 ; CHECK-NEXT:    ret
 627   %splat = insertelement <vscale x 64 x i1> poison, i1 -1, i32 0
 628   %allones = shufflevector <vscale x 64 x i1> %splat, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
 629   %x = call <vscale x 64 x i8> @llvm.vp.mul.nxv64i8(<vscale x 64 x i8> %a, <vscale x 64 x i8> %b, <vscale x 64 x i1> %allones, i32 %evl)
 630   %y = call <vscale x 64 x i8> @llvm.vp.add.nxv64i8(<vscale x 64 x i8> %x, <vscale x 64 x i8> %c, <vscale x 64 x i1> %allones, i32 %evl)
 631   %u = call <vscale x 64 x i8> @llvm.vp.merge.nxv64i8(<vscale x 64 x i1> %m, <vscale x 64 x i8> %y, <vscale x 64 x i8> %a, i32 %evl)
 632   ret <vscale x 64 x i8> %u
 633 }
 634
 635 define <vscale x 64 x i8> @vmadd_vv_nxv64i8_unmasked(<vscale x 64 x i8> %a, <vscale x 64 x i8> %b, <vscale x 64 x i8> %c,  <vscale x 64 x i1> %m, i32 zeroext %evl) {
 636 ; CHECK-LABEL: vmadd_vv_nxv64i8_unmasked:
 637 ; CHECK:       # %bb.0:
 638 ; CHECK-NEXT:    vl8r.v v24, (a0)
 639 ; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
 640 ; CHECK-NEXT:    vmacc.vv v24, v8, v16
 641 ; CHECK-NEXT:    vsetvli zero, zero, e8, m8, tu, ma
 642 ; CHECK-NEXT:    vmv.v.v v8, v24
 643 ; CHECK-NEXT:    ret
 644   %splat = insertelement <vscale x 64 x i1> poison, i1 -1, i32 0
 645   %allones = shufflevector <vscale x 64 x i1> %splat, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
 646   %x = call <vscale x 64 x i8> @llvm.vp.mul.nxv64i8(<vscale x 64 x i8> %a, <vscale x 64 x i8> %b, <vscale x 64 x i1> %allones, i32 %evl)
 647   %y = call <vscale x 64 x i8> @llvm.vp.add.nxv64i8(<vscale x 64 x i8> %x, <vscale x 64 x i8> %c, <vscale x 64 x i1> %allones, i32 %evl)
 648   %u = call <vscale x 64 x i8> @llvm.vp.merge.nxv64i8(<vscale x 64 x i1> %allones, <vscale x 64 x i8> %y, <vscale x 64 x i8> %a, i32 %evl)
 649   ret <vscale x 64 x i8> %u
 650 }
 651
 652 define <vscale x 64 x i8> @vmadd_vx_nxv64i8(<vscale x 64 x i8> %a, i8 %b, <vscale x 64 x i8> %c,  <vscale x 64 x i1> %m, i32 zeroext %evl) {
 653 ; CHECK-LABEL: vmadd_vx_nxv64i8:
 654 ; CHECK:       # %bb.0:
 655 ; CHECK-NEXT:    vsetvli zero, a1, e8, m8, tu, mu
 656 ; CHECK-NEXT:    vmadd.vx v8, a0, v16, v0.t
 657 ; CHECK-NEXT:    ret
 658   %elt.head = insertelement <vscale x 64 x i8> poison, i8 %b, i32 0
 659   %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
 660   %splat = insertelement <vscale x 64 x i1> poison, i1 -1, i32 0
 661   %allones = shufflevector <vscale x 64 x i1> %splat, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
 662   %x = call <vscale x 64 x i8> @llvm.vp.mul.nxv64i8(<vscale x 64 x i8> %a, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %allones, i32 %evl)
 663   %y = call <vscale x 64 x i8> @llvm.vp.add.nxv64i8(<vscale x 64 x i8> %x, <vscale x 64 x i8> %c, <vscale x 64 x i1> %allones, i32 %evl)
 664   %u = call <vscale x 64 x i8> @llvm.vp.merge.nxv64i8(<vscale x 64 x i1> %m, <vscale x 64 x i8> %y, <vscale x 64 x i8> %a, i32 %evl)
 665   ret <vscale x 64 x i8> %u
 666 }
 667
 668 define <vscale x 64 x i8> @vmadd_vx_nxv64i8_unmasked(<vscale x 64 x i8> %a, i8 %b, <vscale x 64 x i8> %c,  <vscale x 64 x i1> %m, i32 zeroext %evl) {
 669 ; CHECK-LABEL: vmadd_vx_nxv64i8_unmasked:
 670 ; CHECK:       # %bb.0:
 671 ; CHECK-NEXT:    vsetvli zero, a1, e8, m8, tu, ma
 672 ; CHECK-NEXT:    vmadd.vx v8, a0, v16
 673 ; CHECK-NEXT:    ret
 674   %elt.head = insertelement <vscale x 64 x i8> poison, i8 %b, i32 0
 675   %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
 676   %splat = insertelement <vscale x 64 x i1> poison, i1 -1, i32 0
 677   %allones = shufflevector <vscale x 64 x i1> %splat, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
 678   %x = call <vscale x 64 x i8> @llvm.vp.mul.nxv64i8(<vscale x 64 x i8> %a, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %allones, i32 %evl)
 679   %y = call <vscale x 64 x i8> @llvm.vp.add.nxv64i8(<vscale x 64 x i8> %x, <vscale x 64 x i8> %c, <vscale x 64 x i1> %allones, i32 %evl)
 680   %u = call <vscale x 64 x i8> @llvm.vp.merge.nxv64i8(<vscale x 64 x i1> %allones, <vscale x 64 x i8> %y, <vscale x 64 x i8> %a, i32 %evl)
 681   ret <vscale x 64 x i8> %u
 682 }
 683
 684 define <vscale x 64 x i8> @vmadd_vv_nxv64i8_ta(<vscale x 64 x i8> %a, <vscale x 64 x i8> %b, <vscale x 64 x i8> %c,  <vscale x 64 x i1> %m, i32 zeroext %evl) {
 685 ; CHECK-LABEL: vmadd_vv_nxv64i8_ta:
 686 ; CHECK:       # %bb.0:
 687 ; CHECK-NEXT:    vl8r.v v24, (a0)
 688 ; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
 689 ; CHECK-NEXT:    vmacc.vv v24, v8, v16
 690 ; CHECK-NEXT:    vmerge.vvm v8, v8, v24, v0
 691 ; CHECK-NEXT:    ret
 692   %splat = insertelement <vscale x 64 x i1> poison, i1 -1, i32 0
 693   %allones = shufflevector <vscale x 64 x i1> %splat, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
 694   %x = call <vscale x 64 x i8> @llvm.vp.mul.nxv64i8(<vscale x 64 x i8> %a, <vscale x 64 x i8> %b, <vscale x 64 x i1> %allones, i32 %evl)
 695   %y = call <vscale x 64 x i8> @llvm.vp.add.nxv64i8(<vscale x 64 x i8> %x, <vscale x 64 x i8> %c, <vscale x 64 x i1> %allones, i32 %evl)
 696   %u = call <vscale x 64 x i8> @llvm.vp.select.nxv64i8(<vscale x 64 x i1> %m, <vscale x 64 x i8> %y, <vscale x 64 x i8> %a, i32 %evl)
 697   ret <vscale x 64 x i8> %u
 698 }
 699
 700 define <vscale x 64 x i8> @vmadd_vx_nxv64i8_ta(<vscale x 64 x i8> %a, i8 %b, <vscale x 64 x i8> %c,  <vscale x 64 x i1> %m, i32 zeroext %evl) {
 701 ; CHECK-LABEL: vmadd_vx_nxv64i8_ta:
 702 ; CHECK:       # %bb.0:
 703 ; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
 704 ; CHECK-NEXT:    vmacc.vx v16, a0, v8
 705 ; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
 706 ; CHECK-NEXT:    ret
 707   %elt.head = insertelement <vscale x 64 x i8> poison, i8 %b, i32 0
 708   %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
 709   %splat = insertelement <vscale x 64 x i1> poison, i1 -1, i32 0
 710   %allones = shufflevector <vscale x 64 x i1> %splat, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
 711   %x = call <vscale x 64 x i8> @llvm.vp.mul.nxv64i8(<vscale x 64 x i8> %a, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %allones, i32 %evl)
 712   %y = call <vscale x 64 x i8> @llvm.vp.add.nxv64i8(<vscale x 64 x i8> %x, <vscale x 64 x i8> %c, <vscale x 64 x i1> %allones, i32 %evl)
 713   %u = call <vscale x 64 x i8> @llvm.vp.select.nxv64i8(<vscale x 64 x i1> %m, <vscale x 64 x i8> %y, <vscale x 64 x i8> %a, i32 %evl)
 714   ret <vscale x 64 x i8> %u
 715 }
 716
 717 declare <vscale x 1 x i16> @llvm.vp.mul.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i1>, i32)
 718 declare <vscale x 1 x i16> @llvm.vp.add.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i1>, i32)
 719 declare <vscale x 1 x i16> @llvm.vp.merge.nxv1i16(<vscale x 1 x i1>, <vscale x 1 x i16>, <vscale x 1 x i16>, i32)
 720 declare <vscale x 1 x i16> @llvm.vp.select.nxv1i16(<vscale x 1 x i1>, <vscale x 1 x i16>, <vscale x 1 x i16>, i32)
 721
 722 define <vscale x 1 x i16> @vmadd_vv_nxv1i16(<vscale x 1 x i16> %a, <vscale x 1 x i16> %b, <vscale x 1 x i16> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
 723 ; CHECK-LABEL: vmadd_vv_nxv1i16:
 724 ; CHECK:       # %bb.0:
 725 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 726 ; CHECK-NEXT:    vmadd.vv v9, v8, v10
 727 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, tu, ma
 728 ; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
 729 ; CHECK-NEXT:    ret
 730   %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
 731   %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
 732   %x = call <vscale x 1 x i16> @llvm.vp.mul.nxv1i16(<vscale x 1 x i16> %a, <vscale x 1 x i16> %b, <vscale x 1 x i1> %allones, i32 %evl)
 733   %y = call <vscale x 1 x i16> @llvm.vp.add.nxv1i16(<vscale x 1 x i16> %x, <vscale x 1 x i16> %c, <vscale x 1 x i1> %allones, i32 %evl)
 734   %u = call <vscale x 1 x i16> @llvm.vp.merge.nxv1i16(<vscale x 1 x i1> %m, <vscale x 1 x i16> %y, <vscale x 1 x i16> %a, i32 %evl)
 735   ret <vscale x 1 x i16> %u
 736 }
 737
 738 define <vscale x 1 x i16> @vmadd_vv_nxv1i16_unmasked(<vscale x 1 x i16> %a, <vscale x 1 x i16> %b, <vscale x 1 x i16> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
 739 ; CHECK-LABEL: vmadd_vv_nxv1i16_unmasked:
 740 ; CHECK:       # %bb.0:
 741 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 742 ; CHECK-NEXT:    vmadd.vv v9, v8, v10
 743 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, tu, ma
 744 ; CHECK-NEXT:    vmv.v.v v8, v9
 745 ; CHECK-NEXT:    ret
 746   %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
 747   %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
 748   %x = call <vscale x 1 x i16> @llvm.vp.mul.nxv1i16(<vscale x 1 x i16> %a, <vscale x 1 x i16> %b, <vscale x 1 x i1> %allones, i32 %evl)
 749   %y = call <vscale x 1 x i16> @llvm.vp.add.nxv1i16(<vscale x 1 x i16> %x, <vscale x 1 x i16> %c, <vscale x 1 x i1> %allones, i32 %evl)
 750   %u = call <vscale x 1 x i16> @llvm.vp.merge.nxv1i16(<vscale x 1 x i1> %allones, <vscale x 1 x i16> %y, <vscale x 1 x i16> %a, i32 %evl)
 751   ret <vscale x 1 x i16> %u
 752 }
 753
 754 define <vscale x 1 x i16> @vmadd_vx_nxv1i16(<vscale x 1 x i16> %a, i16 %b, <vscale x 1 x i16> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
 755 ; CHECK-LABEL: vmadd_vx_nxv1i16:
 756 ; CHECK:       # %bb.0:
 757 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
 758 ; CHECK-NEXT:    vmadd.vx v8, a0, v9, v0.t
 759 ; CHECK-NEXT:    ret
 760   %elt.head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0
 761   %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
 762   %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
 763   %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
 764   %x = call <vscale x 1 x i16> @llvm.vp.mul.nxv1i16(<vscale x 1 x i16> %a, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %allones, i32 %evl)
 765   %y = call <vscale x 1 x i16> @llvm.vp.add.nxv1i16(<vscale x 1 x i16> %x, <vscale x 1 x i16> %c, <vscale x 1 x i1> %allones, i32 %evl)
 766   %u = call <vscale x 1 x i16> @llvm.vp.merge.nxv1i16(<vscale x 1 x i1> %m, <vscale x 1 x i16> %y, <vscale x 1 x i16> %a, i32 %evl)
 767   ret <vscale x 1 x i16> %u
 768 }
 769
 770 define <vscale x 1 x i16> @vmadd_vx_nxv1i16_unmasked(<vscale x 1 x i16> %a, i16 %b, <vscale x 1 x i16> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
 771 ; CHECK-LABEL: vmadd_vx_nxv1i16_unmasked:
 772 ; CHECK:       # %bb.0:
 773 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, ma
 774 ; CHECK-NEXT:    vmadd.vx v8, a0, v9
 775 ; CHECK-NEXT:    ret
 776   %elt.head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0
 777   %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
 778   %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
 779   %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
 780   %x = call <vscale x 1 x i16> @llvm.vp.mul.nxv1i16(<vscale x 1 x i16> %a, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %allones, i32 %evl)
 781   %y = call <vscale x 1 x i16> @llvm.vp.add.nxv1i16(<vscale x 1 x i16> %x, <vscale x 1 x i16> %c, <vscale x 1 x i1> %allones, i32 %evl)
 782   %u = call <vscale x 1 x i16> @llvm.vp.merge.nxv1i16(<vscale x 1 x i1> %allones, <vscale x 1 x i16> %y, <vscale x 1 x i16> %a, i32 %evl)
 783   ret <vscale x 1 x i16> %u
 784 }
 785
 786 define <vscale x 1 x i16> @vmadd_vv_nxv1i16_ta(<vscale x 1 x i16> %a, <vscale x 1 x i16> %b, <vscale x 1 x i16> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
 787 ; CHECK-LABEL: vmadd_vv_nxv1i16_ta:
 788 ; CHECK:       # %bb.0:
 789 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 790 ; CHECK-NEXT:    vmadd.vv v9, v8, v10
 791 ; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
 792 ; CHECK-NEXT:    ret
 793   %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
 794   %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
 795   %x = call <vscale x 1 x i16> @llvm.vp.mul.nxv1i16(<vscale x 1 x i16> %a, <vscale x 1 x i16> %b, <vscale x 1 x i1> %allones, i32 %evl)
 796   %y = call <vscale x 1 x i16> @llvm.vp.add.nxv1i16(<vscale x 1 x i16> %x, <vscale x 1 x i16> %c, <vscale x 1 x i1> %allones, i32 %evl)
 797   %u = call <vscale x 1 x i16> @llvm.vp.select.nxv1i16(<vscale x 1 x i1> %m, <vscale x 1 x i16> %y, <vscale x 1 x i16> %a, i32 %evl)
 798   ret <vscale x 1 x i16> %u
 799 }
 800
 801 define <vscale x 1 x i16> @vmadd_vx_nxv1i16_ta(<vscale x 1 x i16> %a, i16 %b, <vscale x 1 x i16> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
 802 ; CHECK-LABEL: vmadd_vx_nxv1i16_ta:
 803 ; CHECK:       # %bb.0:
 804 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
 805 ; CHECK-NEXT:    vmacc.vx v9, a0, v8
 806 ; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
 807 ; CHECK-NEXT:    ret
 808   %elt.head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0
 809   %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
 810   %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
 811   %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
 812   %x = call <vscale x 1 x i16> @llvm.vp.mul.nxv1i16(<vscale x 1 x i16> %a, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %allones, i32 %evl)
 813   %y = call <vscale x 1 x i16> @llvm.vp.add.nxv1i16(<vscale x 1 x i16> %x, <vscale x 1 x i16> %c, <vscale x 1 x i1> %allones, i32 %evl)
 814   %u = call <vscale x 1 x i16> @llvm.vp.select.nxv1i16(<vscale x 1 x i1> %m, <vscale x 1 x i16> %y, <vscale x 1 x i16> %a, i32 %evl)
 815   ret <vscale x 1 x i16> %u
 816 }
 817
 818 declare <vscale x 2 x i16> @llvm.vp.mul.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>, <vscale x 2 x i1>, i32)
 819 declare <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>, <vscale x 2 x i1>, i32)
 820 declare <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1>, <vscale x 2 x i16>, <vscale x 2 x i16>, i32)
 821 declare <vscale x 2 x i16> @llvm.vp.select.nxv2i16(<vscale x 2 x i1>, <vscale x 2 x i16>, <vscale x 2 x i16>, i32)
 822
 823 define <vscale x 2 x i16> @vmadd_vv_nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, <vscale x 2 x i16> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
 824 ; CHECK-LABEL: vmadd_vv_nxv2i16:
 825 ; CHECK:       # %bb.0:
 826 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 827 ; CHECK-NEXT:    vmadd.vv v9, v8, v10
 828 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, tu, ma
 829 ; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
 830 ; CHECK-NEXT:    ret
 831   %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
 832   %allones = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
 833   %x = call <vscale x 2 x i16> @llvm.vp.mul.nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, <vscale x 2 x i1> %allones, i32 %evl)
 834   %y = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i16> %c, <vscale x 2 x i1> %allones, i32 %evl)
 835   %u = call <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1> %m, <vscale x 2 x i16> %y, <vscale x 2 x i16> %a, i32 %evl)
 836   ret <vscale x 2 x i16> %u
 837 }
 838
 839 define <vscale x 2 x i16> @vmadd_vv_nxv2i16_unmasked(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, <vscale x 2 x i16> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
 840 ; CHECK-LABEL: vmadd_vv_nxv2i16_unmasked:
 841 ; CHECK:       # %bb.0:
 842 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 843 ; CHECK-NEXT:    vmadd.vv v9, v8, v10
 844 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, tu, ma
 845 ; CHECK-NEXT:    vmv.v.v v8, v9
 846 ; CHECK-NEXT:    ret
 847   %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
 848   %allones = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
 849   %x = call <vscale x 2 x i16> @llvm.vp.mul.nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, <vscale x 2 x i1> %allones, i32 %evl)
 850   %y = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i16> %c, <vscale x 2 x i1> %allones, i32 %evl)
 851   %u = call <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1> %allones, <vscale x 2 x i16> %y, <vscale x 2 x i16> %a, i32 %evl)
 852   ret <vscale x 2 x i16> %u
 853 }
 854
 855 define <vscale x 2 x i16> @vmadd_vx_nxv2i16(<vscale x 2 x i16> %a, i16 %b, <vscale x 2 x i16> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
 856 ; CHECK-LABEL: vmadd_vx_nxv2i16:
 857 ; CHECK:       # %bb.0:
 858 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
 859 ; CHECK-NEXT:    vmadd.vx v8, a0, v9, v0.t
 860 ; CHECK-NEXT:    ret
 861   %elt.head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
 862   %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
 863   %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
 864   %allones = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
 865   %x = call <vscale x 2 x i16> @llvm.vp.mul.nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %allones, i32 %evl)
 866   %y = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i16> %c, <vscale x 2 x i1> %allones, i32 %evl)
 867   %u = call <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1> %m, <vscale x 2 x i16> %y, <vscale x 2 x i16> %a, i32 %evl)
 868   ret <vscale x 2 x i16> %u
 869 }
 870
 871 define <vscale x 2 x i16> @vmadd_vx_nxv2i16_unmasked(<vscale x 2 x i16> %a, i16 %b, <vscale x 2 x i16> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
     ; Splats scalar %b and forms (%a * splat(%b)) + %c via all-ones-masked
     ; vp.mul/vp.add; the final vp.merge also uses the all-ones mask, so
     ; parameter %m is never read.
     ; Expected codegen: a single vmadd.vx with no mask operand under `tu, ma`.
 872 ; CHECK-LABEL: vmadd_vx_nxv2i16_unmasked:
 873 ; CHECK:       # %bb.0:
 874 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, ma
 875 ; CHECK-NEXT:    vmadd.vx v8, a0, v9
 876 ; CHECK-NEXT:    ret
 877   %elt.head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
 878   %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
 879   %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
 880   %allones = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
 881   %x = call <vscale x 2 x i16> @llvm.vp.mul.nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %allones, i32 %evl)
 882   %y = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i16> %c, <vscale x 2 x i1> %allones, i32 %evl)
 883   %u = call <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1> %allones, <vscale x 2 x i16> %y, <vscale x 2 x i16> %a, i32 %evl)
 884   ret <vscale x 2 x i16> %u
 885 }
 886
 887 define <vscale x 2 x i16> @vmadd_vv_nxv2i16_ta(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, <vscale x 2 x i16> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
     ; Forms (%a * %b) + %c via all-ones-masked vp.mul/vp.add, then picks between
     ; the sum and %a with vp.select under mask %m.
     ; Expected codegen: vmadd.vv then vmerge.vvm on v0, both under one `ta, ma`
     ; vsetvli.
 888 ; CHECK-LABEL: vmadd_vv_nxv2i16_ta:
 889 ; CHECK:       # %bb.0:
 890 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 891 ; CHECK-NEXT:    vmadd.vv v9, v8, v10
 892 ; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
 893 ; CHECK-NEXT:    ret
 894   %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
 895   %allones = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
 896   %x = call <vscale x 2 x i16> @llvm.vp.mul.nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, <vscale x 2 x i1> %allones, i32 %evl)
 897   %y = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i16> %c, <vscale x 2 x i1> %allones, i32 %evl)
 898   %u = call <vscale x 2 x i16> @llvm.vp.select.nxv2i16(<vscale x 2 x i1> %m, <vscale x 2 x i16> %y, <vscale x 2 x i16> %a, i32 %evl)
 899   ret <vscale x 2 x i16> %u
 900 }
 901
 902 define <vscale x 2 x i16> @vmadd_vx_nxv2i16_ta(<vscale x 2 x i16> %a, i16 %b, <vscale x 2 x i16> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
     ; Splats scalar %b, forms (%a * splat(%b)) + %c via all-ones-masked
     ; vp.mul/vp.add, then picks between the sum and %a with vp.select under %m.
     ; Expected codegen: vmacc.vx then vmerge.vvm on v0 under one `ta, ma` vsetvli.
 903 ; CHECK-LABEL: vmadd_vx_nxv2i16_ta:
 904 ; CHECK:       # %bb.0:
 905 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
 906 ; CHECK-NEXT:    vmacc.vx v9, a0, v8
 907 ; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
 908 ; CHECK-NEXT:    ret
 909   %elt.head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
 910   %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
 911   %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
 912   %allones = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
 913   %x = call <vscale x 2 x i16> @llvm.vp.mul.nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %allones, i32 %evl)
 914   %y = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i16> %c, <vscale x 2 x i1> %allones, i32 %evl)
 915   %u = call <vscale x 2 x i16> @llvm.vp.select.nxv2i16(<vscale x 2 x i1> %m, <vscale x 2 x i16> %y, <vscale x 2 x i16> %a, i32 %evl)
 916   ret <vscale x 2 x i16> %u
 917 }
 918
 919 declare <vscale x 4 x i16> @llvm.vp.mul.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i1>, i32)
 920 declare <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i1>, i32)
 921 declare <vscale x 4 x i16> @llvm.vp.merge.nxv4i16(<vscale x 4 x i1>, <vscale x 4 x i16>, <vscale x 4 x i16>, i32)
 922 declare <vscale x 4 x i16> @llvm.vp.select.nxv4i16(<vscale x 4 x i1>, <vscale x 4 x i16>, <vscale x 4 x i16>, i32)
 923
 924 define <vscale x 4 x i16> @vmadd_vv_nxv4i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
     ; Forms (%a * %b) + %c via all-ones-masked vp.mul/vp.add, then vp.merge's
     ; the sum into %a under mask %m.
     ; Expected codegen: vmadd.vv under `ta, ma`, then a vsetvli switch to
     ; `tu, ma` before vmerge.vvm on v0.
 925 ; CHECK-LABEL: vmadd_vv_nxv4i16:
 926 ; CHECK:       # %bb.0:
 927 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 928 ; CHECK-NEXT:    vmadd.vv v9, v8, v10
 929 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, tu, ma
 930 ; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
 931 ; CHECK-NEXT:    ret
 932   %splat = insertelement <vscale x 4 x i1> poison, i1 -1, i32 0
 933   %allones = shufflevector <vscale x 4 x i1> %splat, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
 934   %x = call <vscale x 4 x i16> @llvm.vp.mul.nxv4i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i1> %allones, i32 %evl)
 935   %y = call <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16> %x, <vscale x 4 x i16> %c, <vscale x 4 x i1> %allones, i32 %evl)
 936   %u = call <vscale x 4 x i16> @llvm.vp.merge.nxv4i16(<vscale x 4 x i1> %m, <vscale x 4 x i16> %y, <vscale x 4 x i16> %a, i32 %evl)
 937   ret <vscale x 4 x i16> %u
 938 }
 939
 940 define <vscale x 4 x i16> @vmadd_vv_nxv4i16_unmasked(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
     ; Forms (%a * %b) + %c via all-ones-masked vp.mul/vp.add; the final vp.merge
     ; also uses the all-ones mask, so parameter %m is never read.
     ; Expected codegen: vmadd.vv under `ta, ma`, then vmv.v.v under `tu, ma`.
 941 ; CHECK-LABEL: vmadd_vv_nxv4i16_unmasked:
 942 ; CHECK:       # %bb.0:
 943 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 944 ; CHECK-NEXT:    vmadd.vv v9, v8, v10
 945 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, tu, ma
 946 ; CHECK-NEXT:    vmv.v.v v8, v9
 947 ; CHECK-NEXT:    ret
 948   %splat = insertelement <vscale x 4 x i1> poison, i1 -1, i32 0
 949   %allones = shufflevector <vscale x 4 x i1> %splat, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
 950   %x = call <vscale x 4 x i16> @llvm.vp.mul.nxv4i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i1> %allones, i32 %evl)
 951   %y = call <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16> %x, <vscale x 4 x i16> %c, <vscale x 4 x i1> %allones, i32 %evl)
 952   %u = call <vscale x 4 x i16> @llvm.vp.merge.nxv4i16(<vscale x 4 x i1> %allones, <vscale x 4 x i16> %y, <vscale x 4 x i16> %a, i32 %evl)
 953   ret <vscale x 4 x i16> %u
 954 }
 955
 956 define <vscale x 4 x i16> @vmadd_vx_nxv4i16(<vscale x 4 x i16> %a, i16 %b, <vscale x 4 x i16> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
     ; Splats scalar %b, forms (%a * splat(%b)) + %c via all-ones-masked
     ; vp.mul/vp.add, then vp.merge's the sum into %a under mask %m.
     ; Expected codegen: a single masked vmadd.vx (v0.t) under `tu, mu`.
 957 ; CHECK-LABEL: vmadd_vx_nxv4i16:
 958 ; CHECK:       # %bb.0:
 959 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
 960 ; CHECK-NEXT:    vmadd.vx v8, a0, v9, v0.t
 961 ; CHECK-NEXT:    ret
 962   %elt.head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
 963   %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
 964   %splat = insertelement <vscale x 4 x i1> poison, i1 -1, i32 0
 965   %allones = shufflevector <vscale x 4 x i1> %splat, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
 966   %x = call <vscale x 4 x i16> @llvm.vp.mul.nxv4i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %allones, i32 %evl)
 967   %y = call <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16> %x, <vscale x 4 x i16> %c, <vscale x 4 x i1> %allones, i32 %evl)
 968   %u = call <vscale x 4 x i16> @llvm.vp.merge.nxv4i16(<vscale x 4 x i1> %m, <vscale x 4 x i16> %y, <vscale x 4 x i16> %a, i32 %evl)
 969   ret <vscale x 4 x i16> %u
 970 }
 971
 972 define <vscale x 4 x i16> @vmadd_vx_nxv4i16_unmasked(<vscale x 4 x i16> %a, i16 %b, <vscale x 4 x i16> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
     ; Splats scalar %b and forms (%a * splat(%b)) + %c via all-ones-masked
     ; vp.mul/vp.add; the final vp.merge also uses the all-ones mask, so
     ; parameter %m is never read.
     ; Expected codegen: a single vmadd.vx with no mask operand under `tu, ma`.
 973 ; CHECK-LABEL: vmadd_vx_nxv4i16_unmasked:
 974 ; CHECK:       # %bb.0:
 975 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, ma
 976 ; CHECK-NEXT:    vmadd.vx v8, a0, v9
 977 ; CHECK-NEXT:    ret
 978   %elt.head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
 979   %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
 980   %splat = insertelement <vscale x 4 x i1> poison, i1 -1, i32 0
 981   %allones = shufflevector <vscale x 4 x i1> %splat, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
 982   %x = call <vscale x 4 x i16> @llvm.vp.mul.nxv4i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %allones, i32 %evl)
 983   %y = call <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16> %x, <vscale x 4 x i16> %c, <vscale x 4 x i1> %allones, i32 %evl)
 984   %u = call <vscale x 4 x i16> @llvm.vp.merge.nxv4i16(<vscale x 4 x i1> %allones, <vscale x 4 x i16> %y, <vscale x 4 x i16> %a, i32 %evl)
 985   ret <vscale x 4 x i16> %u
 986 }
 987
 988 define <vscale x 4 x i16> @vmadd_vv_nxv4i16_ta(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
     ; Forms (%a * %b) + %c via all-ones-masked vp.mul/vp.add, then picks between
     ; the sum and %a with vp.select under mask %m.
     ; Expected codegen: vmadd.vv then vmerge.vvm on v0, both under one `ta, ma`
     ; vsetvli.
 989 ; CHECK-LABEL: vmadd_vv_nxv4i16_ta:
 990 ; CHECK:       # %bb.0:
 991 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 992 ; CHECK-NEXT:    vmadd.vv v9, v8, v10
 993 ; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
 994 ; CHECK-NEXT:    ret
 995   %splat = insertelement <vscale x 4 x i1> poison, i1 -1, i32 0
 996   %allones = shufflevector <vscale x 4 x i1> %splat, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
 997   %x = call <vscale x 4 x i16> @llvm.vp.mul.nxv4i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i1> %allones, i32 %evl)
 998   %y = call <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16> %x, <vscale x 4 x i16> %c, <vscale x 4 x i1> %allones, i32 %evl)
 999   %u = call <vscale x 4 x i16> @llvm.vp.select.nxv4i16(<vscale x 4 x i1> %m, <vscale x 4 x i16> %y, <vscale x 4 x i16> %a, i32 %evl)
1000   ret <vscale x 4 x i16> %u
1001 }
1002
1003 define <vscale x 4 x i16> @vmadd_vx_nxv4i16_ta(<vscale x 4 x i16> %a, i16 %b, <vscale x 4 x i16> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
     ; Splats scalar %b, forms (%a * splat(%b)) + %c via all-ones-masked
     ; vp.mul/vp.add, then picks between the sum and %a with vp.select under %m.
     ; Expected codegen: vmacc.vx then vmerge.vvm on v0 under one `ta, ma` vsetvli.
1004 ; CHECK-LABEL: vmadd_vx_nxv4i16_ta:
1005 ; CHECK:       # %bb.0:
1006 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
1007 ; CHECK-NEXT:    vmacc.vx v9, a0, v8
1008 ; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
1009 ; CHECK-NEXT:    ret
1010   %elt.head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
1011   %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
1012   %splat = insertelement <vscale x 4 x i1> poison, i1 -1, i32 0
1013   %allones = shufflevector <vscale x 4 x i1> %splat, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
1014   %x = call <vscale x 4 x i16> @llvm.vp.mul.nxv4i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %allones, i32 %evl)
1015   %y = call <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16> %x, <vscale x 4 x i16> %c, <vscale x 4 x i1> %allones, i32 %evl)
1016   %u = call <vscale x 4 x i16> @llvm.vp.select.nxv4i16(<vscale x 4 x i1> %m, <vscale x 4 x i16> %y, <vscale x 4 x i16> %a, i32 %evl)
1017   ret <vscale x 4 x i16> %u
1018 }
1019
1020 declare <vscale x 8 x i16> @llvm.vp.mul.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i32)
1021 declare <vscale x 8 x i16> @llvm.vp.add.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i32)
1022 declare <vscale x 8 x i16> @llvm.vp.merge.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32)
1023 declare <vscale x 8 x i16> @llvm.vp.select.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32)
1024
1025 define <vscale x 8 x i16> @vmadd_vv_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
     ; Forms (%a * %b) + %c via all-ones-masked vp.mul/vp.add, then vp.merge's
     ; the sum into %a under mask %m.
     ; Expected codegen: vmadd.vv under `ta, ma`, then a vsetvli switch to
     ; `tu, ma` before vmerge.vvm on v0.
1026 ; CHECK-LABEL: vmadd_vv_nxv8i16:
1027 ; CHECK:       # %bb.0:
1028 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
1029 ; CHECK-NEXT:    vmadd.vv v10, v8, v12
1030 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, tu, ma
1031 ; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
1032 ; CHECK-NEXT:    ret
1033   %splat = insertelement <vscale x 8 x i1> poison, i1 -1, i32 0
1034   %allones = shufflevector <vscale x 8 x i1> %splat, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
1035   %x = call <vscale x 8 x i16> @llvm.vp.mul.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i1> %allones, i32 %evl)
1036   %y = call <vscale x 8 x i16> @llvm.vp.add.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %c, <vscale x 8 x i1> %allones, i32 %evl)
1037   %u = call <vscale x 8 x i16> @llvm.vp.merge.nxv8i16(<vscale x 8 x i1> %m, <vscale x 8 x i16> %y, <vscale x 8 x i16> %a, i32 %evl)
1038   ret <vscale x 8 x i16> %u
1039 }
1040
1041 define <vscale x 8 x i16> @vmadd_vv_nxv8i16_unmasked(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
     ; Forms (%a * %b) + %c via all-ones-masked vp.mul/vp.add; the final vp.merge
     ; also uses the all-ones mask, so parameter %m is never read.
     ; Expected codegen: vmadd.vv under `ta, ma`, then vmv.v.v under `tu, ma`.
1042 ; CHECK-LABEL: vmadd_vv_nxv8i16_unmasked:
1043 ; CHECK:       # %bb.0:
1044 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
1045 ; CHECK-NEXT:    vmadd.vv v10, v8, v12
1046 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, tu, ma
1047 ; CHECK-NEXT:    vmv.v.v v8, v10
1048 ; CHECK-NEXT:    ret
1049   %splat = insertelement <vscale x 8 x i1> poison, i1 -1, i32 0
1050   %allones = shufflevector <vscale x 8 x i1> %splat, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
1051   %x = call <vscale x 8 x i16> @llvm.vp.mul.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i1> %allones, i32 %evl)
1052   %y = call <vscale x 8 x i16> @llvm.vp.add.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %c, <vscale x 8 x i1> %allones, i32 %evl)
1053   %u = call <vscale x 8 x i16> @llvm.vp.merge.nxv8i16(<vscale x 8 x i1> %allones, <vscale x 8 x i16> %y, <vscale x 8 x i16> %a, i32 %evl)
1054   ret <vscale x 8 x i16> %u
1055 }
1056
1057 define <vscale x 8 x i16> @vmadd_vx_nxv8i16(<vscale x 8 x i16> %a, i16 %b, <vscale x 8 x i16> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
     ; Splats scalar %b, forms (%a * splat(%b)) + %c via all-ones-masked
     ; vp.mul/vp.add, then vp.merge's the sum into %a under mask %m.
     ; Expected codegen: a single masked vmadd.vx (v0.t) under `tu, mu`.
1058 ; CHECK-LABEL: vmadd_vx_nxv8i16:
1059 ; CHECK:       # %bb.0:
1060 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
1061 ; CHECK-NEXT:    vmadd.vx v8, a0, v10, v0.t
1062 ; CHECK-NEXT:    ret
1063   %elt.head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0
1064   %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
1065   %splat = insertelement <vscale x 8 x i1> poison, i1 -1, i32 0
1066   %allones = shufflevector <vscale x 8 x i1> %splat, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
1067   %x = call <vscale x 8 x i16> @llvm.vp.mul.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %allones, i32 %evl)
1068   %y = call <vscale x 8 x i16> @llvm.vp.add.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %c, <vscale x 8 x i1> %allones, i32 %evl)
1069   %u = call <vscale x 8 x i16> @llvm.vp.merge.nxv8i16(<vscale x 8 x i1> %m, <vscale x 8 x i16> %y, <vscale x 8 x i16> %a, i32 %evl)
1070   ret <vscale x 8 x i16> %u
1071 }
1072
1073 define <vscale x 8 x i16> @vmadd_vx_nxv8i16_unmasked(<vscale x 8 x i16> %a, i16 %b, <vscale x 8 x i16> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
     ; Splats scalar %b and forms (%a * splat(%b)) + %c via all-ones-masked
     ; vp.mul/vp.add; the final vp.merge also uses the all-ones mask, so
     ; parameter %m is never read.
     ; Expected codegen: a single vmadd.vx with no mask operand under `tu, ma`.
1074 ; CHECK-LABEL: vmadd_vx_nxv8i16_unmasked:
1075 ; CHECK:       # %bb.0:
1076 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, ma
1077 ; CHECK-NEXT:    vmadd.vx v8, a0, v10
1078 ; CHECK-NEXT:    ret
1079   %elt.head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0
1080   %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
1081   %splat = insertelement <vscale x 8 x i1> poison, i1 -1, i32 0
1082   %allones = shufflevector <vscale x 8 x i1> %splat, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
1083   %x = call <vscale x 8 x i16> @llvm.vp.mul.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %allones, i32 %evl)
1084   %y = call <vscale x 8 x i16> @llvm.vp.add.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %c, <vscale x 8 x i1> %allones, i32 %evl)
1085   %u = call <vscale x 8 x i16> @llvm.vp.merge.nxv8i16(<vscale x 8 x i1> %allones, <vscale x 8 x i16> %y, <vscale x 8 x i16> %a, i32 %evl)
1086   ret <vscale x 8 x i16> %u
1087 }
1088
1089 define <vscale x 8 x i16> @vmadd_vv_nxv8i16_ta(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
     ; Forms (%a * %b) + %c via all-ones-masked vp.mul/vp.add, then picks between
     ; the sum and %a with vp.select under mask %m.
     ; Expected codegen: vmadd.vv then vmerge.vvm on v0, both under one `ta, ma`
     ; vsetvli.
1090 ; CHECK-LABEL: vmadd_vv_nxv8i16_ta:
1091 ; CHECK:       # %bb.0:
1092 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
1093 ; CHECK-NEXT:    vmadd.vv v10, v8, v12
1094 ; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
1095 ; CHECK-NEXT:    ret
1096   %splat = insertelement <vscale x 8 x i1> poison, i1 -1, i32 0
1097   %allones = shufflevector <vscale x 8 x i1> %splat, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
1098   %x = call <vscale x 8 x i16> @llvm.vp.mul.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i1> %allones, i32 %evl)
1099   %y = call <vscale x 8 x i16> @llvm.vp.add.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %c, <vscale x 8 x i1> %allones, i32 %evl)
1100   %u = call <vscale x 8 x i16> @llvm.vp.select.nxv8i16(<vscale x 8 x i1> %m, <vscale x 8 x i16> %y, <vscale x 8 x i16> %a, i32 %evl)
1101   ret <vscale x 8 x i16> %u
1102 }
1103
1104 define <vscale x 8 x i16> @vmadd_vx_nxv8i16_ta(<vscale x 8 x i16> %a, i16 %b, <vscale x 8 x i16> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
     ; Splats scalar %b, forms (%a * splat(%b)) + %c via all-ones-masked
     ; vp.mul/vp.add, then picks between the sum and %a with vp.select under %m.
     ; Expected codegen: vmacc.vx then vmerge.vvm on v0 under one `ta, ma` vsetvli.
1105 ; CHECK-LABEL: vmadd_vx_nxv8i16_ta:
1106 ; CHECK:       # %bb.0:
1107 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
1108 ; CHECK-NEXT:    vmacc.vx v10, a0, v8
1109 ; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
1110 ; CHECK-NEXT:    ret
1111   %elt.head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0
1112   %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
1113   %splat = insertelement <vscale x 8 x i1> poison, i1 -1, i32 0
1114   %allones = shufflevector <vscale x 8 x i1> %splat, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
1115   %x = call <vscale x 8 x i16> @llvm.vp.mul.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %allones, i32 %evl)
1116   %y = call <vscale x 8 x i16> @llvm.vp.add.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %c, <vscale x 8 x i1> %allones, i32 %evl)
1117   %u = call <vscale x 8 x i16> @llvm.vp.select.nxv8i16(<vscale x 8 x i1> %m, <vscale x 8 x i16> %y, <vscale x 8 x i16> %a, i32 %evl)
1118   ret <vscale x 8 x i16> %u
1119 }
1120
1121 declare <vscale x 16 x i16> @llvm.vp.mul.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>, <vscale x 16 x i1>, i32)
1122 declare <vscale x 16 x i16> @llvm.vp.add.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>, <vscale x 16 x i1>, i32)
1123 declare <vscale x 16 x i16> @llvm.vp.merge.nxv16i16(<vscale x 16 x i1>, <vscale x 16 x i16>, <vscale x 16 x i16>, i32)
1124 declare <vscale x 16 x i16> @llvm.vp.select.nxv16i16(<vscale x 16 x i1>, <vscale x 16 x i16>, <vscale x 16 x i16>, i32)
1125
1126 define <vscale x 16 x i16> @vmadd_vv_nxv16i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %b, <vscale x 16 x i16> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
     ; Forms (%a * %b) + %c via all-ones-masked vp.mul/vp.add, then vp.merge's
     ; the sum into %a under mask %m.
     ; Expected codegen: vmadd.vv under `ta, ma`, then a vsetvli switch to
     ; `tu, ma` before vmerge.vvm on v0.
1127 ; CHECK-LABEL: vmadd_vv_nxv16i16:
1128 ; CHECK:       # %bb.0:
1129 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
1130 ; CHECK-NEXT:    vmadd.vv v12, v8, v16
1131 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, tu, ma
1132 ; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
1133 ; CHECK-NEXT:    ret
1134   %splat = insertelement <vscale x 16 x i1> poison, i1 -1, i32 0
1135   %allones = shufflevector <vscale x 16 x i1> %splat, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
1136   %x = call <vscale x 16 x i16> @llvm.vp.mul.nxv16i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %b, <vscale x 16 x i1> %allones, i32 %evl)
1137   %y = call <vscale x 16 x i16> @llvm.vp.add.nxv16i16(<vscale x 16 x i16> %x, <vscale x 16 x i16> %c, <vscale x 16 x i1> %allones, i32 %evl)
1138   %u = call <vscale x 16 x i16> @llvm.vp.merge.nxv16i16(<vscale x 16 x i1> %m, <vscale x 16 x i16> %y, <vscale x 16 x i16> %a, i32 %evl)
1139   ret <vscale x 16 x i16> %u
1140 }
1141
1142 define <vscale x 16 x i16> @vmadd_vv_nxv16i16_unmasked(<vscale x 16 x i16> %a, <vscale x 16 x i16> %b, <vscale x 16 x i16> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
     ; Forms (%a * %b) + %c via all-ones-masked vp.mul/vp.add; the final vp.merge
     ; also uses the all-ones mask, so parameter %m is never read.
     ; Expected codegen: vmadd.vv under `ta, ma`, then vmv.v.v under `tu, ma`.
1143 ; CHECK-LABEL: vmadd_vv_nxv16i16_unmasked:
1144 ; CHECK:       # %bb.0:
1145 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
1146 ; CHECK-NEXT:    vmadd.vv v12, v8, v16
1147 ; CHECK-NEXT:    vsetvli zero, zero, e16, m4, tu, ma
1148 ; CHECK-NEXT:    vmv.v.v v8, v12
1149 ; CHECK-NEXT:    ret
1150   %splat = insertelement <vscale x 16 x i1> poison, i1 -1, i32 0
1151   %allones = shufflevector <vscale x 16 x i1> %splat, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
1152   %x = call <vscale x 16 x i16> @llvm.vp.mul.nxv16i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %b, <vscale x 16 x i1> %allones, i32 %evl)
1153   %y = call <vscale x 16 x i16> @llvm.vp.add.nxv16i16(<vscale x 16 x i16> %x, <vscale x 16 x i16> %c, <vscale x 16 x i1> %allones, i32 %evl)
1154   %u = call <vscale x 16 x i16> @llvm.vp.merge.nxv16i16(<vscale x 16 x i1> %allones, <vscale x 16 x i16> %y, <vscale x 16 x i16> %a, i32 %evl)
1155   ret <vscale x 16 x i16> %u
1156 }
1157
1158 define <vscale x 16 x i16> @vmadd_vx_nxv16i16(<vscale x 16 x i16> %a, i16 %b, <vscale x 16 x i16> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
     ; Splats scalar %b, forms (%a * splat(%b)) + %c via all-ones-masked
     ; vp.mul/vp.add, then vp.merge's the sum into %a under mask %m.
     ; Expected codegen: a single masked vmadd.vx (v0.t) under `tu, mu`.
1159 ; CHECK-LABEL: vmadd_vx_nxv16i16:
1160 ; CHECK:       # %bb.0:
1161 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
1162 ; CHECK-NEXT:    vmadd.vx v8, a0, v12, v0.t
1163 ; CHECK-NEXT:    ret
1164   %elt.head = insertelement <vscale x 16 x i16> poison, i16 %b, i32 0
1165   %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
1166   %splat = insertelement <vscale x 16 x i1> poison, i1 -1, i32 0
1167   %allones = shufflevector <vscale x 16 x i1> %splat, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
1168   %x = call <vscale x 16 x i16> @llvm.vp.mul.nxv16i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %allones, i32 %evl)
1169   %y = call <vscale x 16 x i16> @llvm.vp.add.nxv16i16(<vscale x 16 x i16> %x, <vscale x 16 x i16> %c, <vscale x 16 x i1> %allones, i32 %evl)
1170   %u = call <vscale x 16 x i16> @llvm.vp.merge.nxv16i16(<vscale x 16 x i1> %m, <vscale x 16 x i16> %y, <vscale x 16 x i16> %a, i32 %evl)
1171   ret <vscale x 16 x i16> %u
1172 }
1173
1174 define <vscale x 16 x i16> @vmadd_vx_nxv16i16_unmasked(<vscale x 16 x i16> %a, i16 %b, <vscale x 16 x i16> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
     ; Splats scalar %b and forms (%a * splat(%b)) + %c via all-ones-masked
     ; vp.mul/vp.add; the final vp.merge also uses the all-ones mask, so
     ; parameter %m is never read.
     ; Expected codegen: a single vmadd.vx with no mask operand under `tu, ma`.
1175 ; CHECK-LABEL: vmadd_vx_nxv16i16_unmasked:
1176 ; CHECK:       # %bb.0:
1177 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, ma
1178 ; CHECK-NEXT:    vmadd.vx v8, a0, v12
1179 ; CHECK-NEXT:    ret
1180   %elt.head = insertelement <vscale x 16 x i16> poison, i16 %b, i32 0
1181   %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
1182   %splat = insertelement <vscale x 16 x i1> poison, i1 -1, i32 0
1183   %allones = shufflevector <vscale x 16 x i1> %splat, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
1184   %x = call <vscale x 16 x i16> @llvm.vp.mul.nxv16i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %allones, i32 %evl)
1185   %y = call <vscale x 16 x i16> @llvm.vp.add.nxv16i16(<vscale x 16 x i16> %x, <vscale x 16 x i16> %c, <vscale x 16 x i1> %allones, i32 %evl)
1186   %u = call <vscale x 16 x i16> @llvm.vp.merge.nxv16i16(<vscale x 16 x i1> %allones, <vscale x 16 x i16> %y, <vscale x 16 x i16> %a, i32 %evl)
1187   ret <vscale x 16 x i16> %u
1188 }
1189
1190 define <vscale x 16 x i16> @vmadd_vv_nxv16i16_ta(<vscale x 16 x i16> %a, <vscale x 16 x i16> %b, <vscale x 16 x i16> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
     ; Forms (%a * %b) + %c via all-ones-masked vp.mul/vp.add, then picks between
     ; the sum and %a with vp.select under mask %m.
     ; Expected codegen: vmadd.vv then vmerge.vvm on v0, both under one `ta, ma`
     ; vsetvli.
1191 ; CHECK-LABEL: vmadd_vv_nxv16i16_ta:
1192 ; CHECK:       # %bb.0:
1193 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
1194 ; CHECK-NEXT:    vmadd.vv v12, v8, v16
1195 ; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
1196 ; CHECK-NEXT:    ret
1197   %splat = insertelement <vscale x 16 x i1> poison, i1 -1, i32 0
1198   %allones = shufflevector <vscale x 16 x i1> %splat, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
1199   %x = call <vscale x 16 x i16> @llvm.vp.mul.nxv16i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %b, <vscale x 16 x i1> %allones, i32 %evl)
1200   %y = call <vscale x 16 x i16> @llvm.vp.add.nxv16i16(<vscale x 16 x i16> %x, <vscale x 16 x i16> %c, <vscale x 16 x i1> %allones, i32 %evl)
1201   %u = call <vscale x 16 x i16> @llvm.vp.select.nxv16i16(<vscale x 16 x i1> %m, <vscale x 16 x i16> %y, <vscale x 16 x i16> %a, i32 %evl)
1202   ret <vscale x 16 x i16> %u
1203 }
1204
1205 define <vscale x 16 x i16> @vmadd_vx_nxv16i16_ta(<vscale x 16 x i16> %a, i16 %b, <vscale x 16 x i16> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
     ; Splats scalar %b, forms (%a * splat(%b)) + %c via all-ones-masked
     ; vp.mul/vp.add, then picks between the sum and %a with vp.select under %m.
     ; Expected codegen: vmacc.vx then vmerge.vvm on v0 under one `ta, ma` vsetvli.
1206 ; CHECK-LABEL: vmadd_vx_nxv16i16_ta:
1207 ; CHECK:       # %bb.0:
1208 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
1209 ; CHECK-NEXT:    vmacc.vx v12, a0, v8
1210 ; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
1211 ; CHECK-NEXT:    ret
1212   %elt.head = insertelement <vscale x 16 x i16> poison, i16 %b, i32 0
1213   %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
1214   %splat = insertelement <vscale x 16 x i1> poison, i1 -1, i32 0
1215   %allones = shufflevector <vscale x 16 x i1> %splat, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
1216   %x = call <vscale x 16 x i16> @llvm.vp.mul.nxv16i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %allones, i32 %evl)
1217   %y = call <vscale x 16 x i16> @llvm.vp.add.nxv16i16(<vscale x 16 x i16> %x, <vscale x 16 x i16> %c, <vscale x 16 x i1> %allones, i32 %evl)
1218   %u = call <vscale x 16 x i16> @llvm.vp.select.nxv16i16(<vscale x 16 x i1> %m, <vscale x 16 x i16> %y, <vscale x 16 x i16> %a, i32 %evl)
1219   ret <vscale x 16 x i16> %u
1220 }
1221
1222 declare <vscale x 32 x i16> @llvm.vp.mul.nxv32i16(<vscale x 32 x i16>, <vscale x 32 x i16>, <vscale x 32 x i1>, i32)
1223 declare <vscale x 32 x i16> @llvm.vp.add.nxv32i16(<vscale x 32 x i16>, <vscale x 32 x i16>, <vscale x 32 x i1>, i32)
1224 declare <vscale x 32 x i16> @llvm.vp.merge.nxv32i16(<vscale x 32 x i1>, <vscale x 32 x i16>, <vscale x 32 x i16>, i32)
1225 declare <vscale x 32 x i16> @llvm.vp.select.nxv32i16(<vscale x 32 x i1>, <vscale x 32 x i16>, <vscale x 32 x i16>, i32)
1226
1227 define <vscale x 32 x i16> @vmadd_vv_nxv32i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b, <vscale x 32 x i16> %c,  <vscale x 32 x i1> %m, i32 zeroext %evl) {
     ; Forms (%a * %b) + %c via all-ones-masked vp.mul/vp.add, then vp.merge's
     ; the sum into %a under mask %m.
     ; Expected codegen: a vl8re16.v load from (a0) into v24 (presumably %c
     ; arriving indirectly at m8 -- confirm against the RVV calling convention),
     ; vmacc.vv into v24 under `ta, ma`, then `tu, ma` and vmerge.vvm on v0.
1228 ; CHECK-LABEL: vmadd_vv_nxv32i16:
1229 ; CHECK:       # %bb.0:
1230 ; CHECK-NEXT:    vl8re16.v v24, (a0)
1231 ; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
1232 ; CHECK-NEXT:    vmacc.vv v24, v8, v16
1233 ; CHECK-NEXT:    vsetvli zero, zero, e16, m8, tu, ma
1234 ; CHECK-NEXT:    vmerge.vvm v8, v8, v24, v0
1235 ; CHECK-NEXT:    ret
1236   %splat = insertelement <vscale x 32 x i1> poison, i1 -1, i32 0
1237   %allones = shufflevector <vscale x 32 x i1> %splat, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
1238   %x = call <vscale x 32 x i16> @llvm.vp.mul.nxv32i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b, <vscale x 32 x i1> %allones, i32 %evl)
1239   %y = call <vscale x 32 x i16> @llvm.vp.add.nxv32i16(<vscale x 32 x i16> %x, <vscale x 32 x i16> %c, <vscale x 32 x i1> %allones, i32 %evl)
1240   %u = call <vscale x 32 x i16> @llvm.vp.merge.nxv32i16(<vscale x 32 x i1> %m, <vscale x 32 x i16> %y, <vscale x 32 x i16> %a, i32 %evl)
1241   ret <vscale x 32 x i16> %u
1242 }
1243
1244 define <vscale x 32 x i16> @vmadd_vv_nxv32i16_unmasked(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b, <vscale x 32 x i16> %c,  <vscale x 32 x i1> %m, i32 zeroext %evl) {
     ; Forms (%a * %b) + %c via all-ones-masked vp.mul/vp.add; the final vp.merge
     ; also uses the all-ones mask, so parameter %m is never read.
     ; Expected codegen: a vl8re16.v load from (a0) into v24 (presumably %c
     ; arriving indirectly at m8 -- confirm against the RVV calling convention),
     ; vmacc.vv into v24 under `ta, ma`, then vmv.v.v under `tu, ma`.
1245 ; CHECK-LABEL: vmadd_vv_nxv32i16_unmasked:
1246 ; CHECK:       # %bb.0:
1247 ; CHECK-NEXT:    vl8re16.v v24, (a0)
1248 ; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
1249 ; CHECK-NEXT:    vmacc.vv v24, v8, v16
1250 ; CHECK-NEXT:    vsetvli zero, zero, e16, m8, tu, ma
1251 ; CHECK-NEXT:    vmv.v.v v8, v24
1252 ; CHECK-NEXT:    ret
1253   %splat = insertelement <vscale x 32 x i1> poison, i1 -1, i32 0
1254   %allones = shufflevector <vscale x 32 x i1> %splat, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
1255   %x = call <vscale x 32 x i16> @llvm.vp.mul.nxv32i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b, <vscale x 32 x i1> %allones, i32 %evl)
1256   %y = call <vscale x 32 x i16> @llvm.vp.add.nxv32i16(<vscale x 32 x i16> %x, <vscale x 32 x i16> %c, <vscale x 32 x i1> %allones, i32 %evl)
1257   %u = call <vscale x 32 x i16> @llvm.vp.merge.nxv32i16(<vscale x 32 x i1> %allones, <vscale x 32 x i16> %y, <vscale x 32 x i16> %a, i32 %evl)
1258   ret <vscale x 32 x i16> %u
1259 }
1260
1261 define <vscale x 32 x i16> @vmadd_vx_nxv32i16(<vscale x 32 x i16> %a, i16 %b, <vscale x 32 x i16> %c,  <vscale x 32 x i1> %m, i32 zeroext %evl) {
     ; Splats scalar %b, forms (%a * splat(%b)) + %c via all-ones-masked
     ; vp.mul/vp.add, then vp.merge's the sum into %a under mask %m.
     ; Expected codegen: a single masked vmadd.vx (v0.t) under `tu, mu`.
1262 ; CHECK-LABEL: vmadd_vx_nxv32i16:
1263 ; CHECK:       # %bb.0:
1264 ; CHECK-NEXT:    vsetvli zero, a1, e16, m8, tu, mu
1265 ; CHECK-NEXT:    vmadd.vx v8, a0, v16, v0.t
1266 ; CHECK-NEXT:    ret
1267   %elt.head = insertelement <vscale x 32 x i16> poison, i16 %b, i32 0
1268   %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
1269   %splat = insertelement <vscale x 32 x i1> poison, i1 -1, i32 0
1270   %allones = shufflevector <vscale x 32 x i1> %splat, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
1271   %x = call <vscale x 32 x i16> @llvm.vp.mul.nxv32i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %allones, i32 %evl)
1272   %y = call <vscale x 32 x i16> @llvm.vp.add.nxv32i16(<vscale x 32 x i16> %x, <vscale x 32 x i16> %c, <vscale x 32 x i1> %allones, i32 %evl)
1273   %u = call <vscale x 32 x i16> @llvm.vp.merge.nxv32i16(<vscale x 32 x i1> %m, <vscale x 32 x i16> %y, <vscale x 32 x i16> %a, i32 %evl)
1274   ret <vscale x 32 x i16> %u
1275 }
1276
1277 define <vscale x 32 x i16> @vmadd_vx_nxv32i16_unmasked(<vscale x 32 x i16> %a, i16 %b, <vscale x 32 x i16> %c,  <vscale x 32 x i1> %m, i32 zeroext %evl) {
     ; Splats scalar %b and forms (%a * splat(%b)) + %c via all-ones-masked
     ; vp.mul/vp.add; the final vp.merge also uses the all-ones mask, so
     ; parameter %m is never read.
     ; Expected codegen: a single vmadd.vx with no mask operand under `tu, ma`.
1278 ; CHECK-LABEL: vmadd_vx_nxv32i16_unmasked:
1279 ; CHECK:       # %bb.0:
1280 ; CHECK-NEXT:    vsetvli zero, a1, e16, m8, tu, ma
1281 ; CHECK-NEXT:    vmadd.vx v8, a0, v16
1282 ; CHECK-NEXT:    ret
1283   %elt.head = insertelement <vscale x 32 x i16> poison, i16 %b, i32 0
1284   %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
1285   %splat = insertelement <vscale x 32 x i1> poison, i1 -1, i32 0
1286   %allones = shufflevector <vscale x 32 x i1> %splat, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
1287   %x = call <vscale x 32 x i16> @llvm.vp.mul.nxv32i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %allones, i32 %evl)
1288   %y = call <vscale x 32 x i16> @llvm.vp.add.nxv32i16(<vscale x 32 x i16> %x, <vscale x 32 x i16> %c, <vscale x 32 x i1> %allones, i32 %evl)
1289   %u = call <vscale x 32 x i16> @llvm.vp.merge.nxv32i16(<vscale x 32 x i1> %allones, <vscale x 32 x i16> %y, <vscale x 32 x i16> %a, i32 %evl)
1290   ret <vscale x 32 x i16> %u
1291 }
1292
1293 define <vscale x 32 x i16> @vmadd_vv_nxv32i16_ta(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b, <vscale x 32 x i16> %c,  <vscale x 32 x i1> %m, i32 zeroext %evl) {
     ; Forms (%a * %b) + %c via all-ones-masked vp.mul/vp.add, then picks between
     ; the sum and %a with vp.select under mask %m.
     ; Expected codegen: a vl8re16.v load from (a0) into v24 (presumably %c
     ; arriving indirectly at m8 -- confirm against the RVV calling convention),
     ; then vmacc.vv and vmerge.vvm on v0 under one `ta, ma` vsetvli.
1294 ; CHECK-LABEL: vmadd_vv_nxv32i16_ta:
1295 ; CHECK:       # %bb.0:
1296 ; CHECK-NEXT:    vl8re16.v v24, (a0)
1297 ; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
1298 ; CHECK-NEXT:    vmacc.vv v24, v8, v16
1299 ; CHECK-NEXT:    vmerge.vvm v8, v8, v24, v0
1300 ; CHECK-NEXT:    ret
1301   %splat = insertelement <vscale x 32 x i1> poison, i1 -1, i32 0
1302   %allones = shufflevector <vscale x 32 x i1> %splat, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
1303   %x = call <vscale x 32 x i16> @llvm.vp.mul.nxv32i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b, <vscale x 32 x i1> %allones, i32 %evl)
1304   %y = call <vscale x 32 x i16> @llvm.vp.add.nxv32i16(<vscale x 32 x i16> %x, <vscale x 32 x i16> %c, <vscale x 32 x i1> %allones, i32 %evl)
1305   %u = call <vscale x 32 x i16> @llvm.vp.select.nxv32i16(<vscale x 32 x i1> %m, <vscale x 32 x i16> %y, <vscale x 32 x i16> %a, i32 %evl)
1306   ret <vscale x 32 x i16> %u
1307 }
1308
1309 define <vscale x 32 x i16> @vmadd_vx_nxv32i16_ta(<vscale x 32 x i16> %a, i16 %b, <vscale x 32 x i16> %c,  <vscale x 32 x i1> %m, i32 zeroext %evl) {
     ; Splats scalar %b, forms (%a * splat(%b)) + %c via all-ones-masked
     ; vp.mul/vp.add, then picks between the sum and %a with vp.select under %m.
     ; Expected codegen: vmacc.vx then vmerge.vvm on v0 under one `ta, ma` vsetvli.
1310 ; CHECK-LABEL: vmadd_vx_nxv32i16_ta:
1311 ; CHECK:       # %bb.0:
1312 ; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
1313 ; CHECK-NEXT:    vmacc.vx v16, a0, v8
1314 ; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
1315 ; CHECK-NEXT:    ret
1316   %elt.head = insertelement <vscale x 32 x i16> poison, i16 %b, i32 0
1317   %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
1318   %splat = insertelement <vscale x 32 x i1> poison, i1 -1, i32 0
1319   %allones = shufflevector <vscale x 32 x i1> %splat, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
1320   %x = call <vscale x 32 x i16> @llvm.vp.mul.nxv32i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %allones, i32 %evl)
1321   %y = call <vscale x 32 x i16> @llvm.vp.add.nxv32i16(<vscale x 32 x i16> %x, <vscale x 32 x i16> %c, <vscale x 32 x i1> %allones, i32 %evl)
1322   %u = call <vscale x 32 x i16> @llvm.vp.select.nxv32i16(<vscale x 32 x i1> %m, <vscale x 32 x i16> %y, <vscale x 32 x i16> %a, i32 %evl)
1323   ret <vscale x 32 x i16> %u
1324 }
1325
; Declarations of the vp.mul / vp.add / vp.merge / vp.select intrinsics at
; <vscale x 1 x i32>, each taking a <vscale x 1 x i1> mask and an i32 EVL.
1326 declare <vscale x 1 x i32> @llvm.vp.mul.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i32>, <vscale x 1 x i1>, i32)
1327 declare <vscale x 1 x i32> @llvm.vp.add.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i32>, <vscale x 1 x i1>, i32)
1328 declare <vscale x 1 x i32> @llvm.vp.merge.nxv1i32(<vscale x 1 x i1>, <vscale x 1 x i32>, <vscale x 1 x i32>, i32)
1329 declare <vscale x 1 x i32> @llvm.vp.select.nxv1i32(<vscale x 1 x i1>, <vscale x 1 x i32>, <vscale x 1 x i32>, i32)
1330
; vmadd_vv_nxv1i32: vp.mul(%a, %b) then vp.add with %c, both under an all-ones
; mask, combined with %a through vp.merge on %m.
; Expected: an unmasked tail-agnostic e32/mf2 vmadd.vv into v9, then a switch to
; tail-undisturbed and a vmerge.vvm into v8 under v0.
1331 define <vscale x 1 x i32> @vmadd_vv_nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i32> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
1332 ; CHECK-LABEL: vmadd_vv_nxv1i32:
1333 ; CHECK:       # %bb.0:
1334 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
1335 ; CHECK-NEXT:    vmadd.vv v9, v8, v10
1336 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, tu, ma
1337 ; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
1338 ; CHECK-NEXT:    ret
1339   %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
1340   %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
1341   %x = call <vscale x 1 x i32> @llvm.vp.mul.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i1> %allones, i32 %evl)
1342   %y = call <vscale x 1 x i32> @llvm.vp.add.nxv1i32(<vscale x 1 x i32> %x, <vscale x 1 x i32> %c, <vscale x 1 x i1> %allones, i32 %evl)
1343   %u = call <vscale x 1 x i32> @llvm.vp.merge.nxv1i32(<vscale x 1 x i1> %m, <vscale x 1 x i32> %y, <vscale x 1 x i32> %a, i32 %evl)
1344   ret <vscale x 1 x i32> %u
1345 }
1346
; vmadd_vv_nxv1i32_unmasked: same mul/add chain as the masked variant, but the
; final vp.merge uses the all-ones mask; the %m argument is not referenced.
; Expected: a tail-agnostic e32/mf2 vmadd.vv into v9, then a tail-undisturbed
; vmv.v.v copying v9 into v8.
1347 define <vscale x 1 x i32> @vmadd_vv_nxv1i32_unmasked(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i32> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
1348 ; CHECK-LABEL: vmadd_vv_nxv1i32_unmasked:
1349 ; CHECK:       # %bb.0:
1350 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
1351 ; CHECK-NEXT:    vmadd.vv v9, v8, v10
1352 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, tu, ma
1353 ; CHECK-NEXT:    vmv.v.v v8, v9
1354 ; CHECK-NEXT:    ret
1355   %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
1356   %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
1357   %x = call <vscale x 1 x i32> @llvm.vp.mul.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i1> %allones, i32 %evl)
1358   %y = call <vscale x 1 x i32> @llvm.vp.add.nxv1i32(<vscale x 1 x i32> %x, <vscale x 1 x i32> %c, <vscale x 1 x i1> %allones, i32 %evl)
1359   %u = call <vscale x 1 x i32> @llvm.vp.merge.nxv1i32(<vscale x 1 x i1> %allones, <vscale x 1 x i32> %y, <vscale x 1 x i32> %a, i32 %evl)
1360   ret <vscale x 1 x i32> %u
1361 }
1362
; vmadd_vx_nxv1i32: scalar %b is splatted (insertelement + shufflevector), then
; vp.mul(%a, splat) and vp.add with %c run under an all-ones mask, and vp.merge
; on %m combines the sum with %a.
; Expected: a single masked (v0.t) vmadd.vx at e32/mf2 with tu, mu policy.
1363 define <vscale x 1 x i32> @vmadd_vx_nxv1i32(<vscale x 1 x i32> %a, i32 %b, <vscale x 1 x i32> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
1364 ; CHECK-LABEL: vmadd_vx_nxv1i32:
1365 ; CHECK:       # %bb.0:
1366 ; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
1367 ; CHECK-NEXT:    vmadd.vx v8, a0, v9, v0.t
1368 ; CHECK-NEXT:    ret
1369   %elt.head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0
1370   %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
1371   %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
1372   %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
1373   %x = call <vscale x 1 x i32> @llvm.vp.mul.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %allones, i32 %evl)
1374   %y = call <vscale x 1 x i32> @llvm.vp.add.nxv1i32(<vscale x 1 x i32> %x, <vscale x 1 x i32> %c, <vscale x 1 x i1> %allones, i32 %evl)
1375   %u = call <vscale x 1 x i32> @llvm.vp.merge.nxv1i32(<vscale x 1 x i1> %m, <vscale x 1 x i32> %y, <vscale x 1 x i32> %a, i32 %evl)
1376   ret <vscale x 1 x i32> %u
1377 }
1378
; vmadd_vx_nxv1i32_unmasked: splat-scalar variant whose final vp.merge uses the
; all-ones mask; the %m argument is not referenced.
; Expected: a single unmasked vmadd.vx at e32/mf2 with tu, ma policy.
1379 define <vscale x 1 x i32> @vmadd_vx_nxv1i32_unmasked(<vscale x 1 x i32> %a, i32 %b, <vscale x 1 x i32> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
1380 ; CHECK-LABEL: vmadd_vx_nxv1i32_unmasked:
1381 ; CHECK:       # %bb.0:
1382 ; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, ma
1383 ; CHECK-NEXT:    vmadd.vx v8, a0, v9
1384 ; CHECK-NEXT:    ret
1385   %elt.head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0
1386   %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
1387   %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
1388   %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
1389   %x = call <vscale x 1 x i32> @llvm.vp.mul.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %allones, i32 %evl)
1390   %y = call <vscale x 1 x i32> @llvm.vp.add.nxv1i32(<vscale x 1 x i32> %x, <vscale x 1 x i32> %c, <vscale x 1 x i1> %allones, i32 %evl)
1391   %u = call <vscale x 1 x i32> @llvm.vp.merge.nxv1i32(<vscale x 1 x i1> %allones, <vscale x 1 x i32> %y, <vscale x 1 x i32> %a, i32 %evl)
1392   ret <vscale x 1 x i32> %u
1393 }
1394
; vmadd_vv_nxv1i32_ta: same mul/add chain as vmadd_vv_nxv1i32, but the result is
; combined with %a via vp.select (instead of vp.merge) on %m.
; Expected: a tail-agnostic e32/mf2 vmadd.vv into v9 followed directly by
; vmerge.vvm under v0, with no second vsetvli.
1395 define <vscale x 1 x i32> @vmadd_vv_nxv1i32_ta(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i32> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
1396 ; CHECK-LABEL: vmadd_vv_nxv1i32_ta:
1397 ; CHECK:       # %bb.0:
1398 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
1399 ; CHECK-NEXT:    vmadd.vv v9, v8, v10
1400 ; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
1401 ; CHECK-NEXT:    ret
1402   %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
1403   %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
1404   %x = call <vscale x 1 x i32> @llvm.vp.mul.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i1> %allones, i32 %evl)
1405   %y = call <vscale x 1 x i32> @llvm.vp.add.nxv1i32(<vscale x 1 x i32> %x, <vscale x 1 x i32> %c, <vscale x 1 x i1> %allones, i32 %evl)
1406   %u = call <vscale x 1 x i32> @llvm.vp.select.nxv1i32(<vscale x 1 x i1> %m, <vscale x 1 x i32> %y, <vscale x 1 x i32> %a, i32 %evl)
1407   ret <vscale x 1 x i32> %u
1408 }
1409
; vmadd_vx_nxv1i32_ta: splat-scalar mul/add chain under an all-ones mask, with
; the result combined with %a via vp.select on %m.
; Expected: a tail-agnostic e32/mf2 vmacc.vx into v9 followed by vmerge.vvm
; under v0.
1410 define <vscale x 1 x i32> @vmadd_vx_nxv1i32_ta(<vscale x 1 x i32> %a, i32 %b, <vscale x 1 x i32> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
1411 ; CHECK-LABEL: vmadd_vx_nxv1i32_ta:
1412 ; CHECK:       # %bb.0:
1413 ; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
1414 ; CHECK-NEXT:    vmacc.vx v9, a0, v8
1415 ; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
1416 ; CHECK-NEXT:    ret
1417   %elt.head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0
1418   %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
1419   %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
1420   %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
1421   %x = call <vscale x 1 x i32> @llvm.vp.mul.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %allones, i32 %evl)
1422   %y = call <vscale x 1 x i32> @llvm.vp.add.nxv1i32(<vscale x 1 x i32> %x, <vscale x 1 x i32> %c, <vscale x 1 x i1> %allones, i32 %evl)
1423   %u = call <vscale x 1 x i32> @llvm.vp.select.nxv1i32(<vscale x 1 x i1> %m, <vscale x 1 x i32> %y, <vscale x 1 x i32> %a, i32 %evl)
1424   ret <vscale x 1 x i32> %u
1425 }
1426
; Declarations of the vp.mul / vp.add / vp.merge / vp.select intrinsics at
; <vscale x 2 x i32>, each taking a <vscale x 2 x i1> mask and an i32 EVL.
1427 declare <vscale x 2 x i32> @llvm.vp.mul.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)
1428 declare <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)
1429 declare <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1>, <vscale x 2 x i32>, <vscale x 2 x i32>, i32)
1430 declare <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1>, <vscale x 2 x i32>, <vscale x 2 x i32>, i32)
1431
; vmadd_vv_nxv2i32: vp.mul(%a, %b) then vp.add with %c, both under an all-ones
; mask, combined with %a through vp.merge on %m.
; Expected: an unmasked tail-agnostic e32/m1 vmadd.vv into v9, then a switch to
; tail-undisturbed and a vmerge.vvm into v8 under v0.
1432 define <vscale x 2 x i32> @vmadd_vv_nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
1433 ; CHECK-LABEL: vmadd_vv_nxv2i32:
1434 ; CHECK:       # %bb.0:
1435 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
1436 ; CHECK-NEXT:    vmadd.vv v9, v8, v10
1437 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, ma
1438 ; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
1439 ; CHECK-NEXT:    ret
1440   %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
1441   %allones = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
1442   %x = call <vscale x 2 x i32> @llvm.vp.mul.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i1> %allones, i32 %evl)
1443   %y = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %c, <vscale x 2 x i1> %allones, i32 %evl)
1444   %u = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %y, <vscale x 2 x i32> %a, i32 %evl)
1445   ret <vscale x 2 x i32> %u
1446 }
1447
; vmadd_vv_nxv2i32_unmasked: same mul/add chain as the masked variant, but the
; final vp.merge uses the all-ones mask; the %m argument is not referenced.
; Expected: a tail-agnostic e32/m1 vmadd.vv into v9, then a tail-undisturbed
; vmv.v.v copying v9 into v8.
1448 define <vscale x 2 x i32> @vmadd_vv_nxv2i32_unmasked(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
1449 ; CHECK-LABEL: vmadd_vv_nxv2i32_unmasked:
1450 ; CHECK:       # %bb.0:
1451 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
1452 ; CHECK-NEXT:    vmadd.vv v9, v8, v10
1453 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, ma
1454 ; CHECK-NEXT:    vmv.v.v v8, v9
1455 ; CHECK-NEXT:    ret
1456   %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
1457   %allones = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
1458   %x = call <vscale x 2 x i32> @llvm.vp.mul.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i1> %allones, i32 %evl)
1459   %y = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %c, <vscale x 2 x i1> %allones, i32 %evl)
1460   %u = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %allones, <vscale x 2 x i32> %y, <vscale x 2 x i32> %a, i32 %evl)
1461   ret <vscale x 2 x i32> %u
1462 }
1463
; vmadd_vx_nxv2i32: scalar %b is splatted (insertelement + shufflevector), then
; vp.mul(%a, splat) and vp.add with %c run under an all-ones mask, and vp.merge
; on %m combines the sum with %a.
; Expected: a single masked (v0.t) vmadd.vx at e32/m1 with tu, mu policy.
1464 define <vscale x 2 x i32> @vmadd_vx_nxv2i32(<vscale x 2 x i32> %a, i32 %b, <vscale x 2 x i32> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
1465 ; CHECK-LABEL: vmadd_vx_nxv2i32:
1466 ; CHECK:       # %bb.0:
1467 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
1468 ; CHECK-NEXT:    vmadd.vx v8, a0, v9, v0.t
1469 ; CHECK-NEXT:    ret
1470   %elt.head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
1471   %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
1472   %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
1473   %allones = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
1474   %x = call <vscale x 2 x i32> @llvm.vp.mul.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %allones, i32 %evl)
1475   %y = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %c, <vscale x 2 x i1> %allones, i32 %evl)
1476   %u = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %y, <vscale x 2 x i32> %a, i32 %evl)
1477   ret <vscale x 2 x i32> %u
1478 }
1479
; vmadd_vx_nxv2i32_unmasked: splat-scalar variant whose final vp.merge uses the
; all-ones mask; the %m argument is not referenced.
; Expected: a single unmasked vmadd.vx at e32/m1 with tu, ma policy.
1480 define <vscale x 2 x i32> @vmadd_vx_nxv2i32_unmasked(<vscale x 2 x i32> %a, i32 %b, <vscale x 2 x i32> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
1481 ; CHECK-LABEL: vmadd_vx_nxv2i32_unmasked:
1482 ; CHECK:       # %bb.0:
1483 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, ma
1484 ; CHECK-NEXT:    vmadd.vx v8, a0, v9
1485 ; CHECK-NEXT:    ret
1486   %elt.head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
1487   %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
1488   %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
1489   %allones = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
1490   %x = call <vscale x 2 x i32> @llvm.vp.mul.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %allones, i32 %evl)
1491   %y = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %c, <vscale x 2 x i1> %allones, i32 %evl)
1492   %u = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %allones, <vscale x 2 x i32> %y, <vscale x 2 x i32> %a, i32 %evl)
1493   ret <vscale x 2 x i32> %u
1494 }
1495
; vmadd_vv_nxv2i32_ta: same mul/add chain as vmadd_vv_nxv2i32, but the result is
; combined with %a via vp.select (instead of vp.merge) on %m.
; Expected: a tail-agnostic e32/m1 vmadd.vv into v9 followed directly by
; vmerge.vvm under v0, with no second vsetvli.
1496 define <vscale x 2 x i32> @vmadd_vv_nxv2i32_ta(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
1497 ; CHECK-LABEL: vmadd_vv_nxv2i32_ta:
1498 ; CHECK:       # %bb.0:
1499 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
1500 ; CHECK-NEXT:    vmadd.vv v9, v8, v10
1501 ; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
1502 ; CHECK-NEXT:    ret
1503   %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
1504   %allones = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
1505   %x = call <vscale x 2 x i32> @llvm.vp.mul.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i1> %allones, i32 %evl)
1506   %y = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %c, <vscale x 2 x i1> %allones, i32 %evl)
1507   %u = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %y, <vscale x 2 x i32> %a, i32 %evl)
1508   ret <vscale x 2 x i32> %u
1509 }
1510
; vmadd_vx_nxv2i32_ta: splat-scalar mul/add chain under an all-ones mask, with
; the result combined with %a via vp.select on %m.
; Expected: a tail-agnostic e32/m1 vmacc.vx into v9 followed by vmerge.vvm
; under v0.
1511 define <vscale x 2 x i32> @vmadd_vx_nxv2i32_ta(<vscale x 2 x i32> %a, i32 %b, <vscale x 2 x i32> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
1512 ; CHECK-LABEL: vmadd_vx_nxv2i32_ta:
1513 ; CHECK:       # %bb.0:
1514 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
1515 ; CHECK-NEXT:    vmacc.vx v9, a0, v8
1516 ; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
1517 ; CHECK-NEXT:    ret
1518   %elt.head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
1519   %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
1520   %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
1521   %allones = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
1522   %x = call <vscale x 2 x i32> @llvm.vp.mul.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %allones, i32 %evl)
1523   %y = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %c, <vscale x 2 x i1> %allones, i32 %evl)
1524   %u = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %y, <vscale x 2 x i32> %a, i32 %evl)
1525   ret <vscale x 2 x i32> %u
1526 }
1527
; Declarations of the vp.mul / vp.add / vp.merge / vp.select intrinsics at
; <vscale x 4 x i32>, each taking a <vscale x 4 x i1> mask and an i32 EVL.
1528 declare <vscale x 4 x i32> @llvm.vp.mul.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
1529 declare <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
1530 declare <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>, i32)
1531 declare <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>, i32)
1532
; vmadd_vv_nxv4i32: vp.mul(%a, %b) then vp.add with %c, both under an all-ones
; mask, combined with %a through vp.merge on %m.
; Expected: an unmasked tail-agnostic e32/m2 vmadd.vv into v10, then a switch to
; tail-undisturbed and a vmerge.vvm into v8 under v0.
1533 define <vscale x 4 x i32> @vmadd_vv_nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
1534 ; CHECK-LABEL: vmadd_vv_nxv4i32:
1535 ; CHECK:       # %bb.0:
1536 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
1537 ; CHECK-NEXT:    vmadd.vv v10, v8, v12
1538 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, tu, ma
1539 ; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
1540 ; CHECK-NEXT:    ret
1541   %splat = insertelement <vscale x 4 x i1> poison, i1 -1, i32 0
1542   %allones = shufflevector <vscale x 4 x i1> %splat, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
1543   %x = call <vscale x 4 x i32> @llvm.vp.mul.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i1> %allones, i32 %evl)
1544   %y = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %c, <vscale x 4 x i1> %allones, i32 %evl)
1545   %u = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> %m, <vscale x 4 x i32> %y, <vscale x 4 x i32> %a, i32 %evl)
1546   ret <vscale x 4 x i32> %u
1547 }
1548
; vmadd_vv_nxv4i32_unmasked: same mul/add chain as the masked variant, but the
; final vp.merge uses the all-ones mask; the %m argument is not referenced.
; Expected: a tail-agnostic e32/m2 vmadd.vv into v10, then a tail-undisturbed
; vmv.v.v copying v10 into v8.
1549 define <vscale x 4 x i32> @vmadd_vv_nxv4i32_unmasked(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
1550 ; CHECK-LABEL: vmadd_vv_nxv4i32_unmasked:
1551 ; CHECK:       # %bb.0:
1552 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
1553 ; CHECK-NEXT:    vmadd.vv v10, v8, v12
1554 ; CHECK-NEXT:    vsetvli zero, zero, e32, m2, tu, ma
1555 ; CHECK-NEXT:    vmv.v.v v8, v10
1556 ; CHECK-NEXT:    ret
1557   %splat = insertelement <vscale x 4 x i1> poison, i1 -1, i32 0
1558   %allones = shufflevector <vscale x 4 x i1> %splat, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
1559   %x = call <vscale x 4 x i32> @llvm.vp.mul.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i1> %allones, i32 %evl)
1560   %y = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %c, <vscale x 4 x i1> %allones, i32 %evl)
1561   %u = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> %allones, <vscale x 4 x i32> %y, <vscale x 4 x i32> %a, i32 %evl)
1562   ret <vscale x 4 x i32> %u
1563 }
1564
; vmadd_vx_nxv4i32: scalar %b is splatted (insertelement + shufflevector), then
; vp.mul(%a, splat) and vp.add with %c run under an all-ones mask, and vp.merge
; on %m combines the sum with %a.
; Expected: a single masked (v0.t) vmadd.vx at e32/m2 with tu, mu policy.
1565 define <vscale x 4 x i32> @vmadd_vx_nxv4i32(<vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i32> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
1566 ; CHECK-LABEL: vmadd_vx_nxv4i32:
1567 ; CHECK:       # %bb.0:
1568 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
1569 ; CHECK-NEXT:    vmadd.vx v8, a0, v10, v0.t
1570 ; CHECK-NEXT:    ret
1571   %elt.head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
1572   %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
1573   %splat = insertelement <vscale x 4 x i1> poison, i1 -1, i32 0
1574   %allones = shufflevector <vscale x 4 x i1> %splat, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
1575   %x = call <vscale x 4 x i32> @llvm.vp.mul.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %allones, i32 %evl)
1576   %y = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %c, <vscale x 4 x i1> %allones, i32 %evl)
1577   %u = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> %m, <vscale x 4 x i32> %y, <vscale x 4 x i32> %a, i32 %evl)
1578   ret <vscale x 4 x i32> %u
1579 }
1580
; vmadd_vx_nxv4i32_unmasked: splat-scalar variant whose final vp.merge uses the
; all-ones mask; the %m argument is not referenced.
; Expected: a single unmasked vmadd.vx at e32/m2 with tu, ma policy.
1581 define <vscale x 4 x i32> @vmadd_vx_nxv4i32_unmasked(<vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i32> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
1582 ; CHECK-LABEL: vmadd_vx_nxv4i32_unmasked:
1583 ; CHECK:       # %bb.0:
1584 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, ma
1585 ; CHECK-NEXT:    vmadd.vx v8, a0, v10
1586 ; CHECK-NEXT:    ret
1587   %elt.head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
1588   %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
1589   %splat = insertelement <vscale x 4 x i1> poison, i1 -1, i32 0
1590   %allones = shufflevector <vscale x 4 x i1> %splat, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
1591   %x = call <vscale x 4 x i32> @llvm.vp.mul.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %allones, i32 %evl)
1592   %y = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %c, <vscale x 4 x i1> %allones, i32 %evl)
1593   %u = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> %allones, <vscale x 4 x i32> %y, <vscale x 4 x i32> %a, i32 %evl)
1594   ret <vscale x 4 x i32> %u
1595 }
1596
; vmadd_vv_nxv4i32_ta: same mul/add chain as vmadd_vv_nxv4i32, but the result is
; combined with %a via vp.select (instead of vp.merge) on %m.
; Expected: a tail-agnostic e32/m2 vmadd.vv into v10 followed directly by
; vmerge.vvm under v0, with no second vsetvli.
1597 define <vscale x 4 x i32> @vmadd_vv_nxv4i32_ta(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
1598 ; CHECK-LABEL: vmadd_vv_nxv4i32_ta:
1599 ; CHECK:       # %bb.0:
1600 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
1601 ; CHECK-NEXT:    vmadd.vv v10, v8, v12
1602 ; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
1603 ; CHECK-NEXT:    ret
1604   %splat = insertelement <vscale x 4 x i1> poison, i1 -1, i32 0
1605   %allones = shufflevector <vscale x 4 x i1> %splat, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
1606   %x = call <vscale x 4 x i32> @llvm.vp.mul.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i1> %allones, i32 %evl)
1607   %y = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %c, <vscale x 4 x i1> %allones, i32 %evl)
1608   %u = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> %m, <vscale x 4 x i32> %y, <vscale x 4 x i32> %a, i32 %evl)
1609   ret <vscale x 4 x i32> %u
1610 }
1611
; vmadd_vx_nxv4i32_ta: splat-scalar mul/add chain under an all-ones mask, with
; the result combined with %a via vp.select on %m.
; Expected: a tail-agnostic e32/m2 vmacc.vx into v10 followed by vmerge.vvm
; under v0.
1612 define <vscale x 4 x i32> @vmadd_vx_nxv4i32_ta(<vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i32> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
1613 ; CHECK-LABEL: vmadd_vx_nxv4i32_ta:
1614 ; CHECK:       # %bb.0:
1615 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
1616 ; CHECK-NEXT:    vmacc.vx v10, a0, v8
1617 ; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
1618 ; CHECK-NEXT:    ret
1619   %elt.head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
1620   %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
1621   %splat = insertelement <vscale x 4 x i1> poison, i1 -1, i32 0
1622   %allones = shufflevector <vscale x 4 x i1> %splat, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
1623   %x = call <vscale x 4 x i32> @llvm.vp.mul.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %allones, i32 %evl)
1624   %y = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %c, <vscale x 4 x i1> %allones, i32 %evl)
1625   %u = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> %m, <vscale x 4 x i32> %y, <vscale x 4 x i32> %a, i32 %evl)
1626   ret <vscale x 4 x i32> %u
1627 }
1628
; Declarations of the vp.mul / vp.add / vp.merge / vp.select intrinsics at
; <vscale x 8 x i32>, each taking a <vscale x 8 x i1> mask and an i32 EVL.
1629 declare <vscale x 8 x i32> @llvm.vp.mul.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i1>, i32)
1630 declare <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i1>, i32)
1631 declare <vscale x 8 x i32> @llvm.vp.merge.nxv8i32(<vscale x 8 x i1>, <vscale x 8 x i32>, <vscale x 8 x i32>, i32)
1632 declare <vscale x 8 x i32> @llvm.vp.select.nxv8i32(<vscale x 8 x i1>, <vscale x 8 x i32>, <vscale x 8 x i32>, i32)
1633
; vmadd_vv_nxv8i32: vp.mul(%a, %b) then vp.add with %c, both under an all-ones
; mask, combined with %a through vp.merge on %m.
; Expected: an unmasked tail-agnostic e32/m4 vmadd.vv into v12, then a switch to
; tail-undisturbed and a vmerge.vvm into v8 under v0.
1634 define <vscale x 8 x i32> @vmadd_vv_nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, <vscale x 8 x i32> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
1635 ; CHECK-LABEL: vmadd_vv_nxv8i32:
1636 ; CHECK:       # %bb.0:
1637 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
1638 ; CHECK-NEXT:    vmadd.vv v12, v8, v16
1639 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, tu, ma
1640 ; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
1641 ; CHECK-NEXT:    ret
1642   %splat = insertelement <vscale x 8 x i1> poison, i1 -1, i32 0
1643   %allones = shufflevector <vscale x 8 x i1> %splat, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
1644   %x = call <vscale x 8 x i32> @llvm.vp.mul.nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, <vscale x 8 x i1> %allones, i32 %evl)
1645   %y = call <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %c, <vscale x 8 x i1> %allones, i32 %evl)
1646   %u = call <vscale x 8 x i32> @llvm.vp.merge.nxv8i32(<vscale x 8 x i1> %m, <vscale x 8 x i32> %y, <vscale x 8 x i32> %a, i32 %evl)
1647   ret <vscale x 8 x i32> %u
1648 }
1649
; vmadd_vv_nxv8i32_unmasked: same mul/add chain as the masked variant, but the
; final vp.merge uses the all-ones mask; the %m argument is not referenced.
; Expected: a tail-agnostic e32/m4 vmadd.vv into v12, then a tail-undisturbed
; vmv.v.v copying v12 into v8.
1650 define <vscale x 8 x i32> @vmadd_vv_nxv8i32_unmasked(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, <vscale x 8 x i32> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
1651 ; CHECK-LABEL: vmadd_vv_nxv8i32_unmasked:
1652 ; CHECK:       # %bb.0:
1653 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
1654 ; CHECK-NEXT:    vmadd.vv v12, v8, v16
1655 ; CHECK-NEXT:    vsetvli zero, zero, e32, m4, tu, ma
1656 ; CHECK-NEXT:    vmv.v.v v8, v12
1657 ; CHECK-NEXT:    ret
1658   %splat = insertelement <vscale x 8 x i1> poison, i1 -1, i32 0
1659   %allones = shufflevector <vscale x 8 x i1> %splat, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
1660   %x = call <vscale x 8 x i32> @llvm.vp.mul.nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, <vscale x 8 x i1> %allones, i32 %evl)
1661   %y = call <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %c, <vscale x 8 x i1> %allones, i32 %evl)
1662   %u = call <vscale x 8 x i32> @llvm.vp.merge.nxv8i32(<vscale x 8 x i1> %allones, <vscale x 8 x i32> %y, <vscale x 8 x i32> %a, i32 %evl)
1663   ret <vscale x 8 x i32> %u
1664 }
1665
; vmadd_vx_nxv8i32: scalar %b is splatted (insertelement + shufflevector), then
; vp.mul(%a, splat) and vp.add with %c run under an all-ones mask, and vp.merge
; on %m combines the sum with %a.
; Expected: a single masked (v0.t) vmadd.vx at e32/m4 with tu, mu policy.
1666 define <vscale x 8 x i32> @vmadd_vx_nxv8i32(<vscale x 8 x i32> %a, i32 %b, <vscale x 8 x i32> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
1667 ; CHECK-LABEL: vmadd_vx_nxv8i32:
1668 ; CHECK:       # %bb.0:
1669 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
1670 ; CHECK-NEXT:    vmadd.vx v8, a0, v12, v0.t
1671 ; CHECK-NEXT:    ret
1672   %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
1673   %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
1674   %splat = insertelement <vscale x 8 x i1> poison, i1 -1, i32 0
1675   %allones = shufflevector <vscale x 8 x i1> %splat, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
1676   %x = call <vscale x 8 x i32> @llvm.vp.mul.nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %allones, i32 %evl)
1677   %y = call <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %c, <vscale x 8 x i1> %allones, i32 %evl)
1678   %u = call <vscale x 8 x i32> @llvm.vp.merge.nxv8i32(<vscale x 8 x i1> %m, <vscale x 8 x i32> %y, <vscale x 8 x i32> %a, i32 %evl)
1679   ret <vscale x 8 x i32> %u
1680 }
1681
; vmadd_vx_nxv8i32_unmasked: splat-scalar variant whose final vp.merge uses the
; all-ones mask; the %m argument is not referenced.
; Expected: a single unmasked vmadd.vx at e32/m4 with tu, ma policy.
1682 define <vscale x 8 x i32> @vmadd_vx_nxv8i32_unmasked(<vscale x 8 x i32> %a, i32 %b, <vscale x 8 x i32> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
1683 ; CHECK-LABEL: vmadd_vx_nxv8i32_unmasked:
1684 ; CHECK:       # %bb.0:
1685 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, ma
1686 ; CHECK-NEXT:    vmadd.vx v8, a0, v12
1687 ; CHECK-NEXT:    ret
1688   %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
1689   %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
1690   %splat = insertelement <vscale x 8 x i1> poison, i1 -1, i32 0
1691   %allones = shufflevector <vscale x 8 x i1> %splat, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
1692   %x = call <vscale x 8 x i32> @llvm.vp.mul.nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %allones, i32 %evl)
1693   %y = call <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %c, <vscale x 8 x i1> %allones, i32 %evl)
1694   %u = call <vscale x 8 x i32> @llvm.vp.merge.nxv8i32(<vscale x 8 x i1> %allones, <vscale x 8 x i32> %y, <vscale x 8 x i32> %a, i32 %evl)
1695   ret <vscale x 8 x i32> %u
1696 }
1697
; vmadd_vv_nxv8i32_ta: same mul/add chain as vmadd_vv_nxv8i32, but the result is
; combined with %a via vp.select (instead of vp.merge) on %m.
; Expected: a tail-agnostic e32/m4 vmadd.vv into v12 followed directly by
; vmerge.vvm under v0, with no second vsetvli.
1698 define <vscale x 8 x i32> @vmadd_vv_nxv8i32_ta(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, <vscale x 8 x i32> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
1699 ; CHECK-LABEL: vmadd_vv_nxv8i32_ta:
1700 ; CHECK:       # %bb.0:
1701 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
1702 ; CHECK-NEXT:    vmadd.vv v12, v8, v16
1703 ; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
1704 ; CHECK-NEXT:    ret
1705   %splat = insertelement <vscale x 8 x i1> poison, i1 -1, i32 0
1706   %allones = shufflevector <vscale x 8 x i1> %splat, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
1707   %x = call <vscale x 8 x i32> @llvm.vp.mul.nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, <vscale x 8 x i1> %allones, i32 %evl)
1708   %y = call <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %c, <vscale x 8 x i1> %allones, i32 %evl)
1709   %u = call <vscale x 8 x i32> @llvm.vp.select.nxv8i32(<vscale x 8 x i1> %m, <vscale x 8 x i32> %y, <vscale x 8 x i32> %a, i32 %evl)
1710   ret <vscale x 8 x i32> %u
1711 }
1712
; vmadd_vx_nxv8i32_ta: splat-scalar mul/add chain under an all-ones mask, with
; the result combined with %a via vp.select on %m.
; Expected: a tail-agnostic e32/m4 vmacc.vx into v12 followed by vmerge.vvm
; under v0.
1713 define <vscale x 8 x i32> @vmadd_vx_nxv8i32_ta(<vscale x 8 x i32> %a, i32 %b, <vscale x 8 x i32> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
1714 ; CHECK-LABEL: vmadd_vx_nxv8i32_ta:
1715 ; CHECK:       # %bb.0:
1716 ; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
1717 ; CHECK-NEXT:    vmacc.vx v12, a0, v8
1718 ; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
1719 ; CHECK-NEXT:    ret
1720   %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
1721   %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
1722   %splat = insertelement <vscale x 8 x i1> poison, i1 -1, i32 0
1723   %allones = shufflevector <vscale x 8 x i1> %splat, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
1724   %x = call <vscale x 8 x i32> @llvm.vp.mul.nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %allones, i32 %evl)
1725   %y = call <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %c, <vscale x 8 x i1> %allones, i32 %evl)
1726   %u = call <vscale x 8 x i32> @llvm.vp.select.nxv8i32(<vscale x 8 x i1> %m, <vscale x 8 x i32> %y, <vscale x 8 x i32> %a, i32 %evl)
1727   ret <vscale x 8 x i32> %u
1728 }
1729
; Declarations of the vp.mul / vp.add / vp.merge / vp.select intrinsics at
; <vscale x 16 x i32>, each taking a <vscale x 16 x i1> mask and an i32 EVL.
1730 declare <vscale x 16 x i32> @llvm.vp.mul.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>, <vscale x 16 x i1>, i32)
1731 declare <vscale x 16 x i32> @llvm.vp.add.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>, <vscale x 16 x i1>, i32)
1732 declare <vscale x 16 x i32> @llvm.vp.merge.nxv16i32(<vscale x 16 x i1>, <vscale x 16 x i32>, <vscale x 16 x i32>, i32)
1733 declare <vscale x 16 x i32> @llvm.vp.select.nxv16i32(<vscale x 16 x i1>, <vscale x 16 x i32>, <vscale x 16 x i32>, i32)
1734
; vmadd_vv_nxv16i32: vp.mul(%a, %b) then vp.add with %c, both under an all-ones
; mask, combined with %a through vp.merge on %m.
; Expected: one vector operand is loaded with vl8re32.v from (a0) (presumably it
; is passed indirectly -- confirm against the calling convention), then a
; tail-agnostic e32/m8 vmacc.vv into v24, a switch to tail-undisturbed, and a
; vmerge.vvm into v8 under v0.
1735 define <vscale x 16 x i32> @vmadd_vv_nxv16i32(<vscale x 16 x i32> %a, <vscale x 16 x i32> %b, <vscale x 16 x i32> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
1736 ; CHECK-LABEL: vmadd_vv_nxv16i32:
1737 ; CHECK:       # %bb.0:
1738 ; CHECK-NEXT:    vl8re32.v v24, (a0)
1739 ; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
1740 ; CHECK-NEXT:    vmacc.vv v24, v8, v16
1741 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, tu, ma
1742 ; CHECK-NEXT:    vmerge.vvm v8, v8, v24, v0
1743 ; CHECK-NEXT:    ret
1744   %splat = insertelement <vscale x 16 x i1> poison, i1 -1, i32 0
1745   %allones = shufflevector <vscale x 16 x i1> %splat, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
1746   %x = call <vscale x 16 x i32> @llvm.vp.mul.nxv16i32(<vscale x 16 x i32> %a, <vscale x 16 x i32> %b, <vscale x 16 x i1> %allones, i32 %evl)
1747   %y = call <vscale x 16 x i32> @llvm.vp.add.nxv16i32(<vscale x 16 x i32> %x, <vscale x 16 x i32> %c, <vscale x 16 x i1> %allones, i32 %evl)
1748   %u = call <vscale x 16 x i32> @llvm.vp.merge.nxv16i32(<vscale x 16 x i1> %m, <vscale x 16 x i32> %y, <vscale x 16 x i32> %a, i32 %evl)
1749   ret <vscale x 16 x i32> %u
1750 }
1751
; Unmasked vector-vector case at e32/m8: same mul/add chain, but the final
; vp.merge uses the all-ones mask, so the %m argument is never referenced.
; The assertions expect a load from (a0) into v24 (presumably %c -- confirm),
; an unmasked vmacc.vv under "ta, ma", and a vmv.v.v into v8 under "tu, ma"
; in place of a vmerge.
1752 define <vscale x 16 x i32> @vmadd_vv_nxv16i32_unmasked(<vscale x 16 x i32> %a, <vscale x 16 x i32> %b, <vscale x 16 x i32> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
1753 ; CHECK-LABEL: vmadd_vv_nxv16i32_unmasked:
1754 ; CHECK:       # %bb.0:
1755 ; CHECK-NEXT:    vl8re32.v v24, (a0)
1756 ; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
1757 ; CHECK-NEXT:    vmacc.vv v24, v8, v16
1758 ; CHECK-NEXT:    vsetvli zero, zero, e32, m8, tu, ma
1759 ; CHECK-NEXT:    vmv.v.v v8, v24
1760 ; CHECK-NEXT:    ret
1761   %splat = insertelement <vscale x 16 x i1> poison, i1 -1, i32 0
1762   %allones = shufflevector <vscale x 16 x i1> %splat, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
1763   %x = call <vscale x 16 x i32> @llvm.vp.mul.nxv16i32(<vscale x 16 x i32> %a, <vscale x 16 x i32> %b, <vscale x 16 x i1> %allones, i32 %evl)
1764   %y = call <vscale x 16 x i32> @llvm.vp.add.nxv16i32(<vscale x 16 x i32> %x, <vscale x 16 x i32> %c, <vscale x 16 x i1> %allones, i32 %evl)
1765   %u = call <vscale x 16 x i32> @llvm.vp.merge.nxv16i32(<vscale x 16 x i1> %allones, <vscale x 16 x i32> %y, <vscale x 16 x i32> %a, i32 %evl)
1766   ret <vscale x 16 x i32> %u
1767 }
1768
; Masked vector-scalar case at e32/m8: the i32 %b is splatted with
; insertelement + shufflevector, multiplied with %a, added to %c, and merged
; with %a on %m via vp.merge.
; The assertions expect the whole chain to fold into a single masked
; vmadd.vx (v0.t) under "tu, mu", with no separate splat or vmerge.
1769 define <vscale x 16 x i32> @vmadd_vx_nxv16i32(<vscale x 16 x i32> %a, i32 %b, <vscale x 16 x i32> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
1770 ; CHECK-LABEL: vmadd_vx_nxv16i32:
1771 ; CHECK:       # %bb.0:
1772 ; CHECK-NEXT:    vsetvli zero, a1, e32, m8, tu, mu
1773 ; CHECK-NEXT:    vmadd.vx v8, a0, v16, v0.t
1774 ; CHECK-NEXT:    ret
1775   %elt.head = insertelement <vscale x 16 x i32> poison, i32 %b, i32 0
1776   %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
1777   %splat = insertelement <vscale x 16 x i1> poison, i1 -1, i32 0
1778   %allones = shufflevector <vscale x 16 x i1> %splat, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
1779   %x = call <vscale x 16 x i32> @llvm.vp.mul.nxv16i32(<vscale x 16 x i32> %a, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %allones, i32 %evl)
1780   %y = call <vscale x 16 x i32> @llvm.vp.add.nxv16i32(<vscale x 16 x i32> %x, <vscale x 16 x i32> %c, <vscale x 16 x i1> %allones, i32 %evl)
1781   %u = call <vscale x 16 x i32> @llvm.vp.merge.nxv16i32(<vscale x 16 x i1> %m, <vscale x 16 x i32> %y, <vscale x 16 x i32> %a, i32 %evl)
1782   ret <vscale x 16 x i32> %u
1783 }
1784
; Unmasked vector-scalar case at e32/m8: %b is splatted, and the final
; vp.merge uses the all-ones mask, so the %m argument is never referenced.
; The assertions expect a single unmasked vmadd.vx under "tu, ma".
1785 define <vscale x 16 x i32> @vmadd_vx_nxv16i32_unmasked(<vscale x 16 x i32> %a, i32 %b, <vscale x 16 x i32> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
1786 ; CHECK-LABEL: vmadd_vx_nxv16i32_unmasked:
1787 ; CHECK:       # %bb.0:
1788 ; CHECK-NEXT:    vsetvli zero, a1, e32, m8, tu, ma
1789 ; CHECK-NEXT:    vmadd.vx v8, a0, v16
1790 ; CHECK-NEXT:    ret
1791   %elt.head = insertelement <vscale x 16 x i32> poison, i32 %b, i32 0
1792   %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
1793   %splat = insertelement <vscale x 16 x i1> poison, i1 -1, i32 0
1794   %allones = shufflevector <vscale x 16 x i1> %splat, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
1795   %x = call <vscale x 16 x i32> @llvm.vp.mul.nxv16i32(<vscale x 16 x i32> %a, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %allones, i32 %evl)
1796   %y = call <vscale x 16 x i32> @llvm.vp.add.nxv16i32(<vscale x 16 x i32> %x, <vscale x 16 x i32> %c, <vscale x 16 x i1> %allones, i32 %evl)
1797   %u = call <vscale x 16 x i32> @llvm.vp.merge.nxv16i32(<vscale x 16 x i1> %allones, <vscale x 16 x i32> %y, <vscale x 16 x i32> %a, i32 %evl)
1798   ret <vscale x 16 x i32> %u
1799 }
1800
; "_ta" vector-vector case at e32/m8: identical to the masked test except the
; final combine is vp.select rather than vp.merge.
; The assertions expect a load from (a0) into v24 (presumably %c -- confirm),
; then vmacc.vv and vmerge.vvm under a single "ta, ma" vsetvli, with no
; switch to a tail-undisturbed policy.
1801 define <vscale x 16 x i32> @vmadd_vv_nxv16i32_ta(<vscale x 16 x i32> %a, <vscale x 16 x i32> %b, <vscale x 16 x i32> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
1802 ; CHECK-LABEL: vmadd_vv_nxv16i32_ta:
1803 ; CHECK:       # %bb.0:
1804 ; CHECK-NEXT:    vl8re32.v v24, (a0)
1805 ; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
1806 ; CHECK-NEXT:    vmacc.vv v24, v8, v16
1807 ; CHECK-NEXT:    vmerge.vvm v8, v8, v24, v0
1808 ; CHECK-NEXT:    ret
1809   %splat = insertelement <vscale x 16 x i1> poison, i1 -1, i32 0
1810   %allones = shufflevector <vscale x 16 x i1> %splat, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
1811   %x = call <vscale x 16 x i32> @llvm.vp.mul.nxv16i32(<vscale x 16 x i32> %a, <vscale x 16 x i32> %b, <vscale x 16 x i1> %allones, i32 %evl)
1812   %y = call <vscale x 16 x i32> @llvm.vp.add.nxv16i32(<vscale x 16 x i32> %x, <vscale x 16 x i32> %c, <vscale x 16 x i1> %allones, i32 %evl)
1813   %u = call <vscale x 16 x i32> @llvm.vp.select.nxv16i32(<vscale x 16 x i1> %m, <vscale x 16 x i32> %y, <vscale x 16 x i32> %a, i32 %evl)
1814   ret <vscale x 16 x i32> %u
1815 }
1816
; "_ta" vector-scalar case at e32/m8: %b is splatted and the final combine is
; vp.select rather than vp.merge.
; The assertions expect an unmasked vmacc.vx accumulating into v16, followed
; by a vmerge.vvm on v0, both under a single "ta, ma" vsetvli.
1817 define <vscale x 16 x i32> @vmadd_vx_nxv16i32_ta(<vscale x 16 x i32> %a, i32 %b, <vscale x 16 x i32> %c,  <vscale x 16 x i1> %m, i32 zeroext %evl) {
1818 ; CHECK-LABEL: vmadd_vx_nxv16i32_ta:
1819 ; CHECK:       # %bb.0:
1820 ; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
1821 ; CHECK-NEXT:    vmacc.vx v16, a0, v8
1822 ; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
1823 ; CHECK-NEXT:    ret
1824   %elt.head = insertelement <vscale x 16 x i32> poison, i32 %b, i32 0
1825   %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
1826   %splat = insertelement <vscale x 16 x i1> poison, i1 -1, i32 0
1827   %allones = shufflevector <vscale x 16 x i1> %splat, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
1828   %x = call <vscale x 16 x i32> @llvm.vp.mul.nxv16i32(<vscale x 16 x i32> %a, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %allones, i32 %evl)
1829   %y = call <vscale x 16 x i32> @llvm.vp.add.nxv16i32(<vscale x 16 x i32> %x, <vscale x 16 x i32> %c, <vscale x 16 x i1> %allones, i32 %evl)
1830   %u = call <vscale x 16 x i32> @llvm.vp.select.nxv16i32(<vscale x 16 x i1> %m, <vscale x 16 x i32> %y, <vscale x 16 x i32> %a, i32 %evl)
1831   ret <vscale x 16 x i32> %u
1832 }
1833
; Vector-predicated mul/add/merge/select intrinsics on <vscale x 1 x i64>,
; called by the nxv1i64 tests that follow.
1834 declare <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
1835 declare <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
1836 declare <vscale x 1 x i64> @llvm.vp.merge.nxv1i64(<vscale x 1 x i1>, <vscale x 1 x i64>, <vscale x 1 x i64>, i32)
1837 declare <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1>, <vscale x 1 x i64>, <vscale x 1 x i64>, i32)
1838
; Masked vector-vector case at e64/m1: vp.mul(%a, %b) and vp.add(.., %c) run
; under an all-ones mask, and vp.merge on %m combines the sum with %a.
; The assertions expect an unmasked vmadd.vv into v9 under "ta, ma", then a
; vmerge.vvm on v0 under "tu, ma".
1839 define <vscale x 1 x i64> @vmadd_vv_nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %b, <vscale x 1 x i64> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
1840 ; CHECK-LABEL: vmadd_vv_nxv1i64:
1841 ; CHECK:       # %bb.0:
1842 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
1843 ; CHECK-NEXT:    vmadd.vv v9, v8, v10
1844 ; CHECK-NEXT:    vsetvli zero, zero, e64, m1, tu, ma
1845 ; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
1846 ; CHECK-NEXT:    ret
1847   %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
1848   %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
1849   %x = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %b, <vscale x 1 x i1> %allones, i32 %evl)
1850   %y = call <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %c, <vscale x 1 x i1> %allones, i32 %evl)
1851   %u = call <vscale x 1 x i64> @llvm.vp.merge.nxv1i64(<vscale x 1 x i1> %m, <vscale x 1 x i64> %y, <vscale x 1 x i64> %a, i32 %evl)
1852   ret <vscale x 1 x i64> %u
1853 }
1854
; Unmasked vector-vector case at e64/m1: the final vp.merge uses the all-ones
; mask, so the %m argument is never referenced.
; The assertions expect an unmasked vmadd.vv into v9 under "ta, ma", then a
; vmv.v.v into v8 under "tu, ma" in place of a vmerge.
1855 define <vscale x 1 x i64> @vmadd_vv_nxv1i64_unmasked(<vscale x 1 x i64> %a, <vscale x 1 x i64> %b, <vscale x 1 x i64> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
1856 ; CHECK-LABEL: vmadd_vv_nxv1i64_unmasked:
1857 ; CHECK:       # %bb.0:
1858 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
1859 ; CHECK-NEXT:    vmadd.vv v9, v8, v10
1860 ; CHECK-NEXT:    vsetvli zero, zero, e64, m1, tu, ma
1861 ; CHECK-NEXT:    vmv.v.v v8, v9
1862 ; CHECK-NEXT:    ret
1863   %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
1864   %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
1865   %x = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %b, <vscale x 1 x i1> %allones, i32 %evl)
1866   %y = call <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %c, <vscale x 1 x i1> %allones, i32 %evl)
1867   %u = call <vscale x 1 x i64> @llvm.vp.merge.nxv1i64(<vscale x 1 x i1> %allones, <vscale x 1 x i64> %y, <vscale x 1 x i64> %a, i32 %evl)
1868   ret <vscale x 1 x i64> %u
1869 }
1870
; Masked vector-scalar case at e64/m1: the i64 %b is splatted, multiplied with
; %a, added to %c, and merged with %a on %m via vp.merge.
; On riscv32 the assertions expect a0/a1 to be stored to the stack and
; broadcast with a zero-stride vlse64.v (presumably because the i64 scalar
; does not fit in one GPR there -- confirm), then vmadd.vv under "ta, ma" and
; vmerge.vvm under "tu, ma".
; On riscv64 they expect a single masked vmadd.vx (v0.t) under "tu, mu".
1871 define <vscale x 1 x i64> @vmadd_vx_nxv1i64(<vscale x 1 x i64> %a, i64 %b, <vscale x 1 x i64> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
1872 ; RV32-LABEL: vmadd_vx_nxv1i64:
1873 ; RV32:       # %bb.0:
1874 ; RV32-NEXT:    addi sp, sp, -16
1875 ; RV32-NEXT:    .cfi_def_cfa_offset 16
1876 ; RV32-NEXT:    sw a1, 12(sp)
1877 ; RV32-NEXT:    sw a0, 8(sp)
1878 ; RV32-NEXT:    addi a0, sp, 8
1879 ; RV32-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
1880 ; RV32-NEXT:    vlse64.v v10, (a0), zero
1881 ; RV32-NEXT:    vsetvli zero, a2, e64, m1, ta, ma
1882 ; RV32-NEXT:    vmadd.vv v10, v8, v9
1883 ; RV32-NEXT:    vsetvli zero, zero, e64, m1, tu, ma
1884 ; RV32-NEXT:    vmerge.vvm v8, v8, v10, v0
1885 ; RV32-NEXT:    addi sp, sp, 16
1886 ; RV32-NEXT:    ret
1887 ;
1888 ; RV64-LABEL: vmadd_vx_nxv1i64:
1889 ; RV64:       # %bb.0:
1890 ; RV64-NEXT:    vsetvli zero, a1, e64, m1, tu, mu
1891 ; RV64-NEXT:    vmadd.vx v8, a0, v9, v0.t
1892 ; RV64-NEXT:    ret
1893   %elt.head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0
1894   %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
1895   %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
1896   %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
1897   %x = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %allones, i32 %evl)
1898   %y = call <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %c, <vscale x 1 x i1> %allones, i32 %evl)
1899   %u = call <vscale x 1 x i64> @llvm.vp.merge.nxv1i64(<vscale x 1 x i1> %m, <vscale x 1 x i64> %y, <vscale x 1 x i64> %a, i32 %evl)
1900   ret <vscale x 1 x i64> %u
1901 }
1902
; Unmasked vector-scalar case at e64/m1: %b is splatted, and the final
; vp.merge uses the all-ones mask, so the %m argument is never referenced.
; On riscv32 the assertions expect a0/a1 to be stored to the stack and
; broadcast with a zero-stride vlse64.v (presumably the two halves of %b --
; confirm), then vmadd.vv under "ta, ma" and vmv.v.v under "tu, ma".
; On riscv64 they expect a single unmasked vmadd.vx under "tu, ma".
1903 define <vscale x 1 x i64> @vmadd_vx_nxv1i64_unmasked(<vscale x 1 x i64> %a, i64 %b, <vscale x 1 x i64> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
1904 ; RV32-LABEL: vmadd_vx_nxv1i64_unmasked:
1905 ; RV32:       # %bb.0:
1906 ; RV32-NEXT:    addi sp, sp, -16
1907 ; RV32-NEXT:    .cfi_def_cfa_offset 16
1908 ; RV32-NEXT:    sw a1, 12(sp)
1909 ; RV32-NEXT:    sw a0, 8(sp)
1910 ; RV32-NEXT:    addi a0, sp, 8
1911 ; RV32-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
1912 ; RV32-NEXT:    vlse64.v v10, (a0), zero
1913 ; RV32-NEXT:    vsetvli zero, a2, e64, m1, ta, ma
1914 ; RV32-NEXT:    vmadd.vv v10, v8, v9
1915 ; RV32-NEXT:    vsetvli zero, zero, e64, m1, tu, ma
1916 ; RV32-NEXT:    vmv.v.v v8, v10
1917 ; RV32-NEXT:    addi sp, sp, 16
1918 ; RV32-NEXT:    ret
1919 ;
1920 ; RV64-LABEL: vmadd_vx_nxv1i64_unmasked:
1921 ; RV64:       # %bb.0:
1922 ; RV64-NEXT:    vsetvli zero, a1, e64, m1, tu, ma
1923 ; RV64-NEXT:    vmadd.vx v8, a0, v9
1924 ; RV64-NEXT:    ret
1925   %elt.head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0
1926   %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
1927   %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
1928   %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
1929   %x = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %allones, i32 %evl)
1930   %y = call <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %c, <vscale x 1 x i1> %allones, i32 %evl)
1931   %u = call <vscale x 1 x i64> @llvm.vp.merge.nxv1i64(<vscale x 1 x i1> %allones, <vscale x 1 x i64> %y, <vscale x 1 x i64> %a, i32 %evl)
1932   ret <vscale x 1 x i64> %u
1933 }
1934
; "_ta" vector-vector case at e64/m1: identical to the masked test except the
; final combine is vp.select rather than vp.merge.
; The assertions expect vmadd.vv and vmerge.vvm under a single "ta, ma"
; vsetvli, with no switch to a tail-undisturbed policy.
1935 define <vscale x 1 x i64> @vmadd_vv_nxv1i64_ta(<vscale x 1 x i64> %a, <vscale x 1 x i64> %b, <vscale x 1 x i64> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
1936 ; CHECK-LABEL: vmadd_vv_nxv1i64_ta:
1937 ; CHECK:       # %bb.0:
1938 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
1939 ; CHECK-NEXT:    vmadd.vv v9, v8, v10
1940 ; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
1941 ; CHECK-NEXT:    ret
1942   %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
1943   %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
1944   %x = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %b, <vscale x 1 x i1> %allones, i32 %evl)
1945   %y = call <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %c, <vscale x 1 x i1> %allones, i32 %evl)
1946   %u = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> %m, <vscale x 1 x i64> %y, <vscale x 1 x i64> %a, i32 %evl)
1947   ret <vscale x 1 x i64> %u
1948 }
1949
; "_ta" vector-scalar case at e64/m1: %b is splatted and the final combine is
; vp.select rather than vp.merge.
; On riscv32 the assertions expect a0/a1 to be stored to the stack and
; broadcast with a zero-stride vlse64.v (presumably the two halves of %b --
; confirm), then vmadd.vv and vmerge.vvm under one "ta, ma" vsetvli.
; On riscv64 they expect vmacc.vx into v9 followed by vmerge.vvm, both under
; "ta, ma".
1950 define <vscale x 1 x i64> @vmadd_vx_nxv1i64_ta(<vscale x 1 x i64> %a, i64 %b, <vscale x 1 x i64> %c,  <vscale x 1 x i1> %m, i32 zeroext %evl) {
1951 ; RV32-LABEL: vmadd_vx_nxv1i64_ta:
1952 ; RV32:       # %bb.0:
1953 ; RV32-NEXT:    addi sp, sp, -16
1954 ; RV32-NEXT:    .cfi_def_cfa_offset 16
1955 ; RV32-NEXT:    sw a1, 12(sp)
1956 ; RV32-NEXT:    sw a0, 8(sp)
1957 ; RV32-NEXT:    addi a0, sp, 8
1958 ; RV32-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
1959 ; RV32-NEXT:    vlse64.v v10, (a0), zero
1960 ; RV32-NEXT:    vsetvli zero, a2, e64, m1, ta, ma
1961 ; RV32-NEXT:    vmadd.vv v10, v8, v9
1962 ; RV32-NEXT:    vmerge.vvm v8, v8, v10, v0
1963 ; RV32-NEXT:    addi sp, sp, 16
1964 ; RV32-NEXT:    ret
1965 ;
1966 ; RV64-LABEL: vmadd_vx_nxv1i64_ta:
1967 ; RV64:       # %bb.0:
1968 ; RV64-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
1969 ; RV64-NEXT:    vmacc.vx v9, a0, v8
1970 ; RV64-NEXT:    vmerge.vvm v8, v8, v9, v0
1971 ; RV64-NEXT:    ret
1972   %elt.head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0
1973   %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
1974   %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
1975   %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
1976   %x = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %allones, i32 %evl)
1977   %y = call <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %c, <vscale x 1 x i1> %allones, i32 %evl)
1978   %u = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> %m, <vscale x 1 x i64> %y, <vscale x 1 x i64> %a, i32 %evl)
1979   ret <vscale x 1 x i64> %u
1980 }
1981
; Vector-predicated mul/add/merge/select intrinsics on <vscale x 2 x i64>,
; called by the nxv2i64 tests that follow.
1982 declare <vscale x 2 x i64> @llvm.vp.mul.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i32)
1983 declare <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i32)
1984 declare <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>, i32)
1985 declare <vscale x 2 x i64> @llvm.vp.select.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>, i32)
1986
; Masked vector-vector case at e64/m2: vp.mul(%a, %b) and vp.add(.., %c) run
; under an all-ones mask, and vp.merge on %m combines the sum with %a.
; The assertions expect an unmasked vmadd.vv into v10 under "ta, ma", then a
; vmerge.vvm on v0 under "tu, ma".
1987 define <vscale x 2 x i64> @vmadd_vv_nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
1988 ; CHECK-LABEL: vmadd_vv_nxv2i64:
1989 ; CHECK:       # %bb.0:
1990 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
1991 ; CHECK-NEXT:    vmadd.vv v10, v8, v12
1992 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, tu, ma
1993 ; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
1994 ; CHECK-NEXT:    ret
1995   %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
1996   %allones = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
1997   %x = call <vscale x 2 x i64> @llvm.vp.mul.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i1> %allones, i32 %evl)
1998   %y = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %c, <vscale x 2 x i1> %allones, i32 %evl)
1999   %u = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> %m, <vscale x 2 x i64> %y, <vscale x 2 x i64> %a, i32 %evl)
2000   ret <vscale x 2 x i64> %u
2001 }
2002
; Unmasked vector-vector case at e64/m2: the final vp.merge uses the all-ones
; mask, so the %m argument is never referenced.
; The assertions expect an unmasked vmadd.vv into v10 under "ta, ma", then a
; vmv.v.v into v8 under "tu, ma" in place of a vmerge.
2003 define <vscale x 2 x i64> @vmadd_vv_nxv2i64_unmasked(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
2004 ; CHECK-LABEL: vmadd_vv_nxv2i64_unmasked:
2005 ; CHECK:       # %bb.0:
2006 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
2007 ; CHECK-NEXT:    vmadd.vv v10, v8, v12
2008 ; CHECK-NEXT:    vsetvli zero, zero, e64, m2, tu, ma
2009 ; CHECK-NEXT:    vmv.v.v v8, v10
2010 ; CHECK-NEXT:    ret
2011   %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
2012   %allones = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
2013   %x = call <vscale x 2 x i64> @llvm.vp.mul.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i1> %allones, i32 %evl)
2014   %y = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %c, <vscale x 2 x i1> %allones, i32 %evl)
2015   %u = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> %allones, <vscale x 2 x i64> %y, <vscale x 2 x i64> %a, i32 %evl)
2016   ret <vscale x 2 x i64> %u
2017 }
2018
; Masked vector-scalar case at e64/m2: the i64 %b is splatted, multiplied with
; %a, added to %c, and merged with %a on %m via vp.merge.
; On riscv32 the assertions expect a0/a1 to be stored to the stack and
; broadcast with a zero-stride vlse64.v (presumably the two halves of %b --
; confirm), then vmadd.vv under "ta, ma" and vmerge.vvm under "tu, ma".
; On riscv64 they expect a single masked vmadd.vx (v0.t) under "tu, mu".
2019 define <vscale x 2 x i64> @vmadd_vx_nxv2i64(<vscale x 2 x i64> %a, i64 %b, <vscale x 2 x i64> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
2020 ; RV32-LABEL: vmadd_vx_nxv2i64:
2021 ; RV32:       # %bb.0:
2022 ; RV32-NEXT:    addi sp, sp, -16
2023 ; RV32-NEXT:    .cfi_def_cfa_offset 16
2024 ; RV32-NEXT:    sw a1, 12(sp)
2025 ; RV32-NEXT:    sw a0, 8(sp)
2026 ; RV32-NEXT:    addi a0, sp, 8
2027 ; RV32-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
2028 ; RV32-NEXT:    vlse64.v v12, (a0), zero
2029 ; RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, ma
2030 ; RV32-NEXT:    vmadd.vv v12, v8, v10
2031 ; RV32-NEXT:    vsetvli zero, zero, e64, m2, tu, ma
2032 ; RV32-NEXT:    vmerge.vvm v8, v8, v12, v0
2033 ; RV32-NEXT:    addi sp, sp, 16
2034 ; RV32-NEXT:    ret
2035 ;
2036 ; RV64-LABEL: vmadd_vx_nxv2i64:
2037 ; RV64:       # %bb.0:
2038 ; RV64-NEXT:    vsetvli zero, a1, e64, m2, tu, mu
2039 ; RV64-NEXT:    vmadd.vx v8, a0, v10, v0.t
2040 ; RV64-NEXT:    ret
2041   %elt.head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0
2042   %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
2043   %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
2044   %allones = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
2045   %x = call <vscale x 2 x i64> @llvm.vp.mul.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %allones, i32 %evl)
2046   %y = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %c, <vscale x 2 x i1> %allones, i32 %evl)
2047   %u = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> %m, <vscale x 2 x i64> %y, <vscale x 2 x i64> %a, i32 %evl)
2048   ret <vscale x 2 x i64> %u
2049 }
2050
; Unmasked vector-scalar case at e64/m2: %b is splatted, and the final
; vp.merge uses the all-ones mask, so the %m argument is never referenced.
; On riscv32 the assertions expect a0/a1 to be stored to the stack and
; broadcast with a zero-stride vlse64.v (presumably the two halves of %b --
; confirm), then vmadd.vv under "ta, ma" and vmv.v.v under "tu, ma".
; On riscv64 they expect a single unmasked vmadd.vx under "tu, ma".
2051 define <vscale x 2 x i64> @vmadd_vx_nxv2i64_unmasked(<vscale x 2 x i64> %a, i64 %b, <vscale x 2 x i64> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
2052 ; RV32-LABEL: vmadd_vx_nxv2i64_unmasked:
2053 ; RV32:       # %bb.0:
2054 ; RV32-NEXT:    addi sp, sp, -16
2055 ; RV32-NEXT:    .cfi_def_cfa_offset 16
2056 ; RV32-NEXT:    sw a1, 12(sp)
2057 ; RV32-NEXT:    sw a0, 8(sp)
2058 ; RV32-NEXT:    addi a0, sp, 8
2059 ; RV32-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
2060 ; RV32-NEXT:    vlse64.v v12, (a0), zero
2061 ; RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, ma
2062 ; RV32-NEXT:    vmadd.vv v12, v8, v10
2063 ; RV32-NEXT:    vsetvli zero, zero, e64, m2, tu, ma
2064 ; RV32-NEXT:    vmv.v.v v8, v12
2065 ; RV32-NEXT:    addi sp, sp, 16
2066 ; RV32-NEXT:    ret
2067 ;
2068 ; RV64-LABEL: vmadd_vx_nxv2i64_unmasked:
2069 ; RV64:       # %bb.0:
2070 ; RV64-NEXT:    vsetvli zero, a1, e64, m2, tu, ma
2071 ; RV64-NEXT:    vmadd.vx v8, a0, v10
2072 ; RV64-NEXT:    ret
2073   %elt.head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0
2074   %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
2075   %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
2076   %allones = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
2077   %x = call <vscale x 2 x i64> @llvm.vp.mul.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %allones, i32 %evl)
2078   %y = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %c, <vscale x 2 x i1> %allones, i32 %evl)
2079   %u = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> %allones, <vscale x 2 x i64> %y, <vscale x 2 x i64> %a, i32 %evl)
2080   ret <vscale x 2 x i64> %u
2081 }
2082
; "_ta" vector-vector case at e64/m2: identical to the masked test except the
; final combine is vp.select rather than vp.merge.
; The assertions expect vmadd.vv and vmerge.vvm under a single "ta, ma"
; vsetvli, with no switch to a tail-undisturbed policy.
2083 define <vscale x 2 x i64> @vmadd_vv_nxv2i64_ta(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
2084 ; CHECK-LABEL: vmadd_vv_nxv2i64_ta:
2085 ; CHECK:       # %bb.0:
2086 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
2087 ; CHECK-NEXT:    vmadd.vv v10, v8, v12
2088 ; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
2089 ; CHECK-NEXT:    ret
2090   %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
2091   %allones = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
2092   %x = call <vscale x 2 x i64> @llvm.vp.mul.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i1> %allones, i32 %evl)
2093   %y = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %c, <vscale x 2 x i1> %allones, i32 %evl)
2094   %u = call <vscale x 2 x i64> @llvm.vp.select.nxv2i64(<vscale x 2 x i1> %m, <vscale x 2 x i64> %y, <vscale x 2 x i64> %a, i32 %evl)
2095   ret <vscale x 2 x i64> %u
2096 }
2097
; "_ta" vector-scalar case at e64/m2: %b is splatted and the final combine is
; vp.select rather than vp.merge.
; On riscv32 the assertions expect a0/a1 to be stored to the stack and
; broadcast with a zero-stride vlse64.v (presumably the two halves of %b --
; confirm), then vmadd.vv and vmerge.vvm under one "ta, ma" vsetvli.
; On riscv64 they expect vmacc.vx into v10 followed by vmerge.vvm, both under
; "ta, ma".
2098 define <vscale x 2 x i64> @vmadd_vx_nxv2i64_ta(<vscale x 2 x i64> %a, i64 %b, <vscale x 2 x i64> %c,  <vscale x 2 x i1> %m, i32 zeroext %evl) {
2099 ; RV32-LABEL: vmadd_vx_nxv2i64_ta:
2100 ; RV32:       # %bb.0:
2101 ; RV32-NEXT:    addi sp, sp, -16
2102 ; RV32-NEXT:    .cfi_def_cfa_offset 16
2103 ; RV32-NEXT:    sw a1, 12(sp)
2104 ; RV32-NEXT:    sw a0, 8(sp)
2105 ; RV32-NEXT:    addi a0, sp, 8
2106 ; RV32-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
2107 ; RV32-NEXT:    vlse64.v v12, (a0), zero
2108 ; RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, ma
2109 ; RV32-NEXT:    vmadd.vv v12, v8, v10
2110 ; RV32-NEXT:    vmerge.vvm v8, v8, v12, v0
2111 ; RV32-NEXT:    addi sp, sp, 16
2112 ; RV32-NEXT:    ret
2113 ;
2114 ; RV64-LABEL: vmadd_vx_nxv2i64_ta:
2115 ; RV64:       # %bb.0:
2116 ; RV64-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
2117 ; RV64-NEXT:    vmacc.vx v10, a0, v8
2118 ; RV64-NEXT:    vmerge.vvm v8, v8, v10, v0
2119 ; RV64-NEXT:    ret
2120   %elt.head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0
2121   %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
2122   %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
2123   %allones = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
2124   %x = call <vscale x 2 x i64> @llvm.vp.mul.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %allones, i32 %evl)
2125   %y = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %c, <vscale x 2 x i1> %allones, i32 %evl)
2126   %u = call <vscale x 2 x i64> @llvm.vp.select.nxv2i64(<vscale x 2 x i1> %m, <vscale x 2 x i64> %y, <vscale x 2 x i64> %a, i32 %evl)
2127   ret <vscale x 2 x i64> %u
2128 }
2129
; Vector-predicated mul/add/merge/select intrinsics on <vscale x 4 x i64>,
; called by the nxv4i64 tests that follow.
2130 declare <vscale x 4 x i64> @llvm.vp.mul.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>, <vscale x 4 x i1>, i32)
2131 declare <vscale x 4 x i64> @llvm.vp.add.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>, <vscale x 4 x i1>, i32)
2132 declare <vscale x 4 x i64> @llvm.vp.merge.nxv4i64(<vscale x 4 x i1>, <vscale x 4 x i64>, <vscale x 4 x i64>, i32)
2133 declare <vscale x 4 x i64> @llvm.vp.select.nxv4i64(<vscale x 4 x i1>, <vscale x 4 x i64>, <vscale x 4 x i64>, i32)
2134
; Masked vector-vector case at e64/m4: vp.mul(%a, %b) and vp.add(.., %c) run
; under an all-ones mask, and vp.merge on %m combines the sum with %a.
; The assertions expect an unmasked vmadd.vv into v12 under "ta, ma", then a
; vmerge.vvm on v0 under "tu, ma".
2135 define <vscale x 4 x i64> @vmadd_vv_nxv4i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b, <vscale x 4 x i64> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
2136 ; CHECK-LABEL: vmadd_vv_nxv4i64:
2137 ; CHECK:       # %bb.0:
2138 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
2139 ; CHECK-NEXT:    vmadd.vv v12, v8, v16
2140 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, tu, ma
2141 ; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
2142 ; CHECK-NEXT:    ret
2143   %splat = insertelement <vscale x 4 x i1> poison, i1 -1, i32 0
2144   %allones = shufflevector <vscale x 4 x i1> %splat, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
2145   %x = call <vscale x 4 x i64> @llvm.vp.mul.nxv4i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b, <vscale x 4 x i1> %allones, i32 %evl)
2146   %y = call <vscale x 4 x i64> @llvm.vp.add.nxv4i64(<vscale x 4 x i64> %x, <vscale x 4 x i64> %c, <vscale x 4 x i1> %allones, i32 %evl)
2147   %u = call <vscale x 4 x i64> @llvm.vp.merge.nxv4i64(<vscale x 4 x i1> %m, <vscale x 4 x i64> %y, <vscale x 4 x i64> %a, i32 %evl)
2148   ret <vscale x 4 x i64> %u
2149 }
2150
; Unmasked vector-vector case at e64/m4: the final vp.merge uses the all-ones
; mask, so the %m argument is never referenced.
; The assertions expect an unmasked vmadd.vv into v12 under "ta, ma", then a
; vmv.v.v into v8 under "tu, ma" in place of a vmerge.
2151 define <vscale x 4 x i64> @vmadd_vv_nxv4i64_unmasked(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b, <vscale x 4 x i64> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
2152 ; CHECK-LABEL: vmadd_vv_nxv4i64_unmasked:
2153 ; CHECK:       # %bb.0:
2154 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
2155 ; CHECK-NEXT:    vmadd.vv v12, v8, v16
2156 ; CHECK-NEXT:    vsetvli zero, zero, e64, m4, tu, ma
2157 ; CHECK-NEXT:    vmv.v.v v8, v12
2158 ; CHECK-NEXT:    ret
2159   %splat = insertelement <vscale x 4 x i1> poison, i1 -1, i32 0
2160   %allones = shufflevector <vscale x 4 x i1> %splat, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
2161   %x = call <vscale x 4 x i64> @llvm.vp.mul.nxv4i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b, <vscale x 4 x i1> %allones, i32 %evl)
2162   %y = call <vscale x 4 x i64> @llvm.vp.add.nxv4i64(<vscale x 4 x i64> %x, <vscale x 4 x i64> %c, <vscale x 4 x i1> %allones, i32 %evl)
2163   %u = call <vscale x 4 x i64> @llvm.vp.merge.nxv4i64(<vscale x 4 x i1> %allones, <vscale x 4 x i64> %y, <vscale x 4 x i64> %a, i32 %evl)
2164   ret <vscale x 4 x i64> %u
2165 }
2166
; Masked vector-scalar case at e64/m4: the i64 %b is splatted, multiplied with
; %a, added to %c, and merged with %a on %m via vp.merge.
; On riscv32 the assertions expect a0/a1 to be stored to the stack and
; broadcast with a zero-stride vlse64.v (presumably the two halves of %b --
; confirm), then vmadd.vv under "ta, ma" and vmerge.vvm under "tu, ma".
; On riscv64 they expect a single masked vmadd.vx (v0.t) under "tu, mu".
2167 define <vscale x 4 x i64> @vmadd_vx_nxv4i64(<vscale x 4 x i64> %a, i64 %b, <vscale x 4 x i64> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
2168 ; RV32-LABEL: vmadd_vx_nxv4i64:
2169 ; RV32:       # %bb.0:
2170 ; RV32-NEXT:    addi sp, sp, -16
2171 ; RV32-NEXT:    .cfi_def_cfa_offset 16
2172 ; RV32-NEXT:    sw a1, 12(sp)
2173 ; RV32-NEXT:    sw a0, 8(sp)
2174 ; RV32-NEXT:    addi a0, sp, 8
2175 ; RV32-NEXT:    vsetvli a1, zero, e64, m4, ta, ma
2176 ; RV32-NEXT:    vlse64.v v16, (a0), zero
2177 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
2178 ; RV32-NEXT:    vmadd.vv v16, v8, v12
2179 ; RV32-NEXT:    vsetvli zero, zero, e64, m4, tu, ma
2180 ; RV32-NEXT:    vmerge.vvm v8, v8, v16, v0
2181 ; RV32-NEXT:    addi sp, sp, 16
2182 ; RV32-NEXT:    ret
2183 ;
2184 ; RV64-LABEL: vmadd_vx_nxv4i64:
2185 ; RV64:       # %bb.0:
2186 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, tu, mu
2187 ; RV64-NEXT:    vmadd.vx v8, a0, v12, v0.t
2188 ; RV64-NEXT:    ret
2189   %elt.head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0
2190   %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
2191   %splat = insertelement <vscale x 4 x i1> poison, i1 -1, i32 0
2192   %allones = shufflevector <vscale x 4 x i1> %splat, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
2193   %x = call <vscale x 4 x i64> @llvm.vp.mul.nxv4i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %allones, i32 %evl)
2194   %y = call <vscale x 4 x i64> @llvm.vp.add.nxv4i64(<vscale x 4 x i64> %x, <vscale x 4 x i64> %c, <vscale x 4 x i1> %allones, i32 %evl)
2195   %u = call <vscale x 4 x i64> @llvm.vp.merge.nxv4i64(<vscale x 4 x i1> %m, <vscale x 4 x i64> %y, <vscale x 4 x i64> %a, i32 %evl)
2196   ret <vscale x 4 x i64> %u
2197 }
2198
; Unmasked vector-scalar case at e64/m4: %b is splatted, and the final
; vp.merge uses the all-ones mask, so the %m argument is never referenced.
; On riscv32 the assertions expect a0/a1 to be stored to the stack and
; broadcast with a zero-stride vlse64.v (presumably the two halves of %b --
; confirm), then vmadd.vv under "ta, ma" and vmv.v.v under "tu, ma".
; On riscv64 they expect a single unmasked vmadd.vx under "tu, ma".
2199 define <vscale x 4 x i64> @vmadd_vx_nxv4i64_unmasked(<vscale x 4 x i64> %a, i64 %b, <vscale x 4 x i64> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
2200 ; RV32-LABEL: vmadd_vx_nxv4i64_unmasked:
2201 ; RV32:       # %bb.0:
2202 ; RV32-NEXT:    addi sp, sp, -16
2203 ; RV32-NEXT:    .cfi_def_cfa_offset 16
2204 ; RV32-NEXT:    sw a1, 12(sp)
2205 ; RV32-NEXT:    sw a0, 8(sp)
2206 ; RV32-NEXT:    addi a0, sp, 8
2207 ; RV32-NEXT:    vsetvli a1, zero, e64, m4, ta, ma
2208 ; RV32-NEXT:    vlse64.v v16, (a0), zero
2209 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
2210 ; RV32-NEXT:    vmadd.vv v16, v8, v12
2211 ; RV32-NEXT:    vsetvli zero, zero, e64, m4, tu, ma
2212 ; RV32-NEXT:    vmv.v.v v8, v16
2213 ; RV32-NEXT:    addi sp, sp, 16
2214 ; RV32-NEXT:    ret
2215 ;
2216 ; RV64-LABEL: vmadd_vx_nxv4i64_unmasked:
2217 ; RV64:       # %bb.0:
2218 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, tu, ma
2219 ; RV64-NEXT:    vmadd.vx v8, a0, v12
2220 ; RV64-NEXT:    ret
2221   %elt.head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0
2222   %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
2223   %splat = insertelement <vscale x 4 x i1> poison, i1 -1, i32 0
2224   %allones = shufflevector <vscale x 4 x i1> %splat, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
2225   %x = call <vscale x 4 x i64> @llvm.vp.mul.nxv4i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %allones, i32 %evl)
2226   %y = call <vscale x 4 x i64> @llvm.vp.add.nxv4i64(<vscale x 4 x i64> %x, <vscale x 4 x i64> %c, <vscale x 4 x i1> %allones, i32 %evl)
2227   %u = call <vscale x 4 x i64> @llvm.vp.merge.nxv4i64(<vscale x 4 x i1> %allones, <vscale x 4 x i64> %y, <vscale x 4 x i64> %a, i32 %evl)
2228   ret <vscale x 4 x i64> %u
2229 }
2230
; "_ta" vector-vector case at e64/m4: identical to the masked test except the
; final combine is vp.select rather than vp.merge.
; The assertions expect vmadd.vv and vmerge.vvm under a single "ta, ma"
; vsetvli, with no switch to a tail-undisturbed policy.
2231 define <vscale x 4 x i64> @vmadd_vv_nxv4i64_ta(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b, <vscale x 4 x i64> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
2232 ; CHECK-LABEL: vmadd_vv_nxv4i64_ta:
2233 ; CHECK:       # %bb.0:
2234 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
2235 ; CHECK-NEXT:    vmadd.vv v12, v8, v16
2236 ; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
2237 ; CHECK-NEXT:    ret
2238   %splat = insertelement <vscale x 4 x i1> poison, i1 -1, i32 0
2239   %allones = shufflevector <vscale x 4 x i1> %splat, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
2240   %x = call <vscale x 4 x i64> @llvm.vp.mul.nxv4i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b, <vscale x 4 x i1> %allones, i32 %evl)
2241   %y = call <vscale x 4 x i64> @llvm.vp.add.nxv4i64(<vscale x 4 x i64> %x, <vscale x 4 x i64> %c, <vscale x 4 x i1> %allones, i32 %evl)
2242   %u = call <vscale x 4 x i64> @llvm.vp.select.nxv4i64(<vscale x 4 x i1> %m, <vscale x 4 x i64> %y, <vscale x 4 x i64> %a, i32 %evl)
2243   ret <vscale x 4 x i64> %u
2244 }
2245
; Vector-scalar multiply-add on <vscale x 4 x i64>, blended with vp.select.
; The i64 scalar %b is splatted into %vb, multiplied with %a and added to %c
; (all-ones mask, explicit vector length %evl); vp.select then takes %m as the
; condition, the sum as its first value and %a as its second value.
; Expected code differs per target:
;  * riscv32: the scalar arrives in two 32-bit registers, is stored to the
;    stack and broadcast with a zero-stride vlse64.v, then vmadd.vv and
;    vmerge.vvm are used.
;  * riscv64: vmacc.vx takes the scalar register directly, followed by
;    vmerge.vvm.
; Every vsetvli in the expected output uses the "ta, ma" policy.
2246 define <vscale x 4 x i64> @vmadd_vx_nxv4i64_ta(<vscale x 4 x i64> %a, i64 %b, <vscale x 4 x i64> %c,  <vscale x 4 x i1> %m, i32 zeroext %evl) {
2247 ; RV32-LABEL: vmadd_vx_nxv4i64_ta:
2248 ; RV32:       # %bb.0:
2249 ; RV32-NEXT:    addi sp, sp, -16
2250 ; RV32-NEXT:    .cfi_def_cfa_offset 16
2251 ; RV32-NEXT:    sw a1, 12(sp)
2252 ; RV32-NEXT:    sw a0, 8(sp)
2253 ; RV32-NEXT:    addi a0, sp, 8
2254 ; RV32-NEXT:    vsetvli a1, zero, e64, m4, ta, ma
2255 ; RV32-NEXT:    vlse64.v v16, (a0), zero
2256 ; RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
2257 ; RV32-NEXT:    vmadd.vv v16, v8, v12
2258 ; RV32-NEXT:    vmerge.vvm v8, v8, v16, v0
2259 ; RV32-NEXT:    addi sp, sp, 16
2260 ; RV32-NEXT:    ret
2261 ;
2262 ; RV64-LABEL: vmadd_vx_nxv4i64_ta:
2263 ; RV64:       # %bb.0:
2264 ; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
2265 ; RV64-NEXT:    vmacc.vx v12, a0, v8
2266 ; RV64-NEXT:    vmerge.vvm v8, v8, v12, v0
2267 ; RV64-NEXT:    ret
; Broadcast the scalar %b to every lane of %vb.
2268   %elt.head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0
2269   %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; Splat of i1 -1: the all-ones mask used by the multiply and the add.
2270   %splat = insertelement <vscale x 4 x i1> poison, i1 -1, i32 0
2271   %allones = shufflevector <vscale x 4 x i1> %splat, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
2272   %x = call <vscale x 4 x i64> @llvm.vp.mul.nxv4i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %allones, i32 %evl)
2273   %y = call <vscale x 4 x i64> @llvm.vp.add.nxv4i64(<vscale x 4 x i64> %x, <vscale x 4 x i64> %c, <vscale x 4 x i1> %allones, i32 %evl)
2274   %u = call <vscale x 4 x i64> @llvm.vp.select.nxv4i64(<vscale x 4 x i1> %m, <vscale x 4 x i64> %y, <vscale x 4 x i64> %a, i32 %evl)
2275   ret <vscale x 4 x i64> %u
2276 }
2277
; Vector-predication intrinsics for <vscale x 8 x i64>. mul and add take two
; vector operands, an i1 mask vector and an i32 explicit vector length;
; merge and select take the i1 mask vector first, then two vector operands
; and the i32 explicit vector length.
2278 declare <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>, <vscale x 8 x i1>, i32)
2279 declare <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>, <vscale x 8 x i1>, i32)
2280 declare <vscale x 8 x i64> @llvm.vp.merge.nxv8i64(<vscale x 8 x i1>, <vscale x 8 x i64>, <vscale x 8 x i64>, i32)
2281 declare <vscale x 8 x i64> @llvm.vp.select.nxv8i64(<vscale x 8 x i1>, <vscale x 8 x i64>, <vscale x 8 x i64>, i32)
2282
; Vector-vector multiply-add on <vscale x 8 x i64>, blended with vp.merge
; under the mask %m.
; vp.mul(%a, %b) feeds vp.add(.., %c); both use an all-ones mask and the
; explicit vector length %evl. vp.merge is then called with %m as the mask,
; the sum as its first value and %a as its second value.
; Both run lines share one set of assertions: one vector operand is first
; loaded from memory with vl8re64.v (presumably %c is passed indirectly at
; this register-group size - TODO confirm), vmacc.vv accumulates into it under
; a "ta, ma" vsetvli, and the blend is a vmerge.vvm under a "tu, ma" vsetvli.
2283 define <vscale x 8 x i64> @vmadd_vv_nxv8i64(<vscale x 8 x i64> %a, <vscale x 8 x i64> %b, <vscale x 8 x i64> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
2284 ; CHECK-LABEL: vmadd_vv_nxv8i64:
2285 ; CHECK:       # %bb.0:
2286 ; CHECK-NEXT:    vl8re64.v v24, (a0)
2287 ; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
2288 ; CHECK-NEXT:    vmacc.vv v24, v8, v16
2289 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, tu, ma
2290 ; CHECK-NEXT:    vmerge.vvm v8, v8, v24, v0
2291 ; CHECK-NEXT:    ret
; Splat of i1 -1: the all-ones mask used by the multiply and the add.
2292   %splat = insertelement <vscale x 8 x i1> poison, i1 -1, i32 0
2293   %allones = shufflevector <vscale x 8 x i1> %splat, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
2294   %x = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %a, <vscale x 8 x i64> %b, <vscale x 8 x i1> %allones, i32 %evl)
2295   %y = call <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %c, <vscale x 8 x i1> %allones, i32 %evl)
2296   %u = call <vscale x 8 x i64> @llvm.vp.merge.nxv8i64(<vscale x 8 x i1> %m, <vscale x 8 x i64> %y, <vscale x 8 x i64> %a, i32 %evl)
2297   ret <vscale x 8 x i64> %u
2298 }
2299
; Vector-vector multiply-add on <vscale x 8 x i64> where the final vp.merge
; also uses the all-ones mask; the %m parameter is declared but never read.
; vp.mul(%a, %b) feeds vp.add(.., %c), and vp.merge takes the sum as its first
; value and %a as its second value, all with explicit vector length %evl.
; Both run lines share one set of assertions: a vl8re64.v load of one vector
; operand, vmacc.vv under a "ta, ma" vsetvli, then a plain vmv.v.v (no mask
; register involved) under a "tu, ma" vsetvli.
2300 define <vscale x 8 x i64> @vmadd_vv_nxv8i64_unmasked(<vscale x 8 x i64> %a, <vscale x 8 x i64> %b, <vscale x 8 x i64> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
2301 ; CHECK-LABEL: vmadd_vv_nxv8i64_unmasked:
2302 ; CHECK:       # %bb.0:
2303 ; CHECK-NEXT:    vl8re64.v v24, (a0)
2304 ; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
2305 ; CHECK-NEXT:    vmacc.vv v24, v8, v16
2306 ; CHECK-NEXT:    vsetvli zero, zero, e64, m8, tu, ma
2307 ; CHECK-NEXT:    vmv.v.v v8, v24
2308 ; CHECK-NEXT:    ret
; Splat of i1 -1: the all-ones mask used by every VP call in this function.
2309   %splat = insertelement <vscale x 8 x i1> poison, i1 -1, i32 0
2310   %allones = shufflevector <vscale x 8 x i1> %splat, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
2311   %x = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %a, <vscale x 8 x i64> %b, <vscale x 8 x i1> %allones, i32 %evl)
2312   %y = call <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %c, <vscale x 8 x i1> %allones, i32 %evl)
2313   %u = call <vscale x 8 x i64> @llvm.vp.merge.nxv8i64(<vscale x 8 x i1> %allones, <vscale x 8 x i64> %y, <vscale x 8 x i64> %a, i32 %evl)
2314   ret <vscale x 8 x i64> %u
2315 }
2316
; Vector-scalar multiply-add on <vscale x 8 x i64>, blended with vp.merge
; under the mask %m.
; The i64 scalar %b is splatted into %vb, multiplied with %a and added to %c
; (all-ones mask, explicit vector length %evl); vp.merge then takes %m as the
; mask, the sum as its first value and %a as its second value.
; Expected code differs per target:
;  * riscv32: the scalar arrives in two 32-bit registers, is stored to the
;    stack and broadcast with a zero-stride vlse64.v; vmadd.vv runs under
;    "ta, ma" and the blend is a vmerge.vvm under "tu, ma".
;  * riscv64: the whole sequence folds into one masked vmadd.vx (v0.t) under
;    a "tu, mu" vsetvli.
2317 define <vscale x 8 x i64> @vmadd_vx_nxv8i64(<vscale x 8 x i64> %a, i64 %b, <vscale x 8 x i64> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
2318 ; RV32-LABEL: vmadd_vx_nxv8i64:
2319 ; RV32:       # %bb.0:
2320 ; RV32-NEXT:    addi sp, sp, -16
2321 ; RV32-NEXT:    .cfi_def_cfa_offset 16
2322 ; RV32-NEXT:    sw a1, 12(sp)
2323 ; RV32-NEXT:    sw a0, 8(sp)
2324 ; RV32-NEXT:    addi a0, sp, 8
2325 ; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
2326 ; RV32-NEXT:    vlse64.v v24, (a0), zero
2327 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
2328 ; RV32-NEXT:    vmadd.vv v24, v8, v16
2329 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, tu, ma
2330 ; RV32-NEXT:    vmerge.vvm v8, v8, v24, v0
2331 ; RV32-NEXT:    addi sp, sp, 16
2332 ; RV32-NEXT:    ret
2333 ;
2334 ; RV64-LABEL: vmadd_vx_nxv8i64:
2335 ; RV64:       # %bb.0:
2336 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, tu, mu
2337 ; RV64-NEXT:    vmadd.vx v8, a0, v16, v0.t
2338 ; RV64-NEXT:    ret
; Broadcast the scalar %b to every lane of %vb.
2339   %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
2340   %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; Splat of i1 -1: the all-ones mask used by the multiply and the add.
2341   %splat = insertelement <vscale x 8 x i1> poison, i1 -1, i32 0
2342   %allones = shufflevector <vscale x 8 x i1> %splat, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
2343   %x = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %a, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %allones, i32 %evl)
2344   %y = call <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %c, <vscale x 8 x i1> %allones, i32 %evl)
2345   %u = call <vscale x 8 x i64> @llvm.vp.merge.nxv8i64(<vscale x 8 x i1> %m, <vscale x 8 x i64> %y, <vscale x 8 x i64> %a, i32 %evl)
2346   ret <vscale x 8 x i64> %u
2347 }
2348
; Vector-scalar multiply-add on <vscale x 8 x i64> where the final vp.merge
; also uses the all-ones mask; the %m parameter is declared but never read.
; The i64 scalar %b is splatted into %vb, multiplied with %a and added to %c;
; vp.merge takes the sum as its first value and %a as its second value, all
; with explicit vector length %evl.
; Expected code differs per target:
;  * riscv32: the scalar arrives in two 32-bit registers, is stored to the
;    stack and broadcast with a zero-stride vlse64.v; vmadd.vv runs under
;    "ta, ma" and the result is copied with vmv.v.v under "tu, ma".
;  * riscv64: a single unmasked vmadd.vx under a "tu, ma" vsetvli.
2349 define <vscale x 8 x i64> @vmadd_vx_nxv8i64_unmasked(<vscale x 8 x i64> %a, i64 %b, <vscale x 8 x i64> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
2350 ; RV32-LABEL: vmadd_vx_nxv8i64_unmasked:
2351 ; RV32:       # %bb.0:
2352 ; RV32-NEXT:    addi sp, sp, -16
2353 ; RV32-NEXT:    .cfi_def_cfa_offset 16
2354 ; RV32-NEXT:    sw a1, 12(sp)
2355 ; RV32-NEXT:    sw a0, 8(sp)
2356 ; RV32-NEXT:    addi a0, sp, 8
2357 ; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
2358 ; RV32-NEXT:    vlse64.v v24, (a0), zero
2359 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
2360 ; RV32-NEXT:    vmadd.vv v24, v8, v16
2361 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, tu, ma
2362 ; RV32-NEXT:    vmv.v.v v8, v24
2363 ; RV32-NEXT:    addi sp, sp, 16
2364 ; RV32-NEXT:    ret
2365 ;
2366 ; RV64-LABEL: vmadd_vx_nxv8i64_unmasked:
2367 ; RV64:       # %bb.0:
2368 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, tu, ma
2369 ; RV64-NEXT:    vmadd.vx v8, a0, v16
2370 ; RV64-NEXT:    ret
; Broadcast the scalar %b to every lane of %vb.
2371   %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
2372   %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; Splat of i1 -1: the all-ones mask used by every VP call in this function.
2373   %splat = insertelement <vscale x 8 x i1> poison, i1 -1, i32 0
2374   %allones = shufflevector <vscale x 8 x i1> %splat, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
2375   %x = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %a, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %allones, i32 %evl)
2376   %y = call <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %c, <vscale x 8 x i1> %allones, i32 %evl)
2377   %u = call <vscale x 8 x i64> @llvm.vp.merge.nxv8i64(<vscale x 8 x i1> %allones, <vscale x 8 x i64> %y, <vscale x 8 x i64> %a, i32 %evl)
2378   ret <vscale x 8 x i64> %u
2379 }
2380
; Vector-vector multiply-add on <vscale x 8 x i64>, blended with vp.select.
; vp.mul(%a, %b) feeds vp.add(.., %c); both use an all-ones mask and the
; explicit vector length %evl. vp.select is then called with %m as the
; condition, the sum as its first value and %a as its second value.
; Both run lines share one set of assertions: a vl8re64.v load of one vector
; operand, then vmacc.vv and vmerge.vvm under a single "ta, ma" vsetvli (no
; switch to a "tu" policy, unlike the vp.merge based vmadd_vv_nxv8i64 test).
; NOTE(review): the "_ta" suffix presumably means "tail agnostic" - confirm.
2381 define <vscale x 8 x i64> @vmadd_vv_nxv8i64_ta(<vscale x 8 x i64> %a, <vscale x 8 x i64> %b, <vscale x 8 x i64> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
2382 ; CHECK-LABEL: vmadd_vv_nxv8i64_ta:
2383 ; CHECK:       # %bb.0:
2384 ; CHECK-NEXT:    vl8re64.v v24, (a0)
2385 ; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
2386 ; CHECK-NEXT:    vmacc.vv v24, v8, v16
2387 ; CHECK-NEXT:    vmerge.vvm v8, v8, v24, v0
2388 ; CHECK-NEXT:    ret
; Splat of i1 -1: the all-ones mask used by the multiply and the add.
2389   %splat = insertelement <vscale x 8 x i1> poison, i1 -1, i32 0
2390   %allones = shufflevector <vscale x 8 x i1> %splat, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
2391   %x = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %a, <vscale x 8 x i64> %b, <vscale x 8 x i1> %allones, i32 %evl)
2392   %y = call <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %c, <vscale x 8 x i1> %allones, i32 %evl)
2393   %u = call <vscale x 8 x i64> @llvm.vp.select.nxv8i64(<vscale x 8 x i1> %m, <vscale x 8 x i64> %y, <vscale x 8 x i64> %a, i32 %evl)
2394   ret <vscale x 8 x i64> %u
2395 }
2396
; Vector-scalar multiply-add on <vscale x 8 x i64>, blended with vp.select.
; The i64 scalar %b is splatted into %vb, multiplied with %a and added to %c
; (all-ones mask, explicit vector length %evl); vp.select then takes %m as the
; condition, the sum as its first value and %a as its second value.
; Expected code differs per target:
;  * riscv32: the scalar arrives in two 32-bit registers, is stored to the
;    stack and broadcast with a zero-stride vlse64.v, then vmadd.vv and
;    vmerge.vvm are used.
;  * riscv64: vmacc.vx takes the scalar register directly, followed by
;    vmerge.vvm.
; Every vsetvli in the expected output uses the "ta, ma" policy.
2397 define <vscale x 8 x i64> @vmadd_vx_nxv8i64_ta(<vscale x 8 x i64> %a, i64 %b, <vscale x 8 x i64> %c,  <vscale x 8 x i1> %m, i32 zeroext %evl) {
2398 ; RV32-LABEL: vmadd_vx_nxv8i64_ta:
2399 ; RV32:       # %bb.0:
2400 ; RV32-NEXT:    addi sp, sp, -16
2401 ; RV32-NEXT:    .cfi_def_cfa_offset 16
2402 ; RV32-NEXT:    sw a1, 12(sp)
2403 ; RV32-NEXT:    sw a0, 8(sp)
2404 ; RV32-NEXT:    addi a0, sp, 8
2405 ; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
2406 ; RV32-NEXT:    vlse64.v v24, (a0), zero
2407 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
2408 ; RV32-NEXT:    vmadd.vv v24, v8, v16
2409 ; RV32-NEXT:    vmerge.vvm v8, v8, v24, v0
2410 ; RV32-NEXT:    addi sp, sp, 16
2411 ; RV32-NEXT:    ret
2412 ;
2413 ; RV64-LABEL: vmadd_vx_nxv8i64_ta:
2414 ; RV64:       # %bb.0:
2415 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
2416 ; RV64-NEXT:    vmacc.vx v16, a0, v8
2417 ; RV64-NEXT:    vmerge.vvm v8, v8, v16, v0
2418 ; RV64-NEXT:    ret
; Broadcast the scalar %b to every lane of %vb.
2419   %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
2420   %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; Splat of i1 -1: the all-ones mask used by the multiply and the add.
2421   %splat = insertelement <vscale x 8 x i1> poison, i1 -1, i32 0
2422   %allones = shufflevector <vscale x 8 x i1> %splat, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
2423   %x = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %a, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %allones, i32 %evl)
2424   %y = call <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %c, <vscale x 8 x i1> %allones, i32 %evl)
2425   %u = call <vscale x 8 x i64> @llvm.vp.select.nxv8i64(<vscale x 8 x i1> %m, <vscale x 8 x i64> %y, <vscale x 8 x i64> %a, i32 %evl)
2426   ret <vscale x 8 x i64> %u
2427 }