1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \
3 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
4 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \
5 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
; VP intrinsic declarations (mul, add, merge, select) used by the <2 x i8> tests.
; NOTE(review): the name suffix is nxv2i8 although the operand types are fixed-length
; <2 x i8>; presumably the IR reader remangles intrinsic names -- confirm.
7 declare <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8>, <2 x i8>, <2 x i1>, i32)
8 declare <2 x i8> @llvm.vp.add.nxv2i8(<2 x i8>, <2 x i8>, <2 x i1>, i32)
9 declare <2 x i8> @llvm.vp.merge.nxv2i8(<2 x i1>, <2 x i8>, <2 x i8>, i32)
10 declare <2 x i8> @llvm.vp.select.nxv2i8(<2 x i1>, <2 x i8>, <2 x i8>, i32)
; Masked vector-vector multiply-accumulate on <2 x i8>: %a * %b + %c is computed with
; all-ones-masked vp.mul/vp.add, then vp.merge takes %y where %m is set and %c otherwise.
; Expected lowering: one masked vmacc.vv under a "tu, mu" vsetvli.
12 define <2 x i8> @vmacc_vv_nxv2i8(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i1> %m, i32 zeroext %evl) {
13 ; CHECK-LABEL: vmacc_vv_nxv2i8:
15 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu
16 ; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t
17 ; CHECK-NEXT: vmv1r.v v8, v10
19 %splat = insertelement <2 x i1> poison, i1 -1, i32 0
20 %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
21 %x = call <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8> %a, <2 x i8> %b, <2 x i1> %allones, i32 %evl)
22 %y = call <2 x i8> @llvm.vp.add.nxv2i8(<2 x i8> %x, <2 x i8> %c, <2 x i1> %allones, i32 %evl)
23 %u = call <2 x i8> @llvm.vp.merge.nxv2i8(<2 x i1> %m, <2 x i8> %y, <2 x i8> %c, i32 %evl)
; Unmasked vector-vector multiply-accumulate on <2 x i8>: vp.mul, vp.add and the final
; vp.merge all use the all-ones mask (%m is not referenced in the visible body).
; Expected lowering: an unmasked vmacc.vv under a "tu, ma" vsetvli.
27 define <2 x i8> @vmacc_vv_nxv2i8_unmasked(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i1> %m, i32 zeroext %evl) {
28 ; CHECK-LABEL: vmacc_vv_nxv2i8_unmasked:
30 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma
31 ; CHECK-NEXT: vmacc.vv v10, v8, v9
32 ; CHECK-NEXT: vmv1r.v v8, v10
34 %splat = insertelement <2 x i1> poison, i1 -1, i32 0
35 %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
36 %x = call <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8> %a, <2 x i8> %b, <2 x i1> %allones, i32 %evl)
37 %y = call <2 x i8> @llvm.vp.add.nxv2i8(<2 x i8> %x, <2 x i8> %c, <2 x i1> %allones, i32 %evl)
38 %u = call <2 x i8> @llvm.vp.merge.nxv2i8(<2 x i1> %allones, <2 x i8> %y, <2 x i8> %c, i32 %evl)
; Masked vector-scalar multiply-accumulate on <2 x i8>: scalar %b is splatted into %vb
; (insertelement + shufflevector) and fed to all-ones-masked vp.mul/vp.add; vp.merge
; then selects on %m. Expected lowering: one masked vmacc.vx under "tu, mu".
42 define <2 x i8> @vmacc_vx_nxv2i8(<2 x i8> %a, i8 %b, <2 x i8> %c, <2 x i1> %m, i32 zeroext %evl) {
43 ; CHECK-LABEL: vmacc_vx_nxv2i8:
45 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, mu
46 ; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t
47 ; CHECK-NEXT: vmv1r.v v8, v9
49 %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0
50 %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
51 %splat = insertelement <2 x i1> poison, i1 -1, i32 0
52 %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
53 %x = call <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8> %a, <2 x i8> %vb, <2 x i1> %allones, i32 %evl)
54 %y = call <2 x i8> @llvm.vp.add.nxv2i8(<2 x i8> %x, <2 x i8> %c, <2 x i1> %allones, i32 %evl)
55 %u = call <2 x i8> @llvm.vp.merge.nxv2i8(<2 x i1> %m, <2 x i8> %y, <2 x i8> %c, i32 %evl)
; Unmasked vector-scalar multiply-accumulate on <2 x i8>: %b is splatted into %vb and
; every VP call, including the final vp.merge, uses the all-ones mask (%m is not
; referenced in the visible body). Expected lowering: unmasked vmacc.vx under "tu, ma".
59 define <2 x i8> @vmacc_vx_nxv2i8_unmasked(<2 x i8> %a, i8 %b, <2 x i8> %c, <2 x i1> %m, i32 zeroext %evl) {
60 ; CHECK-LABEL: vmacc_vx_nxv2i8_unmasked:
62 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, ma
63 ; CHECK-NEXT: vmacc.vx v9, a0, v8
64 ; CHECK-NEXT: vmv1r.v v8, v9
66 %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0
67 %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
68 %splat = insertelement <2 x i1> poison, i1 -1, i32 0
69 %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
70 %x = call <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8> %a, <2 x i8> %vb, <2 x i1> %allones, i32 %evl)
71 %y = call <2 x i8> @llvm.vp.add.nxv2i8(<2 x i8> %x, <2 x i8> %c, <2 x i1> %allones, i32 %evl)
72 %u = call <2 x i8> @llvm.vp.merge.nxv2i8(<2 x i1> %allones, <2 x i8> %y, <2 x i8> %c, i32 %evl)
; Tail-agnostic vector-vector variant on <2 x i8>: the final blend on %m is vp.select
; rather than vp.merge. Expected lowering: a masked vmacc.vv under a "ta, mu" vsetvli.
76 define <2 x i8> @vmacc_vv_nxv2i8_ta(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i1> %m, i32 zeroext %evl) {
77 ; CHECK-LABEL: vmacc_vv_nxv2i8_ta:
79 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
80 ; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t
81 ; CHECK-NEXT: vmv1r.v v8, v10
83 %splat = insertelement <2 x i1> poison, i1 -1, i32 0
84 %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
85 %x = call <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8> %a, <2 x i8> %b, <2 x i1> %allones, i32 %evl)
86 %y = call <2 x i8> @llvm.vp.add.nxv2i8(<2 x i8> %x, <2 x i8> %c, <2 x i1> %allones, i32 %evl)
87 %u = call <2 x i8> @llvm.vp.select.nxv2i8(<2 x i1> %m, <2 x i8> %y, <2 x i8> %c, i32 %evl)
; Tail-agnostic vector-scalar variant on <2 x i8>: %b is splatted into %vb and the final
; blend on %m is vp.select. Expected lowering: a masked vmacc.vx under "ta, mu".
91 define <2 x i8> @vmacc_vx_nxv2i8_ta(<2 x i8> %a, i8 %b, <2 x i8> %c, <2 x i1> %m, i32 zeroext %evl) {
92 ; CHECK-LABEL: vmacc_vx_nxv2i8_ta:
94 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu
95 ; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t
96 ; CHECK-NEXT: vmv1r.v v8, v9
98 %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0
99 %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
100 %splat = insertelement <2 x i1> poison, i1 -1, i32 0
101 %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
102 %x = call <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8> %a, <2 x i8> %vb, <2 x i1> %allones, i32 %evl)
103 %y = call <2 x i8> @llvm.vp.add.nxv2i8(<2 x i8> %x, <2 x i8> %c, <2 x i1> %allones, i32 %evl)
104 %u = call <2 x i8> @llvm.vp.select.nxv2i8(<2 x i1> %m, <2 x i8> %y, <2 x i8> %c, i32 %evl)
; VP intrinsic declarations (mul, add, merge, select) used by the <4 x i8> tests.
; NOTE(review): the name suffix is nxv4i8 although the operand types are fixed-length
; <4 x i8>; presumably the IR reader remangles intrinsic names -- confirm.
108 declare <4 x i8> @llvm.vp.mul.nxv4i8(<4 x i8>, <4 x i8>, <4 x i1>, i32)
109 declare <4 x i8> @llvm.vp.add.nxv4i8(<4 x i8>, <4 x i8>, <4 x i1>, i32)
110 declare <4 x i8> @llvm.vp.merge.nxv4i8(<4 x i1>, <4 x i8>, <4 x i8>, i32)
111 declare <4 x i8> @llvm.vp.select.nxv4i8(<4 x i1>, <4 x i8>, <4 x i8>, i32)
; Masked vector-vector multiply-accumulate on <4 x i8>: all-ones-masked vp.mul/vp.add,
; then vp.merge takes %y where %m is set and %c otherwise.
; Expected lowering: one masked vmacc.vv under a "tu, mu" vsetvli (e8, mf4).
113 define <4 x i8> @vmacc_vv_nxv4i8(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i1> %m, i32 zeroext %evl) {
114 ; CHECK-LABEL: vmacc_vv_nxv4i8:
116 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, mu
117 ; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t
118 ; CHECK-NEXT: vmv1r.v v8, v10
120 %splat = insertelement <4 x i1> poison, i1 -1, i32 0
121 %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
122 %x = call <4 x i8> @llvm.vp.mul.nxv4i8(<4 x i8> %a, <4 x i8> %b, <4 x i1> %allones, i32 %evl)
123 %y = call <4 x i8> @llvm.vp.add.nxv4i8(<4 x i8> %x, <4 x i8> %c, <4 x i1> %allones, i32 %evl)
124 %u = call <4 x i8> @llvm.vp.merge.nxv4i8(<4 x i1> %m, <4 x i8> %y, <4 x i8> %c, i32 %evl)
; Unmasked vector-vector multiply-accumulate on <4 x i8>: the final vp.merge also uses
; the all-ones mask (%m is not referenced in the visible body).
; Expected lowering: an unmasked vmacc.vv under a "tu, ma" vsetvli.
128 define <4 x i8> @vmacc_vv_nxv4i8_unmasked(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i1> %m, i32 zeroext %evl) {
129 ; CHECK-LABEL: vmacc_vv_nxv4i8_unmasked:
131 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, ma
132 ; CHECK-NEXT: vmacc.vv v10, v8, v9
133 ; CHECK-NEXT: vmv1r.v v8, v10
135 %splat = insertelement <4 x i1> poison, i1 -1, i32 0
136 %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
137 %x = call <4 x i8> @llvm.vp.mul.nxv4i8(<4 x i8> %a, <4 x i8> %b, <4 x i1> %allones, i32 %evl)
138 %y = call <4 x i8> @llvm.vp.add.nxv4i8(<4 x i8> %x, <4 x i8> %c, <4 x i1> %allones, i32 %evl)
139 %u = call <4 x i8> @llvm.vp.merge.nxv4i8(<4 x i1> %allones, <4 x i8> %y, <4 x i8> %c, i32 %evl)
; Masked vector-scalar multiply-accumulate on <4 x i8>: scalar %b is splatted into %vb,
; used by all-ones-masked vp.mul/vp.add, and vp.merge then selects on %m.
; Expected lowering: one masked vmacc.vx under a "tu, mu" vsetvli.
143 define <4 x i8> @vmacc_vx_nxv4i8(<4 x i8> %a, i8 %b, <4 x i8> %c, <4 x i1> %m, i32 zeroext %evl) {
144 ; CHECK-LABEL: vmacc_vx_nxv4i8:
146 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, mu
147 ; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t
148 ; CHECK-NEXT: vmv1r.v v8, v9
150 %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0
151 %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
152 %splat = insertelement <4 x i1> poison, i1 -1, i32 0
153 %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
154 %x = call <4 x i8> @llvm.vp.mul.nxv4i8(<4 x i8> %a, <4 x i8> %vb, <4 x i1> %allones, i32 %evl)
155 %y = call <4 x i8> @llvm.vp.add.nxv4i8(<4 x i8> %x, <4 x i8> %c, <4 x i1> %allones, i32 %evl)
156 %u = call <4 x i8> @llvm.vp.merge.nxv4i8(<4 x i1> %m, <4 x i8> %y, <4 x i8> %c, i32 %evl)
; Unmasked vector-scalar multiply-accumulate on <4 x i8>: %b is splatted into %vb and
; the final vp.merge uses the all-ones mask (%m is not referenced in the visible body).
; Expected lowering: an unmasked vmacc.vx under a "tu, ma" vsetvli.
160 define <4 x i8> @vmacc_vx_nxv4i8_unmasked(<4 x i8> %a, i8 %b, <4 x i8> %c, <4 x i1> %m, i32 zeroext %evl) {
161 ; CHECK-LABEL: vmacc_vx_nxv4i8_unmasked:
163 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, ma
164 ; CHECK-NEXT: vmacc.vx v9, a0, v8
165 ; CHECK-NEXT: vmv1r.v v8, v9
167 %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0
168 %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
169 %splat = insertelement <4 x i1> poison, i1 -1, i32 0
170 %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
171 %x = call <4 x i8> @llvm.vp.mul.nxv4i8(<4 x i8> %a, <4 x i8> %vb, <4 x i1> %allones, i32 %evl)
172 %y = call <4 x i8> @llvm.vp.add.nxv4i8(<4 x i8> %x, <4 x i8> %c, <4 x i1> %allones, i32 %evl)
173 %u = call <4 x i8> @llvm.vp.merge.nxv4i8(<4 x i1> %allones, <4 x i8> %y, <4 x i8> %c, i32 %evl)
; Tail-agnostic vector-vector variant on <4 x i8>: the final blend on %m is vp.select
; rather than vp.merge. Expected lowering: a masked vmacc.vv under a "ta, mu" vsetvli.
177 define <4 x i8> @vmacc_vv_nxv4i8_ta(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i1> %m, i32 zeroext %evl) {
178 ; CHECK-LABEL: vmacc_vv_nxv4i8_ta:
180 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
181 ; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t
182 ; CHECK-NEXT: vmv1r.v v8, v10
184 %splat = insertelement <4 x i1> poison, i1 -1, i32 0
185 %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
186 %x = call <4 x i8> @llvm.vp.mul.nxv4i8(<4 x i8> %a, <4 x i8> %b, <4 x i1> %allones, i32 %evl)
187 %y = call <4 x i8> @llvm.vp.add.nxv4i8(<4 x i8> %x, <4 x i8> %c, <4 x i1> %allones, i32 %evl)
188 %u = call <4 x i8> @llvm.vp.select.nxv4i8(<4 x i1> %m, <4 x i8> %y, <4 x i8> %c, i32 %evl)
; Tail-agnostic vector-scalar variant on <4 x i8>: %b is splatted into %vb and the final
; blend on %m is vp.select. Expected lowering: a masked vmacc.vx under "ta, mu".
192 define <4 x i8> @vmacc_vx_nxv4i8_ta(<4 x i8> %a, i8 %b, <4 x i8> %c, <4 x i1> %m, i32 zeroext %evl) {
193 ; CHECK-LABEL: vmacc_vx_nxv4i8_ta:
195 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu
196 ; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t
197 ; CHECK-NEXT: vmv1r.v v8, v9
199 %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0
200 %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
201 %splat = insertelement <4 x i1> poison, i1 -1, i32 0
202 %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
203 %x = call <4 x i8> @llvm.vp.mul.nxv4i8(<4 x i8> %a, <4 x i8> %vb, <4 x i1> %allones, i32 %evl)
204 %y = call <4 x i8> @llvm.vp.add.nxv4i8(<4 x i8> %x, <4 x i8> %c, <4 x i1> %allones, i32 %evl)
205 %u = call <4 x i8> @llvm.vp.select.nxv4i8(<4 x i1> %m, <4 x i8> %y, <4 x i8> %c, i32 %evl)
; VP intrinsic declarations (mul, add, merge, select) used by the <8 x i8> tests.
; NOTE(review): the name suffix is nxv8i8 although the operand types are fixed-length
; <8 x i8>; presumably the IR reader remangles intrinsic names -- confirm.
209 declare <8 x i8> @llvm.vp.mul.nxv8i8(<8 x i8>, <8 x i8>, <8 x i1>, i32)
210 declare <8 x i8> @llvm.vp.add.nxv8i8(<8 x i8>, <8 x i8>, <8 x i1>, i32)
211 declare <8 x i8> @llvm.vp.merge.nxv8i8(<8 x i1>, <8 x i8>, <8 x i8>, i32)
212 declare <8 x i8> @llvm.vp.select.nxv8i8(<8 x i1>, <8 x i8>, <8 x i8>, i32)
; Masked vector-vector multiply-accumulate on <8 x i8>: all-ones-masked vp.mul/vp.add,
; then vp.merge takes %y where %m is set and %c otherwise.
; Expected lowering: one masked vmacc.vv under a "tu, mu" vsetvli (e8, mf2).
214 define <8 x i8> @vmacc_vv_nxv8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i1> %m, i32 zeroext %evl) {
215 ; CHECK-LABEL: vmacc_vv_nxv8i8:
217 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, mu
218 ; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t
219 ; CHECK-NEXT: vmv1r.v v8, v10
221 %splat = insertelement <8 x i1> poison, i1 -1, i32 0
222 %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
223 %x = call <8 x i8> @llvm.vp.mul.nxv8i8(<8 x i8> %a, <8 x i8> %b, <8 x i1> %allones, i32 %evl)
224 %y = call <8 x i8> @llvm.vp.add.nxv8i8(<8 x i8> %x, <8 x i8> %c, <8 x i1> %allones, i32 %evl)
225 %u = call <8 x i8> @llvm.vp.merge.nxv8i8(<8 x i1> %m, <8 x i8> %y, <8 x i8> %c, i32 %evl)
; Unmasked vector-vector multiply-accumulate on <8 x i8>: the final vp.merge also uses
; the all-ones mask (%m is not referenced in the visible body).
; Expected lowering: an unmasked vmacc.vv under a "tu, ma" vsetvli.
229 define <8 x i8> @vmacc_vv_nxv8i8_unmasked(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i1> %m, i32 zeroext %evl) {
230 ; CHECK-LABEL: vmacc_vv_nxv8i8_unmasked:
232 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma
233 ; CHECK-NEXT: vmacc.vv v10, v8, v9
234 ; CHECK-NEXT: vmv1r.v v8, v10
236 %splat = insertelement <8 x i1> poison, i1 -1, i32 0
237 %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
238 %x = call <8 x i8> @llvm.vp.mul.nxv8i8(<8 x i8> %a, <8 x i8> %b, <8 x i1> %allones, i32 %evl)
239 %y = call <8 x i8> @llvm.vp.add.nxv8i8(<8 x i8> %x, <8 x i8> %c, <8 x i1> %allones, i32 %evl)
240 %u = call <8 x i8> @llvm.vp.merge.nxv8i8(<8 x i1> %allones, <8 x i8> %y, <8 x i8> %c, i32 %evl)
; Masked vector-scalar multiply-accumulate on <8 x i8>: scalar %b is splatted into %vb,
; used by all-ones-masked vp.mul/vp.add, and vp.merge then selects on %m.
; Expected lowering: one masked vmacc.vx under a "tu, mu" vsetvli.
244 define <8 x i8> @vmacc_vx_nxv8i8(<8 x i8> %a, i8 %b, <8 x i8> %c, <8 x i1> %m, i32 zeroext %evl) {
245 ; CHECK-LABEL: vmacc_vx_nxv8i8:
247 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, mu
248 ; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t
249 ; CHECK-NEXT: vmv1r.v v8, v9
251 %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0
252 %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
253 %splat = insertelement <8 x i1> poison, i1 -1, i32 0
254 %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
255 %x = call <8 x i8> @llvm.vp.mul.nxv8i8(<8 x i8> %a, <8 x i8> %vb, <8 x i1> %allones, i32 %evl)
256 %y = call <8 x i8> @llvm.vp.add.nxv8i8(<8 x i8> %x, <8 x i8> %c, <8 x i1> %allones, i32 %evl)
257 %u = call <8 x i8> @llvm.vp.merge.nxv8i8(<8 x i1> %m, <8 x i8> %y, <8 x i8> %c, i32 %evl)
; Unmasked vector-scalar multiply-accumulate on <8 x i8>: %b is splatted into %vb and
; the final vp.merge uses the all-ones mask (%m is not referenced in the visible body).
; Expected lowering: an unmasked vmacc.vx under a "tu, ma" vsetvli.
261 define <8 x i8> @vmacc_vx_nxv8i8_unmasked(<8 x i8> %a, i8 %b, <8 x i8> %c, <8 x i1> %m, i32 zeroext %evl) {
262 ; CHECK-LABEL: vmacc_vx_nxv8i8_unmasked:
264 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma
265 ; CHECK-NEXT: vmacc.vx v9, a0, v8
266 ; CHECK-NEXT: vmv1r.v v8, v9
268 %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0
269 %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
270 %splat = insertelement <8 x i1> poison, i1 -1, i32 0
271 %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
272 %x = call <8 x i8> @llvm.vp.mul.nxv8i8(<8 x i8> %a, <8 x i8> %vb, <8 x i1> %allones, i32 %evl)
273 %y = call <8 x i8> @llvm.vp.add.nxv8i8(<8 x i8> %x, <8 x i8> %c, <8 x i1> %allones, i32 %evl)
274 %u = call <8 x i8> @llvm.vp.merge.nxv8i8(<8 x i1> %allones, <8 x i8> %y, <8 x i8> %c, i32 %evl)
; Tail-agnostic vector-vector variant on <8 x i8>: the final blend on %m is vp.select
; rather than vp.merge. Expected lowering: a masked vmacc.vv under a "ta, mu" vsetvli.
278 define <8 x i8> @vmacc_vv_nxv8i8_ta(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i1> %m, i32 zeroext %evl) {
279 ; CHECK-LABEL: vmacc_vv_nxv8i8_ta:
281 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
282 ; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t
283 ; CHECK-NEXT: vmv1r.v v8, v10
285 %splat = insertelement <8 x i1> poison, i1 -1, i32 0
286 %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
287 %x = call <8 x i8> @llvm.vp.mul.nxv8i8(<8 x i8> %a, <8 x i8> %b, <8 x i1> %allones, i32 %evl)
288 %y = call <8 x i8> @llvm.vp.add.nxv8i8(<8 x i8> %x, <8 x i8> %c, <8 x i1> %allones, i32 %evl)
289 %u = call <8 x i8> @llvm.vp.select.nxv8i8(<8 x i1> %m, <8 x i8> %y, <8 x i8> %c, i32 %evl)
; Tail-agnostic vector-scalar variant on <8 x i8>: %b is splatted into %vb and the final
; blend on %m is vp.select. Expected lowering: a masked vmacc.vx under "ta, mu".
293 define <8 x i8> @vmacc_vx_nxv8i8_ta(<8 x i8> %a, i8 %b, <8 x i8> %c, <8 x i1> %m, i32 zeroext %evl) {
294 ; CHECK-LABEL: vmacc_vx_nxv8i8_ta:
296 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu
297 ; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t
298 ; CHECK-NEXT: vmv1r.v v8, v9
300 %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0
301 %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
302 %splat = insertelement <8 x i1> poison, i1 -1, i32 0
303 %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
304 %x = call <8 x i8> @llvm.vp.mul.nxv8i8(<8 x i8> %a, <8 x i8> %vb, <8 x i1> %allones, i32 %evl)
305 %y = call <8 x i8> @llvm.vp.add.nxv8i8(<8 x i8> %x, <8 x i8> %c, <8 x i1> %allones, i32 %evl)
306 %u = call <8 x i8> @llvm.vp.select.nxv8i8(<8 x i1> %m, <8 x i8> %y, <8 x i8> %c, i32 %evl)
; VP intrinsic declarations (mul, add, merge, select) used by the <16 x i8> tests.
; NOTE(review): the name suffix is nxv16i8 although the operand types are fixed-length
; <16 x i8>; presumably the IR reader remangles intrinsic names -- confirm.
310 declare <16 x i8> @llvm.vp.mul.nxv16i8(<16 x i8>, <16 x i8>, <16 x i1>, i32)
311 declare <16 x i8> @llvm.vp.add.nxv16i8(<16 x i8>, <16 x i8>, <16 x i1>, i32)
312 declare <16 x i8> @llvm.vp.merge.nxv16i8(<16 x i1>, <16 x i8>, <16 x i8>, i32)
313 declare <16 x i8> @llvm.vp.select.nxv16i8(<16 x i1>, <16 x i8>, <16 x i8>, i32)
; Masked vector-vector multiply-accumulate on <16 x i8>: all-ones-masked vp.mul/vp.add,
; then vp.merge takes %y where %m is set and %c otherwise.
; Expected lowering: one masked vmacc.vv under a "tu, mu" vsetvli (e8, m1).
315 define <16 x i8> @vmacc_vv_nxv16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i1> %m, i32 zeroext %evl) {
316 ; CHECK-LABEL: vmacc_vv_nxv16i8:
318 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, mu
319 ; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t
320 ; CHECK-NEXT: vmv1r.v v8, v10
322 %splat = insertelement <16 x i1> poison, i1 -1, i32 0
323 %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
324 %x = call <16 x i8> @llvm.vp.mul.nxv16i8(<16 x i8> %a, <16 x i8> %b, <16 x i1> %allones, i32 %evl)
325 %y = call <16 x i8> @llvm.vp.add.nxv16i8(<16 x i8> %x, <16 x i8> %c, <16 x i1> %allones, i32 %evl)
326 %u = call <16 x i8> @llvm.vp.merge.nxv16i8(<16 x i1> %m, <16 x i8> %y, <16 x i8> %c, i32 %evl)
; Unmasked vector-vector multiply-accumulate on <16 x i8>: the final vp.merge also uses
; the all-ones mask (%m is not referenced in the visible body).
; Expected lowering: an unmasked vmacc.vv under a "tu, ma" vsetvli.
330 define <16 x i8> @vmacc_vv_nxv16i8_unmasked(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i1> %m, i32 zeroext %evl) {
331 ; CHECK-LABEL: vmacc_vv_nxv16i8_unmasked:
333 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, ma
334 ; CHECK-NEXT: vmacc.vv v10, v8, v9
335 ; CHECK-NEXT: vmv1r.v v8, v10
337 %splat = insertelement <16 x i1> poison, i1 -1, i32 0
338 %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
339 %x = call <16 x i8> @llvm.vp.mul.nxv16i8(<16 x i8> %a, <16 x i8> %b, <16 x i1> %allones, i32 %evl)
340 %y = call <16 x i8> @llvm.vp.add.nxv16i8(<16 x i8> %x, <16 x i8> %c, <16 x i1> %allones, i32 %evl)
341 %u = call <16 x i8> @llvm.vp.merge.nxv16i8(<16 x i1> %allones, <16 x i8> %y, <16 x i8> %c, i32 %evl)
; Masked vector-scalar multiply-accumulate on <16 x i8>: scalar %b is splatted into %vb,
; used by all-ones-masked vp.mul/vp.add, and vp.merge then selects on %m.
; Expected lowering: one masked vmacc.vx under a "tu, mu" vsetvli.
345 define <16 x i8> @vmacc_vx_nxv16i8(<16 x i8> %a, i8 %b, <16 x i8> %c, <16 x i1> %m, i32 zeroext %evl) {
346 ; CHECK-LABEL: vmacc_vx_nxv16i8:
348 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, tu, mu
349 ; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t
350 ; CHECK-NEXT: vmv1r.v v8, v9
352 %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0
353 %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
354 %splat = insertelement <16 x i1> poison, i1 -1, i32 0
355 %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
356 %x = call <16 x i8> @llvm.vp.mul.nxv16i8(<16 x i8> %a, <16 x i8> %vb, <16 x i1> %allones, i32 %evl)
357 %y = call <16 x i8> @llvm.vp.add.nxv16i8(<16 x i8> %x, <16 x i8> %c, <16 x i1> %allones, i32 %evl)
358 %u = call <16 x i8> @llvm.vp.merge.nxv16i8(<16 x i1> %m, <16 x i8> %y, <16 x i8> %c, i32 %evl)
; Unmasked vector-scalar multiply-accumulate on <16 x i8>: %b is splatted into %vb and
; the final vp.merge uses the all-ones mask (%m is not referenced in the visible body).
; Expected lowering: an unmasked vmacc.vx under a "tu, ma" vsetvli.
362 define <16 x i8> @vmacc_vx_nxv16i8_unmasked(<16 x i8> %a, i8 %b, <16 x i8> %c, <16 x i1> %m, i32 zeroext %evl) {
363 ; CHECK-LABEL: vmacc_vx_nxv16i8_unmasked:
365 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, tu, ma
366 ; CHECK-NEXT: vmacc.vx v9, a0, v8
367 ; CHECK-NEXT: vmv1r.v v8, v9
369 %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0
370 %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
371 %splat = insertelement <16 x i1> poison, i1 -1, i32 0
372 %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
373 %x = call <16 x i8> @llvm.vp.mul.nxv16i8(<16 x i8> %a, <16 x i8> %vb, <16 x i1> %allones, i32 %evl)
374 %y = call <16 x i8> @llvm.vp.add.nxv16i8(<16 x i8> %x, <16 x i8> %c, <16 x i1> %allones, i32 %evl)
375 %u = call <16 x i8> @llvm.vp.merge.nxv16i8(<16 x i1> %allones, <16 x i8> %y, <16 x i8> %c, i32 %evl)
; Tail-agnostic vector-vector variant on <16 x i8>: the final blend on %m is vp.select
; rather than vp.merge. Expected lowering: a masked vmacc.vv under a "ta, mu" vsetvli,
; with the result copied by vmv.v.v.
379 define <16 x i8> @vmacc_vv_nxv16i8_ta(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i1> %m, i32 zeroext %evl) {
380 ; CHECK-LABEL: vmacc_vv_nxv16i8_ta:
382 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
383 ; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t
384 ; CHECK-NEXT: vmv.v.v v8, v10
386 %splat = insertelement <16 x i1> poison, i1 -1, i32 0
387 %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
388 %x = call <16 x i8> @llvm.vp.mul.nxv16i8(<16 x i8> %a, <16 x i8> %b, <16 x i1> %allones, i32 %evl)
389 %y = call <16 x i8> @llvm.vp.add.nxv16i8(<16 x i8> %x, <16 x i8> %c, <16 x i1> %allones, i32 %evl)
390 %u = call <16 x i8> @llvm.vp.select.nxv16i8(<16 x i1> %m, <16 x i8> %y, <16 x i8> %c, i32 %evl)
; Tail-agnostic vector-scalar variant on <16 x i8>: %b is splatted into %vb and the
; final blend on %m is vp.select. Expected lowering: a masked vmacc.vx under "ta, mu",
; with the result copied by vmv.v.v.
394 define <16 x i8> @vmacc_vx_nxv16i8_ta(<16 x i8> %a, i8 %b, <16 x i8> %c, <16 x i1> %m, i32 zeroext %evl) {
395 ; CHECK-LABEL: vmacc_vx_nxv16i8_ta:
397 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu
398 ; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t
399 ; CHECK-NEXT: vmv.v.v v8, v9
401 %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0
402 %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
403 %splat = insertelement <16 x i1> poison, i1 -1, i32 0
404 %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
405 %x = call <16 x i8> @llvm.vp.mul.nxv16i8(<16 x i8> %a, <16 x i8> %vb, <16 x i1> %allones, i32 %evl)
406 %y = call <16 x i8> @llvm.vp.add.nxv16i8(<16 x i8> %x, <16 x i8> %c, <16 x i1> %allones, i32 %evl)
407 %u = call <16 x i8> @llvm.vp.select.nxv16i8(<16 x i1> %m, <16 x i8> %y, <16 x i8> %c, i32 %evl)
; VP intrinsic declarations (mul, add, merge, select) used by the <32 x i8> tests.
; NOTE(review): the name suffix is nxv32i8 although the operand types are fixed-length
; <32 x i8>; presumably the IR reader remangles intrinsic names -- confirm.
411 declare <32 x i8> @llvm.vp.mul.nxv32i8(<32 x i8>, <32 x i8>, <32 x i1>, i32)
412 declare <32 x i8> @llvm.vp.add.nxv32i8(<32 x i8>, <32 x i8>, <32 x i1>, i32)
413 declare <32 x i8> @llvm.vp.merge.nxv32i8(<32 x i1>, <32 x i8>, <32 x i8>, i32)
414 declare <32 x i8> @llvm.vp.select.nxv32i8(<32 x i1>, <32 x i8>, <32 x i8>, i32)
; Masked vector-vector multiply-accumulate on <32 x i8>: all-ones-masked vp.mul/vp.add,
; then vp.merge takes %y where %m is set and %c otherwise.
; Expected lowering: one masked vmacc.vv under a "tu, mu" vsetvli (e8, m2).
416 define <32 x i8> @vmacc_vv_nxv32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8> %c, <32 x i1> %m, i32 zeroext %evl) {
417 ; CHECK-LABEL: vmacc_vv_nxv32i8:
419 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, tu, mu
420 ; CHECK-NEXT: vmacc.vv v12, v8, v10, v0.t
421 ; CHECK-NEXT: vmv2r.v v8, v12
423 %splat = insertelement <32 x i1> poison, i1 -1, i32 0
424 %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
425 %x = call <32 x i8> @llvm.vp.mul.nxv32i8(<32 x i8> %a, <32 x i8> %b, <32 x i1> %allones, i32 %evl)
426 %y = call <32 x i8> @llvm.vp.add.nxv32i8(<32 x i8> %x, <32 x i8> %c, <32 x i1> %allones, i32 %evl)
427 %u = call <32 x i8> @llvm.vp.merge.nxv32i8(<32 x i1> %m, <32 x i8> %y, <32 x i8> %c, i32 %evl)
; Unmasked vector-vector multiply-accumulate on <32 x i8>: the final vp.merge also uses
; the all-ones mask (%m is not referenced in the visible body).
; Expected lowering: an unmasked vmacc.vv under a "tu, ma" vsetvli.
431 define <32 x i8> @vmacc_vv_nxv32i8_unmasked(<32 x i8> %a, <32 x i8> %b, <32 x i8> %c, <32 x i1> %m, i32 zeroext %evl) {
432 ; CHECK-LABEL: vmacc_vv_nxv32i8_unmasked:
434 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, tu, ma
435 ; CHECK-NEXT: vmacc.vv v12, v8, v10
436 ; CHECK-NEXT: vmv2r.v v8, v12
438 %splat = insertelement <32 x i1> poison, i1 -1, i32 0
439 %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
440 %x = call <32 x i8> @llvm.vp.mul.nxv32i8(<32 x i8> %a, <32 x i8> %b, <32 x i1> %allones, i32 %evl)
441 %y = call <32 x i8> @llvm.vp.add.nxv32i8(<32 x i8> %x, <32 x i8> %c, <32 x i1> %allones, i32 %evl)
442 %u = call <32 x i8> @llvm.vp.merge.nxv32i8(<32 x i1> %allones, <32 x i8> %y, <32 x i8> %c, i32 %evl)
; Masked vector-scalar multiply-accumulate on <32 x i8>: scalar %b is splatted into %vb,
; used by all-ones-masked vp.mul/vp.add, and vp.merge then selects on %m.
; Expected lowering: one masked vmacc.vx under a "tu, mu" vsetvli.
446 define <32 x i8> @vmacc_vx_nxv32i8(<32 x i8> %a, i8 %b, <32 x i8> %c, <32 x i1> %m, i32 zeroext %evl) {
447 ; CHECK-LABEL: vmacc_vx_nxv32i8:
449 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, tu, mu
450 ; CHECK-NEXT: vmacc.vx v10, a0, v8, v0.t
451 ; CHECK-NEXT: vmv2r.v v8, v10
453 %elt.head = insertelement <32 x i8> poison, i8 %b, i32 0
454 %vb = shufflevector <32 x i8> %elt.head, <32 x i8> poison, <32 x i32> zeroinitializer
455 %splat = insertelement <32 x i1> poison, i1 -1, i32 0
456 %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
457 %x = call <32 x i8> @llvm.vp.mul.nxv32i8(<32 x i8> %a, <32 x i8> %vb, <32 x i1> %allones, i32 %evl)
458 %y = call <32 x i8> @llvm.vp.add.nxv32i8(<32 x i8> %x, <32 x i8> %c, <32 x i1> %allones, i32 %evl)
459 %u = call <32 x i8> @llvm.vp.merge.nxv32i8(<32 x i1> %m, <32 x i8> %y, <32 x i8> %c, i32 %evl)
; Unmasked vector-scalar multiply-accumulate on <32 x i8>: %b is splatted into %vb and
; the final vp.merge uses the all-ones mask (%m is not referenced in the visible body).
; Expected lowering: an unmasked vmacc.vx under a "tu, ma" vsetvli.
463 define <32 x i8> @vmacc_vx_nxv32i8_unmasked(<32 x i8> %a, i8 %b, <32 x i8> %c, <32 x i1> %m, i32 zeroext %evl) {
464 ; CHECK-LABEL: vmacc_vx_nxv32i8_unmasked:
466 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, tu, ma
467 ; CHECK-NEXT: vmacc.vx v10, a0, v8
468 ; CHECK-NEXT: vmv2r.v v8, v10
470 %elt.head = insertelement <32 x i8> poison, i8 %b, i32 0
471 %vb = shufflevector <32 x i8> %elt.head, <32 x i8> poison, <32 x i32> zeroinitializer
472 %splat = insertelement <32 x i1> poison, i1 -1, i32 0
473 %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
474 %x = call <32 x i8> @llvm.vp.mul.nxv32i8(<32 x i8> %a, <32 x i8> %vb, <32 x i1> %allones, i32 %evl)
475 %y = call <32 x i8> @llvm.vp.add.nxv32i8(<32 x i8> %x, <32 x i8> %c, <32 x i1> %allones, i32 %evl)
476 %u = call <32 x i8> @llvm.vp.merge.nxv32i8(<32 x i1> %allones, <32 x i8> %y, <32 x i8> %c, i32 %evl)
; Tail-agnostic vector-vector variant on <32 x i8>: the final blend on %m is vp.select
; rather than vp.merge. Expected lowering: a masked vmacc.vv under a "ta, mu" vsetvli,
; with the result copied by vmv.v.v.
480 define <32 x i8> @vmacc_vv_nxv32i8_ta(<32 x i8> %a, <32 x i8> %b, <32 x i8> %c, <32 x i1> %m, i32 zeroext %evl) {
481 ; CHECK-LABEL: vmacc_vv_nxv32i8_ta:
483 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu
484 ; CHECK-NEXT: vmacc.vv v12, v8, v10, v0.t
485 ; CHECK-NEXT: vmv.v.v v8, v12
487 %splat = insertelement <32 x i1> poison, i1 -1, i32 0
488 %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
489 %x = call <32 x i8> @llvm.vp.mul.nxv32i8(<32 x i8> %a, <32 x i8> %b, <32 x i1> %allones, i32 %evl)
490 %y = call <32 x i8> @llvm.vp.add.nxv32i8(<32 x i8> %x, <32 x i8> %c, <32 x i1> %allones, i32 %evl)
491 %u = call <32 x i8> @llvm.vp.select.nxv32i8(<32 x i1> %m, <32 x i8> %y, <32 x i8> %c, i32 %evl)
; Tail-agnostic vector-scalar variant on <32 x i8>: %b is splatted into %vb and the
; final blend on %m is vp.select. Expected lowering: a masked vmacc.vx under "ta, mu",
; with the result copied by vmv.v.v.
495 define <32 x i8> @vmacc_vx_nxv32i8_ta(<32 x i8> %a, i8 %b, <32 x i8> %c, <32 x i1> %m, i32 zeroext %evl) {
496 ; CHECK-LABEL: vmacc_vx_nxv32i8_ta:
498 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu
499 ; CHECK-NEXT: vmacc.vx v10, a0, v8, v0.t
500 ; CHECK-NEXT: vmv.v.v v8, v10
502 %elt.head = insertelement <32 x i8> poison, i8 %b, i32 0
503 %vb = shufflevector <32 x i8> %elt.head, <32 x i8> poison, <32 x i32> zeroinitializer
504 %splat = insertelement <32 x i1> poison, i1 -1, i32 0
505 %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
506 %x = call <32 x i8> @llvm.vp.mul.nxv32i8(<32 x i8> %a, <32 x i8> %vb, <32 x i1> %allones, i32 %evl)
507 %y = call <32 x i8> @llvm.vp.add.nxv32i8(<32 x i8> %x, <32 x i8> %c, <32 x i1> %allones, i32 %evl)
508 %u = call <32 x i8> @llvm.vp.select.nxv32i8(<32 x i1> %m, <32 x i8> %y, <32 x i8> %c, i32 %evl)
; VP intrinsic declarations (mul, add, merge, select) used by the <64 x i8> tests.
; NOTE(review): the name suffix is nxv64i8 although the operand types are fixed-length
; <64 x i8>; presumably the IR reader remangles intrinsic names -- confirm.
512 declare <64 x i8> @llvm.vp.mul.nxv64i8(<64 x i8>, <64 x i8>, <64 x i1>, i32)
513 declare <64 x i8> @llvm.vp.add.nxv64i8(<64 x i8>, <64 x i8>, <64 x i1>, i32)
514 declare <64 x i8> @llvm.vp.merge.nxv64i8(<64 x i1>, <64 x i8>, <64 x i8>, i32)
515 declare <64 x i8> @llvm.vp.select.nxv64i8(<64 x i1>, <64 x i8>, <64 x i8>, i32)
; Masked vector-vector multiply-accumulate on <64 x i8>: all-ones-masked vp.mul/vp.add,
; then vp.merge takes %y where %m is set and %c otherwise.
; Expected lowering: one masked vmacc.vv under a "tu, mu" vsetvli (e8, m4).
517 define <64 x i8> @vmacc_vv_nxv64i8(<64 x i8> %a, <64 x i8> %b, <64 x i8> %c, <64 x i1> %m, i32 zeroext %evl) {
518 ; CHECK-LABEL: vmacc_vv_nxv64i8:
520 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, tu, mu
521 ; CHECK-NEXT: vmacc.vv v16, v8, v12, v0.t
522 ; CHECK-NEXT: vmv4r.v v8, v16
524 %splat = insertelement <64 x i1> poison, i1 -1, i32 0
525 %allones = shufflevector <64 x i1> %splat, <64 x i1> poison, <64 x i32> zeroinitializer
526 %x = call <64 x i8> @llvm.vp.mul.nxv64i8(<64 x i8> %a, <64 x i8> %b, <64 x i1> %allones, i32 %evl)
527 %y = call <64 x i8> @llvm.vp.add.nxv64i8(<64 x i8> %x, <64 x i8> %c, <64 x i1> %allones, i32 %evl)
528 %u = call <64 x i8> @llvm.vp.merge.nxv64i8(<64 x i1> %m, <64 x i8> %y, <64 x i8> %c, i32 %evl)
; Unmasked vector-vector multiply-accumulate on <64 x i8>: the final vp.merge also uses
; the all-ones mask (%m is not referenced in the visible body).
; Expected lowering: an unmasked vmacc.vv under a "tu, ma" vsetvli.
532 define <64 x i8> @vmacc_vv_nxv64i8_unmasked(<64 x i8> %a, <64 x i8> %b, <64 x i8> %c, <64 x i1> %m, i32 zeroext %evl) {
533 ; CHECK-LABEL: vmacc_vv_nxv64i8_unmasked:
535 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, tu, ma
536 ; CHECK-NEXT: vmacc.vv v16, v8, v12
537 ; CHECK-NEXT: vmv4r.v v8, v16
539 %splat = insertelement <64 x i1> poison, i1 -1, i32 0
540 %allones = shufflevector <64 x i1> %splat, <64 x i1> poison, <64 x i32> zeroinitializer
541 %x = call <64 x i8> @llvm.vp.mul.nxv64i8(<64 x i8> %a, <64 x i8> %b, <64 x i1> %allones, i32 %evl)
542 %y = call <64 x i8> @llvm.vp.add.nxv64i8(<64 x i8> %x, <64 x i8> %c, <64 x i1> %allones, i32 %evl)
543 %u = call <64 x i8> @llvm.vp.merge.nxv64i8(<64 x i1> %allones, <64 x i8> %y, <64 x i8> %c, i32 %evl)
; Masked vector-scalar multiply-accumulate on <64 x i8>: scalar %b is splatted into %vb,
; used by all-ones-masked vp.mul/vp.add, and vp.merge then selects on %m.
; Expected lowering: one masked vmacc.vx under a "tu, mu" vsetvli.
547 define <64 x i8> @vmacc_vx_nxv64i8(<64 x i8> %a, i8 %b, <64 x i8> %c, <64 x i1> %m, i32 zeroext %evl) {
548 ; CHECK-LABEL: vmacc_vx_nxv64i8:
550 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, tu, mu
551 ; CHECK-NEXT: vmacc.vx v12, a0, v8, v0.t
552 ; CHECK-NEXT: vmv4r.v v8, v12
554 %elt.head = insertelement <64 x i8> poison, i8 %b, i32 0
555 %vb = shufflevector <64 x i8> %elt.head, <64 x i8> poison, <64 x i32> zeroinitializer
556 %splat = insertelement <64 x i1> poison, i1 -1, i32 0
557 %allones = shufflevector <64 x i1> %splat, <64 x i1> poison, <64 x i32> zeroinitializer
558 %x = call <64 x i8> @llvm.vp.mul.nxv64i8(<64 x i8> %a, <64 x i8> %vb, <64 x i1> %allones, i32 %evl)
559 %y = call <64 x i8> @llvm.vp.add.nxv64i8(<64 x i8> %x, <64 x i8> %c, <64 x i1> %allones, i32 %evl)
560 %u = call <64 x i8> @llvm.vp.merge.nxv64i8(<64 x i1> %m, <64 x i8> %y, <64 x i8> %c, i32 %evl)
; Unmasked vector-scalar multiply-accumulate on <64 x i8>: %b is splatted into %vb and
; the final vp.merge uses the all-ones mask (%m is not referenced in the visible body).
; Expected lowering: an unmasked vmacc.vx under a "tu, ma" vsetvli.
564 define <64 x i8> @vmacc_vx_nxv64i8_unmasked(<64 x i8> %a, i8 %b, <64 x i8> %c, <64 x i1> %m, i32 zeroext %evl) {
565 ; CHECK-LABEL: vmacc_vx_nxv64i8_unmasked:
567 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, tu, ma
568 ; CHECK-NEXT: vmacc.vx v12, a0, v8
569 ; CHECK-NEXT: vmv4r.v v8, v12
571 %elt.head = insertelement <64 x i8> poison, i8 %b, i32 0
572 %vb = shufflevector <64 x i8> %elt.head, <64 x i8> poison, <64 x i32> zeroinitializer
573 %splat = insertelement <64 x i1> poison, i1 -1, i32 0
574 %allones = shufflevector <64 x i1> %splat, <64 x i1> poison, <64 x i32> zeroinitializer
575 %x = call <64 x i8> @llvm.vp.mul.nxv64i8(<64 x i8> %a, <64 x i8> %vb, <64 x i1> %allones, i32 %evl)
576 %y = call <64 x i8> @llvm.vp.add.nxv64i8(<64 x i8> %x, <64 x i8> %c, <64 x i1> %allones, i32 %evl)
577 %u = call <64 x i8> @llvm.vp.merge.nxv64i8(<64 x i1> %allones, <64 x i8> %y, <64 x i8> %c, i32 %evl)
; Tail-agnostic vector-vector variant on <64 x i8>: the final blend on %m is vp.select
; rather than vp.merge. Expected lowering: a masked vmacc.vv under a "ta, mu" vsetvli,
; with the result copied by vmv.v.v.
581 define <64 x i8> @vmacc_vv_nxv64i8_ta(<64 x i8> %a, <64 x i8> %b, <64 x i8> %c, <64 x i1> %m, i32 zeroext %evl) {
582 ; CHECK-LABEL: vmacc_vv_nxv64i8_ta:
584 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
585 ; CHECK-NEXT: vmacc.vv v16, v8, v12, v0.t
586 ; CHECK-NEXT: vmv.v.v v8, v16
588 %splat = insertelement <64 x i1> poison, i1 -1, i32 0
589 %allones = shufflevector <64 x i1> %splat, <64 x i1> poison, <64 x i32> zeroinitializer
590 %x = call <64 x i8> @llvm.vp.mul.nxv64i8(<64 x i8> %a, <64 x i8> %b, <64 x i1> %allones, i32 %evl)
591 %y = call <64 x i8> @llvm.vp.add.nxv64i8(<64 x i8> %x, <64 x i8> %c, <64 x i1> %allones, i32 %evl)
592 %u = call <64 x i8> @llvm.vp.select.nxv64i8(<64 x i1> %m, <64 x i8> %y, <64 x i8> %c, i32 %evl)
; Tail-agnostic vector-scalar variant on <64 x i8>: %b is splatted into %vb and the
; final blend on %m is vp.select. Expected lowering: a masked vmacc.vx under "ta, mu",
; with the result copied by vmv.v.v.
596 define <64 x i8> @vmacc_vx_nxv64i8_ta(<64 x i8> %a, i8 %b, <64 x i8> %c, <64 x i1> %m, i32 zeroext %evl) {
597 ; CHECK-LABEL: vmacc_vx_nxv64i8_ta:
599 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu
600 ; CHECK-NEXT: vmacc.vx v12, a0, v8, v0.t
601 ; CHECK-NEXT: vmv.v.v v8, v12
603 %elt.head = insertelement <64 x i8> poison, i8 %b, i32 0
604 %vb = shufflevector <64 x i8> %elt.head, <64 x i8> poison, <64 x i32> zeroinitializer
605 %splat = insertelement <64 x i1> poison, i1 -1, i32 0
606 %allones = shufflevector <64 x i1> %splat, <64 x i1> poison, <64 x i32> zeroinitializer
607 %x = call <64 x i8> @llvm.vp.mul.nxv64i8(<64 x i8> %a, <64 x i8> %vb, <64 x i1> %allones, i32 %evl)
608 %y = call <64 x i8> @llvm.vp.add.nxv64i8(<64 x i8> %x, <64 x i8> %c, <64 x i1> %allones, i32 %evl)
609 %u = call <64 x i8> @llvm.vp.select.nxv64i8(<64 x i1> %m, <64 x i8> %y, <64 x i8> %c, i32 %evl)
; VP intrinsic declarations (mul, add, merge, select) used by the <2 x i16> tests.
; NOTE(review): the name suffix is nxv2i16 although the operand types are fixed-length
; <2 x i16>; presumably the IR reader remangles intrinsic names -- confirm.
613 declare <2 x i16> @llvm.vp.mul.nxv2i16(<2 x i16>, <2 x i16>, <2 x i1>, i32)
614 declare <2 x i16> @llvm.vp.add.nxv2i16(<2 x i16>, <2 x i16>, <2 x i1>, i32)
615 declare <2 x i16> @llvm.vp.merge.nxv2i16(<2 x i1>, <2 x i16>, <2 x i16>, i32)
616 declare <2 x i16> @llvm.vp.select.nxv2i16(<2 x i1>, <2 x i16>, <2 x i16>, i32)
; Masked vector-vector multiply-accumulate on <2 x i16>: all-ones-masked vp.mul/vp.add,
; then vp.merge takes %y where %m is set and %c otherwise.
; Expected lowering: one masked vmacc.vv under a "tu, mu" vsetvli (e16, mf4).
618 define <2 x i16> @vmacc_vv_nxv2i16(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c, <2 x i1> %m, i32 zeroext %evl) {
619 ; CHECK-LABEL: vmacc_vv_nxv2i16:
621 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu
622 ; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t
623 ; CHECK-NEXT: vmv1r.v v8, v10
625 %splat = insertelement <2 x i1> poison, i1 -1, i32 0
626 %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
627 %x = call <2 x i16> @llvm.vp.mul.nxv2i16(<2 x i16> %a, <2 x i16> %b, <2 x i1> %allones, i32 %evl)
628 %y = call <2 x i16> @llvm.vp.add.nxv2i16(<2 x i16> %x, <2 x i16> %c, <2 x i1> %allones, i32 %evl)
629 %u = call <2 x i16> @llvm.vp.merge.nxv2i16(<2 x i1> %m, <2 x i16> %y, <2 x i16> %c, i32 %evl)
; Unmasked variant on <2 x i16>: vp.mul, vp.add and the final vp.merge all use
; the all-ones mask, so %m is not referenced by the instructions shown. The
; assertions expect an unmasked vmacc.vv under a tu, ma vsetvli, followed by a
; vector move of the result into v8.
633 define <2 x i16> @vmacc_vv_nxv2i16_unmasked(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c, <2 x i1> %m, i32 zeroext %evl) {
634 ; CHECK-LABEL: vmacc_vv_nxv2i16_unmasked:
636 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, ma
637 ; CHECK-NEXT: vmacc.vv v10, v8, v9
638 ; CHECK-NEXT: vmv1r.v v8, v10
640 %splat = insertelement <2 x i1> poison, i1 -1, i32 0
641 %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
642 %x = call <2 x i16> @llvm.vp.mul.nxv2i16(<2 x i16> %a, <2 x i16> %b, <2 x i1> %allones, i32 %evl)
643 %y = call <2 x i16> @llvm.vp.add.nxv2i16(<2 x i16> %x, <2 x i16> %c, <2 x i1> %allones, i32 %evl)
644 %u = call <2 x i16> @llvm.vp.merge.nxv2i16(<2 x i1> %allones, <2 x i16> %y, <2 x i16> %c, i32 %evl)
; Scalar-operand masked variant on <2 x i16>: %b is splatted to %vb
; (insertelement + shufflevector) before the all-ones vp.mul/vp.add; vp.merge
; then applies %m with %c as the on-false operand. The assertions expect a
; masked vmacc.vx reading the scalar from a0 under a tu, mu vsetvli.
648 define <2 x i16> @vmacc_vx_nxv2i16(<2 x i16> %a, i16 %b, <2 x i16> %c, <2 x i1> %m, i32 zeroext %evl) {
649 ; CHECK-LABEL: vmacc_vx_nxv2i16:
651 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu
652 ; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t
653 ; CHECK-NEXT: vmv1r.v v8, v9
655 %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0
656 %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
657 %splat = insertelement <2 x i1> poison, i1 -1, i32 0
658 %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
659 %x = call <2 x i16> @llvm.vp.mul.nxv2i16(<2 x i16> %a, <2 x i16> %vb, <2 x i1> %allones, i32 %evl)
660 %y = call <2 x i16> @llvm.vp.add.nxv2i16(<2 x i16> %x, <2 x i16> %c, <2 x i1> %allones, i32 %evl)
661 %u = call <2 x i16> @llvm.vp.merge.nxv2i16(<2 x i1> %m, <2 x i16> %y, <2 x i16> %c, i32 %evl)
; Scalar-operand unmasked variant on <2 x i16>: %b is splatted to %vb, and
; vp.mul, vp.add and vp.merge all use the all-ones mask (%m is not referenced
; by the instructions shown). The assertions expect an unmasked vmacc.vx
; reading the scalar from a0 under a tu, ma vsetvli.
665 define <2 x i16> @vmacc_vx_nxv2i16_unmasked(<2 x i16> %a, i16 %b, <2 x i16> %c, <2 x i1> %m, i32 zeroext %evl) {
666 ; CHECK-LABEL: vmacc_vx_nxv2i16_unmasked:
668 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, ma
669 ; CHECK-NEXT: vmacc.vx v9, a0, v8
670 ; CHECK-NEXT: vmv1r.v v8, v9
672 %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0
673 %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
674 %splat = insertelement <2 x i1> poison, i1 -1, i32 0
675 %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
676 %x = call <2 x i16> @llvm.vp.mul.nxv2i16(<2 x i16> %a, <2 x i16> %vb, <2 x i1> %allones, i32 %evl)
677 %y = call <2 x i16> @llvm.vp.add.nxv2i16(<2 x i16> %x, <2 x i16> %c, <2 x i1> %allones, i32 %evl)
678 %u = call <2 x i16> @llvm.vp.merge.nxv2i16(<2 x i1> %allones, <2 x i16> %y, <2 x i16> %c, i32 %evl)
; vp.select variant on <2 x i16>: same all-ones vp.mul/vp.add chain as the
; merge test, but the final blend on %m uses vp.select. The assertions expect
; a masked vmacc.vv under a ta, mu vsetvli (instead of tu, mu).
682 define <2 x i16> @vmacc_vv_nxv2i16_ta(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c, <2 x i1> %m, i32 zeroext %evl) {
683 ; CHECK-LABEL: vmacc_vv_nxv2i16_ta:
685 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
686 ; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t
687 ; CHECK-NEXT: vmv1r.v v8, v10
689 %splat = insertelement <2 x i1> poison, i1 -1, i32 0
690 %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
691 %x = call <2 x i16> @llvm.vp.mul.nxv2i16(<2 x i16> %a, <2 x i16> %b, <2 x i1> %allones, i32 %evl)
692 %y = call <2 x i16> @llvm.vp.add.nxv2i16(<2 x i16> %x, <2 x i16> %c, <2 x i1> %allones, i32 %evl)
693 %u = call <2 x i16> @llvm.vp.select.nxv2i16(<2 x i1> %m, <2 x i16> %y, <2 x i16> %c, i32 %evl)
; Scalar-operand vp.select variant on <2 x i16>: %b is splatted to %vb, then
; the all-ones vp.mul/vp.add result is blended with %c via vp.select on %m.
; The assertions expect a masked vmacc.vx (scalar in a0) under a ta, mu
; vsetvli.
697 define <2 x i16> @vmacc_vx_nxv2i16_ta(<2 x i16> %a, i16 %b, <2 x i16> %c, <2 x i1> %m, i32 zeroext %evl) {
698 ; CHECK-LABEL: vmacc_vx_nxv2i16_ta:
700 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
701 ; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t
702 ; CHECK-NEXT: vmv1r.v v8, v9
704 %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0
705 %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
706 %splat = insertelement <2 x i1> poison, i1 -1, i32 0
707 %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
708 %x = call <2 x i16> @llvm.vp.mul.nxv2i16(<2 x i16> %a, <2 x i16> %vb, <2 x i1> %allones, i32 %evl)
709 %y = call <2 x i16> @llvm.vp.add.nxv2i16(<2 x i16> %x, <2 x i16> %c, <2 x i1> %allones, i32 %evl)
710 %u = call <2 x i16> @llvm.vp.select.nxv2i16(<2 x i1> %m, <2 x i16> %y, <2 x i16> %c, i32 %evl)
; VP intrinsic declarations (mul, add, merge, select) used by the <4 x i16>
; tests that follow.
; NOTE(review): the `nxv4i16` suffix does not match the fixed-length
; <4 x i16> operand types; presumably inherited naming -- confirm.
714 declare <4 x i16> @llvm.vp.mul.nxv4i16(<4 x i16>, <4 x i16>, <4 x i1>, i32)
715 declare <4 x i16> @llvm.vp.add.nxv4i16(<4 x i16>, <4 x i16>, <4 x i1>, i32)
716 declare <4 x i16> @llvm.vp.merge.nxv4i16(<4 x i1>, <4 x i16>, <4 x i16>, i32)
717 declare <4 x i16> @llvm.vp.select.nxv4i16(<4 x i1>, <4 x i16>, <4 x i16>, i32)
; Masked multiply-accumulate on <4 x i16>: vp.mul(%a, %b) then vp.add(%x, %c),
; both under an all-ones mask, followed by vp.merge on %m with %c as the
; on-false operand. The assertions expect a masked vmacc.vv under a tu, mu
; vsetvli, followed by a vector move of the result into v8.
719 define <4 x i16> @vmacc_vv_nxv4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i1> %m, i32 zeroext %evl) {
720 ; CHECK-LABEL: vmacc_vv_nxv4i16:
722 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu
723 ; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t
724 ; CHECK-NEXT: vmv1r.v v8, v10
726 %splat = insertelement <4 x i1> poison, i1 -1, i32 0
727 %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
728 %x = call <4 x i16> @llvm.vp.mul.nxv4i16(<4 x i16> %a, <4 x i16> %b, <4 x i1> %allones, i32 %evl)
729 %y = call <4 x i16> @llvm.vp.add.nxv4i16(<4 x i16> %x, <4 x i16> %c, <4 x i1> %allones, i32 %evl)
730 %u = call <4 x i16> @llvm.vp.merge.nxv4i16(<4 x i1> %m, <4 x i16> %y, <4 x i16> %c, i32 %evl)
; Unmasked variant on <4 x i16>: vp.mul, vp.add and the final vp.merge all use
; the all-ones mask, so %m is not referenced by the instructions shown. The
; assertions expect an unmasked vmacc.vv under a tu, ma vsetvli.
734 define <4 x i16> @vmacc_vv_nxv4i16_unmasked(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i1> %m, i32 zeroext %evl) {
735 ; CHECK-LABEL: vmacc_vv_nxv4i16_unmasked:
737 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, ma
738 ; CHECK-NEXT: vmacc.vv v10, v8, v9
739 ; CHECK-NEXT: vmv1r.v v8, v10
741 %splat = insertelement <4 x i1> poison, i1 -1, i32 0
742 %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
743 %x = call <4 x i16> @llvm.vp.mul.nxv4i16(<4 x i16> %a, <4 x i16> %b, <4 x i1> %allones, i32 %evl)
744 %y = call <4 x i16> @llvm.vp.add.nxv4i16(<4 x i16> %x, <4 x i16> %c, <4 x i1> %allones, i32 %evl)
745 %u = call <4 x i16> @llvm.vp.merge.nxv4i16(<4 x i1> %allones, <4 x i16> %y, <4 x i16> %c, i32 %evl)
; Scalar-operand masked variant on <4 x i16>: %b is splatted to %vb
; (insertelement + shufflevector) before the all-ones vp.mul/vp.add; vp.merge
; then applies %m with %c as the on-false operand. The assertions expect a
; masked vmacc.vx reading the scalar from a0 under a tu, mu vsetvli.
749 define <4 x i16> @vmacc_vx_nxv4i16(<4 x i16> %a, i16 %b, <4 x i16> %c, <4 x i1> %m, i32 zeroext %evl) {
750 ; CHECK-LABEL: vmacc_vx_nxv4i16:
752 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu
753 ; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t
754 ; CHECK-NEXT: vmv1r.v v8, v9
756 %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0
757 %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
758 %splat = insertelement <4 x i1> poison, i1 -1, i32 0
759 %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
760 %x = call <4 x i16> @llvm.vp.mul.nxv4i16(<4 x i16> %a, <4 x i16> %vb, <4 x i1> %allones, i32 %evl)
761 %y = call <4 x i16> @llvm.vp.add.nxv4i16(<4 x i16> %x, <4 x i16> %c, <4 x i1> %allones, i32 %evl)
762 %u = call <4 x i16> @llvm.vp.merge.nxv4i16(<4 x i1> %m, <4 x i16> %y, <4 x i16> %c, i32 %evl)
; Scalar-operand unmasked variant on <4 x i16>: %b is splatted to %vb, and
; vp.mul, vp.add and vp.merge all use the all-ones mask (%m is not referenced
; by the instructions shown). The assertions expect an unmasked vmacc.vx
; reading the scalar from a0 under a tu, ma vsetvli.
766 define <4 x i16> @vmacc_vx_nxv4i16_unmasked(<4 x i16> %a, i16 %b, <4 x i16> %c, <4 x i1> %m, i32 zeroext %evl) {
767 ; CHECK-LABEL: vmacc_vx_nxv4i16_unmasked:
769 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, ma
770 ; CHECK-NEXT: vmacc.vx v9, a0, v8
771 ; CHECK-NEXT: vmv1r.v v8, v9
773 %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0
774 %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
775 %splat = insertelement <4 x i1> poison, i1 -1, i32 0
776 %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
777 %x = call <4 x i16> @llvm.vp.mul.nxv4i16(<4 x i16> %a, <4 x i16> %vb, <4 x i1> %allones, i32 %evl)
778 %y = call <4 x i16> @llvm.vp.add.nxv4i16(<4 x i16> %x, <4 x i16> %c, <4 x i1> %allones, i32 %evl)
779 %u = call <4 x i16> @llvm.vp.merge.nxv4i16(<4 x i1> %allones, <4 x i16> %y, <4 x i16> %c, i32 %evl)
; vp.select variant on <4 x i16>: same all-ones vp.mul/vp.add chain as the
; merge test, but the final blend on %m uses vp.select. The assertions expect
; a masked vmacc.vv under a ta, mu vsetvli (instead of tu, mu).
783 define <4 x i16> @vmacc_vv_nxv4i16_ta(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i1> %m, i32 zeroext %evl) {
784 ; CHECK-LABEL: vmacc_vv_nxv4i16_ta:
786 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
787 ; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t
788 ; CHECK-NEXT: vmv1r.v v8, v10
790 %splat = insertelement <4 x i1> poison, i1 -1, i32 0
791 %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
792 %x = call <4 x i16> @llvm.vp.mul.nxv4i16(<4 x i16> %a, <4 x i16> %b, <4 x i1> %allones, i32 %evl)
793 %y = call <4 x i16> @llvm.vp.add.nxv4i16(<4 x i16> %x, <4 x i16> %c, <4 x i1> %allones, i32 %evl)
794 %u = call <4 x i16> @llvm.vp.select.nxv4i16(<4 x i1> %m, <4 x i16> %y, <4 x i16> %c, i32 %evl)
; Scalar-operand vp.select variant on <4 x i16>: %b is splatted to %vb, then
; the all-ones vp.mul/vp.add result is blended with %c via vp.select on %m.
; The assertions expect a masked vmacc.vx (scalar in a0) under a ta, mu
; vsetvli.
798 define <4 x i16> @vmacc_vx_nxv4i16_ta(<4 x i16> %a, i16 %b, <4 x i16> %c, <4 x i1> %m, i32 zeroext %evl) {
799 ; CHECK-LABEL: vmacc_vx_nxv4i16_ta:
801 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
802 ; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t
803 ; CHECK-NEXT: vmv1r.v v8, v9
805 %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0
806 %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
807 %splat = insertelement <4 x i1> poison, i1 -1, i32 0
808 %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
809 %x = call <4 x i16> @llvm.vp.mul.nxv4i16(<4 x i16> %a, <4 x i16> %vb, <4 x i1> %allones, i32 %evl)
810 %y = call <4 x i16> @llvm.vp.add.nxv4i16(<4 x i16> %x, <4 x i16> %c, <4 x i1> %allones, i32 %evl)
811 %u = call <4 x i16> @llvm.vp.select.nxv4i16(<4 x i1> %m, <4 x i16> %y, <4 x i16> %c, i32 %evl)
; VP intrinsic declarations (mul, add, merge, select) used by the <8 x i16>
; tests that follow.
; NOTE(review): the `nxv8i16` suffix does not match the fixed-length
; <8 x i16> operand types; presumably inherited naming -- confirm.
815 declare <8 x i16> @llvm.vp.mul.nxv8i16(<8 x i16>, <8 x i16>, <8 x i1>, i32)
816 declare <8 x i16> @llvm.vp.add.nxv8i16(<8 x i16>, <8 x i16>, <8 x i1>, i32)
817 declare <8 x i16> @llvm.vp.merge.nxv8i16(<8 x i1>, <8 x i16>, <8 x i16>, i32)
818 declare <8 x i16> @llvm.vp.select.nxv8i16(<8 x i1>, <8 x i16>, <8 x i16>, i32)
; Masked multiply-accumulate on <8 x i16>: vp.mul(%a, %b) then vp.add(%x, %c),
; both under an all-ones mask, followed by vp.merge on %m with %c as the
; on-false operand. The assertions expect a masked vmacc.vv under a tu, mu
; vsetvli at LMUL m1, followed by a vector move of the result into v8.
820 define <8 x i16> @vmacc_vv_nxv8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x i1> %m, i32 zeroext %evl) {
821 ; CHECK-LABEL: vmacc_vv_nxv8i16:
823 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu
824 ; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t
825 ; CHECK-NEXT: vmv1r.v v8, v10
827 %splat = insertelement <8 x i1> poison, i1 -1, i32 0
828 %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
829 %x = call <8 x i16> @llvm.vp.mul.nxv8i16(<8 x i16> %a, <8 x i16> %b, <8 x i1> %allones, i32 %evl)
830 %y = call <8 x i16> @llvm.vp.add.nxv8i16(<8 x i16> %x, <8 x i16> %c, <8 x i1> %allones, i32 %evl)
831 %u = call <8 x i16> @llvm.vp.merge.nxv8i16(<8 x i1> %m, <8 x i16> %y, <8 x i16> %c, i32 %evl)
; Unmasked variant on <8 x i16>: vp.mul, vp.add and the final vp.merge all use
; the all-ones mask, so %m is not referenced by the instructions shown. The
; assertions expect an unmasked vmacc.vv under a tu, ma vsetvli.
835 define <8 x i16> @vmacc_vv_nxv8i16_unmasked(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x i1> %m, i32 zeroext %evl) {
836 ; CHECK-LABEL: vmacc_vv_nxv8i16_unmasked:
838 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
839 ; CHECK-NEXT: vmacc.vv v10, v8, v9
840 ; CHECK-NEXT: vmv1r.v v8, v10
842 %splat = insertelement <8 x i1> poison, i1 -1, i32 0
843 %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
844 %x = call <8 x i16> @llvm.vp.mul.nxv8i16(<8 x i16> %a, <8 x i16> %b, <8 x i1> %allones, i32 %evl)
845 %y = call <8 x i16> @llvm.vp.add.nxv8i16(<8 x i16> %x, <8 x i16> %c, <8 x i1> %allones, i32 %evl)
846 %u = call <8 x i16> @llvm.vp.merge.nxv8i16(<8 x i1> %allones, <8 x i16> %y, <8 x i16> %c, i32 %evl)
; Scalar-operand masked variant on <8 x i16>: %b is splatted to %vb
; (insertelement + shufflevector) before the all-ones vp.mul/vp.add; vp.merge
; then applies %m with %c as the on-false operand. The assertions expect a
; masked vmacc.vx reading the scalar from a0 under a tu, mu vsetvli.
850 define <8 x i16> @vmacc_vx_nxv8i16(<8 x i16> %a, i16 %b, <8 x i16> %c, <8 x i1> %m, i32 zeroext %evl) {
851 ; CHECK-LABEL: vmacc_vx_nxv8i16:
853 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu
854 ; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t
855 ; CHECK-NEXT: vmv1r.v v8, v9
857 %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0
858 %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
859 %splat = insertelement <8 x i1> poison, i1 -1, i32 0
860 %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
861 %x = call <8 x i16> @llvm.vp.mul.nxv8i16(<8 x i16> %a, <8 x i16> %vb, <8 x i1> %allones, i32 %evl)
862 %y = call <8 x i16> @llvm.vp.add.nxv8i16(<8 x i16> %x, <8 x i16> %c, <8 x i1> %allones, i32 %evl)
863 %u = call <8 x i16> @llvm.vp.merge.nxv8i16(<8 x i1> %m, <8 x i16> %y, <8 x i16> %c, i32 %evl)
; Scalar-operand unmasked variant on <8 x i16>: %b is splatted to %vb, and
; vp.mul, vp.add and vp.merge all use the all-ones mask (%m is not referenced
; by the instructions shown). The assertions expect an unmasked vmacc.vx
; reading the scalar from a0 under a tu, ma vsetvli.
867 define <8 x i16> @vmacc_vx_nxv8i16_unmasked(<8 x i16> %a, i16 %b, <8 x i16> %c, <8 x i1> %m, i32 zeroext %evl) {
868 ; CHECK-LABEL: vmacc_vx_nxv8i16_unmasked:
870 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
871 ; CHECK-NEXT: vmacc.vx v9, a0, v8
872 ; CHECK-NEXT: vmv1r.v v8, v9
874 %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0
875 %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
876 %splat = insertelement <8 x i1> poison, i1 -1, i32 0
877 %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
878 %x = call <8 x i16> @llvm.vp.mul.nxv8i16(<8 x i16> %a, <8 x i16> %vb, <8 x i1> %allones, i32 %evl)
879 %y = call <8 x i16> @llvm.vp.add.nxv8i16(<8 x i16> %x, <8 x i16> %c, <8 x i1> %allones, i32 %evl)
880 %u = call <8 x i16> @llvm.vp.merge.nxv8i16(<8 x i1> %allones, <8 x i16> %y, <8 x i16> %c, i32 %evl)
; vp.select variant on <8 x i16>: same all-ones vp.mul/vp.add chain as the
; merge test, but the final blend on %m uses vp.select. The assertions expect
; a masked vmacc.vv under a ta, mu vsetvli, with the result copied into v8 by
; vmv.v.v.
884 define <8 x i16> @vmacc_vv_nxv8i16_ta(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x i1> %m, i32 zeroext %evl) {
885 ; CHECK-LABEL: vmacc_vv_nxv8i16_ta:
887 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
888 ; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t
889 ; CHECK-NEXT: vmv.v.v v8, v10
891 %splat = insertelement <8 x i1> poison, i1 -1, i32 0
892 %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
893 %x = call <8 x i16> @llvm.vp.mul.nxv8i16(<8 x i16> %a, <8 x i16> %b, <8 x i1> %allones, i32 %evl)
894 %y = call <8 x i16> @llvm.vp.add.nxv8i16(<8 x i16> %x, <8 x i16> %c, <8 x i1> %allones, i32 %evl)
895 %u = call <8 x i16> @llvm.vp.select.nxv8i16(<8 x i1> %m, <8 x i16> %y, <8 x i16> %c, i32 %evl)
; Scalar-operand vp.select variant on <8 x i16>: %b is splatted to %vb, then
; the all-ones vp.mul/vp.add result is blended with %c via vp.select on %m.
; The assertions expect a masked vmacc.vx (scalar in a0) under a ta, mu
; vsetvli, with the result copied into v8 by vmv.v.v.
899 define <8 x i16> @vmacc_vx_nxv8i16_ta(<8 x i16> %a, i16 %b, <8 x i16> %c, <8 x i1> %m, i32 zeroext %evl) {
900 ; CHECK-LABEL: vmacc_vx_nxv8i16_ta:
902 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
903 ; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t
904 ; CHECK-NEXT: vmv.v.v v8, v9
906 %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0
907 %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
908 %splat = insertelement <8 x i1> poison, i1 -1, i32 0
909 %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
910 %x = call <8 x i16> @llvm.vp.mul.nxv8i16(<8 x i16> %a, <8 x i16> %vb, <8 x i1> %allones, i32 %evl)
911 %y = call <8 x i16> @llvm.vp.add.nxv8i16(<8 x i16> %x, <8 x i16> %c, <8 x i1> %allones, i32 %evl)
912 %u = call <8 x i16> @llvm.vp.select.nxv8i16(<8 x i1> %m, <8 x i16> %y, <8 x i16> %c, i32 %evl)
; VP intrinsic declarations (mul, add, merge, select) used by the <16 x i16>
; tests that follow.
; NOTE(review): the `nxv16i16` suffix does not match the fixed-length
; <16 x i16> operand types; presumably inherited naming -- confirm.
916 declare <16 x i16> @llvm.vp.mul.nxv16i16(<16 x i16>, <16 x i16>, <16 x i1>, i32)
917 declare <16 x i16> @llvm.vp.add.nxv16i16(<16 x i16>, <16 x i16>, <16 x i1>, i32)
918 declare <16 x i16> @llvm.vp.merge.nxv16i16(<16 x i1>, <16 x i16>, <16 x i16>, i32)
919 declare <16 x i16> @llvm.vp.select.nxv16i16(<16 x i1>, <16 x i16>, <16 x i16>, i32)
; Masked multiply-accumulate on <16 x i16>: vp.mul(%a, %b) then
; vp.add(%x, %c), both under an all-ones mask, followed by vp.merge on %m with
; %c as the on-false operand. The assertions expect a masked vmacc.vv under a
; tu, mu vsetvli at LMUL m2, then a vmv2r.v of the result into v8.
921 define <16 x i16> @vmacc_vv_nxv16i16(<16 x i16> %a, <16 x i16> %b, <16 x i16> %c, <16 x i1> %m, i32 zeroext %evl) {
922 ; CHECK-LABEL: vmacc_vv_nxv16i16:
924 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu
925 ; CHECK-NEXT: vmacc.vv v12, v8, v10, v0.t
926 ; CHECK-NEXT: vmv2r.v v8, v12
928 %splat = insertelement <16 x i1> poison, i1 -1, i32 0
929 %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
930 %x = call <16 x i16> @llvm.vp.mul.nxv16i16(<16 x i16> %a, <16 x i16> %b, <16 x i1> %allones, i32 %evl)
931 %y = call <16 x i16> @llvm.vp.add.nxv16i16(<16 x i16> %x, <16 x i16> %c, <16 x i1> %allones, i32 %evl)
932 %u = call <16 x i16> @llvm.vp.merge.nxv16i16(<16 x i1> %m, <16 x i16> %y, <16 x i16> %c, i32 %evl)
; Unmasked variant on <16 x i16>: vp.mul, vp.add and the final vp.merge all
; use the all-ones mask, so %m is not referenced by the instructions shown.
; The assertions expect an unmasked vmacc.vv under a tu, ma vsetvli.
936 define <16 x i16> @vmacc_vv_nxv16i16_unmasked(<16 x i16> %a, <16 x i16> %b, <16 x i16> %c, <16 x i1> %m, i32 zeroext %evl) {
937 ; CHECK-LABEL: vmacc_vv_nxv16i16_unmasked:
939 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, ma
940 ; CHECK-NEXT: vmacc.vv v12, v8, v10
941 ; CHECK-NEXT: vmv2r.v v8, v12
943 %splat = insertelement <16 x i1> poison, i1 -1, i32 0
944 %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
945 %x = call <16 x i16> @llvm.vp.mul.nxv16i16(<16 x i16> %a, <16 x i16> %b, <16 x i1> %allones, i32 %evl)
946 %y = call <16 x i16> @llvm.vp.add.nxv16i16(<16 x i16> %x, <16 x i16> %c, <16 x i1> %allones, i32 %evl)
947 %u = call <16 x i16> @llvm.vp.merge.nxv16i16(<16 x i1> %allones, <16 x i16> %y, <16 x i16> %c, i32 %evl)
; Scalar-operand masked variant on <16 x i16>: %b is splatted to %vb
; (insertelement + shufflevector) before the all-ones vp.mul/vp.add; vp.merge
; then applies %m with %c as the on-false operand. The assertions expect a
; masked vmacc.vx reading the scalar from a0 under a tu, mu vsetvli.
951 define <16 x i16> @vmacc_vx_nxv16i16(<16 x i16> %a, i16 %b, <16 x i16> %c, <16 x i1> %m, i32 zeroext %evl) {
952 ; CHECK-LABEL: vmacc_vx_nxv16i16:
954 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu
955 ; CHECK-NEXT: vmacc.vx v10, a0, v8, v0.t
956 ; CHECK-NEXT: vmv2r.v v8, v10
958 %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0
959 %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
960 %splat = insertelement <16 x i1> poison, i1 -1, i32 0
961 %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
962 %x = call <16 x i16> @llvm.vp.mul.nxv16i16(<16 x i16> %a, <16 x i16> %vb, <16 x i1> %allones, i32 %evl)
963 %y = call <16 x i16> @llvm.vp.add.nxv16i16(<16 x i16> %x, <16 x i16> %c, <16 x i1> %allones, i32 %evl)
964 %u = call <16 x i16> @llvm.vp.merge.nxv16i16(<16 x i1> %m, <16 x i16> %y, <16 x i16> %c, i32 %evl)
; Scalar-operand unmasked variant on <16 x i16>: %b is splatted to %vb, and
; vp.mul, vp.add and vp.merge all use the all-ones mask (%m is not referenced
; by the instructions shown). The assertions expect an unmasked vmacc.vx
; reading the scalar from a0 under a tu, ma vsetvli.
968 define <16 x i16> @vmacc_vx_nxv16i16_unmasked(<16 x i16> %a, i16 %b, <16 x i16> %c, <16 x i1> %m, i32 zeroext %evl) {
969 ; CHECK-LABEL: vmacc_vx_nxv16i16_unmasked:
971 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, ma
972 ; CHECK-NEXT: vmacc.vx v10, a0, v8
973 ; CHECK-NEXT: vmv2r.v v8, v10
975 %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0
976 %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
977 %splat = insertelement <16 x i1> poison, i1 -1, i32 0
978 %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
979 %x = call <16 x i16> @llvm.vp.mul.nxv16i16(<16 x i16> %a, <16 x i16> %vb, <16 x i1> %allones, i32 %evl)
980 %y = call <16 x i16> @llvm.vp.add.nxv16i16(<16 x i16> %x, <16 x i16> %c, <16 x i1> %allones, i32 %evl)
981 %u = call <16 x i16> @llvm.vp.merge.nxv16i16(<16 x i1> %allones, <16 x i16> %y, <16 x i16> %c, i32 %evl)
; vp.select variant on <16 x i16>: same all-ones vp.mul/vp.add chain as the
; merge test, but the final blend on %m uses vp.select. The assertions expect
; a masked vmacc.vv under a ta, mu vsetvli, with the result copied into v8 by
; vmv.v.v.
985 define <16 x i16> @vmacc_vv_nxv16i16_ta(<16 x i16> %a, <16 x i16> %b, <16 x i16> %c, <16 x i1> %m, i32 zeroext %evl) {
986 ; CHECK-LABEL: vmacc_vv_nxv16i16_ta:
988 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
989 ; CHECK-NEXT: vmacc.vv v12, v8, v10, v0.t
990 ; CHECK-NEXT: vmv.v.v v8, v12
992 %splat = insertelement <16 x i1> poison, i1 -1, i32 0
993 %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
994 %x = call <16 x i16> @llvm.vp.mul.nxv16i16(<16 x i16> %a, <16 x i16> %b, <16 x i1> %allones, i32 %evl)
995 %y = call <16 x i16> @llvm.vp.add.nxv16i16(<16 x i16> %x, <16 x i16> %c, <16 x i1> %allones, i32 %evl)
996 %u = call <16 x i16> @llvm.vp.select.nxv16i16(<16 x i1> %m, <16 x i16> %y, <16 x i16> %c, i32 %evl)
; Scalar-operand vp.select variant on <16 x i16>: %b is splatted to %vb, then
; the all-ones vp.mul/vp.add result is blended with %c via vp.select on %m.
; The assertions expect a masked vmacc.vx (scalar in a0) under a ta, mu
; vsetvli, with the result copied into v8 by vmv.v.v.
1000 define <16 x i16> @vmacc_vx_nxv16i16_ta(<16 x i16> %a, i16 %b, <16 x i16> %c, <16 x i1> %m, i32 zeroext %evl) {
1001 ; CHECK-LABEL: vmacc_vx_nxv16i16_ta:
1003 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
1004 ; CHECK-NEXT: vmacc.vx v10, a0, v8, v0.t
1005 ; CHECK-NEXT: vmv.v.v v8, v10
1007 %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0
1008 %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
1009 %splat = insertelement <16 x i1> poison, i1 -1, i32 0
1010 %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
1011 %x = call <16 x i16> @llvm.vp.mul.nxv16i16(<16 x i16> %a, <16 x i16> %vb, <16 x i1> %allones, i32 %evl)
1012 %y = call <16 x i16> @llvm.vp.add.nxv16i16(<16 x i16> %x, <16 x i16> %c, <16 x i1> %allones, i32 %evl)
1013 %u = call <16 x i16> @llvm.vp.select.nxv16i16(<16 x i1> %m, <16 x i16> %y, <16 x i16> %c, i32 %evl)
; VP intrinsic declarations (mul, add, merge, select) used by the <32 x i16>
; tests that follow.
; NOTE(review): the `nxv32i16` suffix does not match the fixed-length
; <32 x i16> operand types; presumably inherited naming -- confirm.
1017 declare <32 x i16> @llvm.vp.mul.nxv32i16(<32 x i16>, <32 x i16>, <32 x i1>, i32)
1018 declare <32 x i16> @llvm.vp.add.nxv32i16(<32 x i16>, <32 x i16>, <32 x i1>, i32)
1019 declare <32 x i16> @llvm.vp.merge.nxv32i16(<32 x i1>, <32 x i16>, <32 x i16>, i32)
1020 declare <32 x i16> @llvm.vp.select.nxv32i16(<32 x i1>, <32 x i16>, <32 x i16>, i32)
; Masked multiply-accumulate on <32 x i16>: vp.mul(%a, %b) then
; vp.add(%x, %c), both under an all-ones mask, followed by vp.merge on %m with
; %c as the on-false operand. The assertions expect a masked vmacc.vv under a
; tu, mu vsetvli at LMUL m4, then a vmv4r.v of the result into v8.
1022 define <32 x i16> @vmacc_vv_nxv32i16(<32 x i16> %a, <32 x i16> %b, <32 x i16> %c, <32 x i1> %m, i32 zeroext %evl) {
1023 ; CHECK-LABEL: vmacc_vv_nxv32i16:
1025 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu
1026 ; CHECK-NEXT: vmacc.vv v16, v8, v12, v0.t
1027 ; CHECK-NEXT: vmv4r.v v8, v16
1029 %splat = insertelement <32 x i1> poison, i1 -1, i32 0
1030 %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
1031 %x = call <32 x i16> @llvm.vp.mul.nxv32i16(<32 x i16> %a, <32 x i16> %b, <32 x i1> %allones, i32 %evl)
1032 %y = call <32 x i16> @llvm.vp.add.nxv32i16(<32 x i16> %x, <32 x i16> %c, <32 x i1> %allones, i32 %evl)
1033 %u = call <32 x i16> @llvm.vp.merge.nxv32i16(<32 x i1> %m, <32 x i16> %y, <32 x i16> %c, i32 %evl)
; Unmasked variant on <32 x i16>: vp.mul, vp.add and the final vp.merge all
; use the all-ones mask, so %m is not referenced by the instructions shown.
; The assertions expect an unmasked vmacc.vv under a tu, ma vsetvli.
1037 define <32 x i16> @vmacc_vv_nxv32i16_unmasked(<32 x i16> %a, <32 x i16> %b, <32 x i16> %c, <32 x i1> %m, i32 zeroext %evl) {
1038 ; CHECK-LABEL: vmacc_vv_nxv32i16_unmasked:
1040 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, ma
1041 ; CHECK-NEXT: vmacc.vv v16, v8, v12
1042 ; CHECK-NEXT: vmv4r.v v8, v16
1044 %splat = insertelement <32 x i1> poison, i1 -1, i32 0
1045 %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
1046 %x = call <32 x i16> @llvm.vp.mul.nxv32i16(<32 x i16> %a, <32 x i16> %b, <32 x i1> %allones, i32 %evl)
1047 %y = call <32 x i16> @llvm.vp.add.nxv32i16(<32 x i16> %x, <32 x i16> %c, <32 x i1> %allones, i32 %evl)
1048 %u = call <32 x i16> @llvm.vp.merge.nxv32i16(<32 x i1> %allones, <32 x i16> %y, <32 x i16> %c, i32 %evl)
; Scalar-operand masked variant on <32 x i16>: %b is splatted to %vb
; (insertelement + shufflevector) before the all-ones vp.mul/vp.add; vp.merge
; then applies %m with %c as the on-false operand. The assertions expect a
; masked vmacc.vx reading the scalar from a0 under a tu, mu vsetvli.
1052 define <32 x i16> @vmacc_vx_nxv32i16(<32 x i16> %a, i16 %b, <32 x i16> %c, <32 x i1> %m, i32 zeroext %evl) {
1053 ; CHECK-LABEL: vmacc_vx_nxv32i16:
1055 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu
1056 ; CHECK-NEXT: vmacc.vx v12, a0, v8, v0.t
1057 ; CHECK-NEXT: vmv4r.v v8, v12
1059 %elt.head = insertelement <32 x i16> poison, i16 %b, i32 0
1060 %vb = shufflevector <32 x i16> %elt.head, <32 x i16> poison, <32 x i32> zeroinitializer
1061 %splat = insertelement <32 x i1> poison, i1 -1, i32 0
1062 %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
1063 %x = call <32 x i16> @llvm.vp.mul.nxv32i16(<32 x i16> %a, <32 x i16> %vb, <32 x i1> %allones, i32 %evl)
1064 %y = call <32 x i16> @llvm.vp.add.nxv32i16(<32 x i16> %x, <32 x i16> %c, <32 x i1> %allones, i32 %evl)
1065 %u = call <32 x i16> @llvm.vp.merge.nxv32i16(<32 x i1> %m, <32 x i16> %y, <32 x i16> %c, i32 %evl)
; Scalar-operand unmasked variant on <32 x i16>: %b is splatted to %vb, and
; vp.mul, vp.add and vp.merge all use the all-ones mask (%m is not referenced
; by the instructions shown). The assertions expect an unmasked vmacc.vx
; reading the scalar from a0 under a tu, ma vsetvli.
1069 define <32 x i16> @vmacc_vx_nxv32i16_unmasked(<32 x i16> %a, i16 %b, <32 x i16> %c, <32 x i1> %m, i32 zeroext %evl) {
1070 ; CHECK-LABEL: vmacc_vx_nxv32i16_unmasked:
1072 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, ma
1073 ; CHECK-NEXT: vmacc.vx v12, a0, v8
1074 ; CHECK-NEXT: vmv4r.v v8, v12
1076 %elt.head = insertelement <32 x i16> poison, i16 %b, i32 0
1077 %vb = shufflevector <32 x i16> %elt.head, <32 x i16> poison, <32 x i32> zeroinitializer
1078 %splat = insertelement <32 x i1> poison, i1 -1, i32 0
1079 %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
1080 %x = call <32 x i16> @llvm.vp.mul.nxv32i16(<32 x i16> %a, <32 x i16> %vb, <32 x i1> %allones, i32 %evl)
1081 %y = call <32 x i16> @llvm.vp.add.nxv32i16(<32 x i16> %x, <32 x i16> %c, <32 x i1> %allones, i32 %evl)
1082 %u = call <32 x i16> @llvm.vp.merge.nxv32i16(<32 x i1> %allones, <32 x i16> %y, <32 x i16> %c, i32 %evl)
; vp.select variant on <32 x i16>: same all-ones vp.mul/vp.add chain as the
; merge test, but the final blend on %m uses vp.select. The assertions expect
; a masked vmacc.vv under a ta, mu vsetvli, with the result copied into v8 by
; vmv.v.v.
1086 define <32 x i16> @vmacc_vv_nxv32i16_ta(<32 x i16> %a, <32 x i16> %b, <32 x i16> %c, <32 x i1> %m, i32 zeroext %evl) {
1087 ; CHECK-LABEL: vmacc_vv_nxv32i16_ta:
1089 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
1090 ; CHECK-NEXT: vmacc.vv v16, v8, v12, v0.t
1091 ; CHECK-NEXT: vmv.v.v v8, v16
1093 %splat = insertelement <32 x i1> poison, i1 -1, i32 0
1094 %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
1095 %x = call <32 x i16> @llvm.vp.mul.nxv32i16(<32 x i16> %a, <32 x i16> %b, <32 x i1> %allones, i32 %evl)
1096 %y = call <32 x i16> @llvm.vp.add.nxv32i16(<32 x i16> %x, <32 x i16> %c, <32 x i1> %allones, i32 %evl)
1097 %u = call <32 x i16> @llvm.vp.select.nxv32i16(<32 x i1> %m, <32 x i16> %y, <32 x i16> %c, i32 %evl)
; Scalar-operand vp.select variant on <32 x i16>: %b is splatted to %vb, then
; the all-ones vp.mul/vp.add result is blended with %c via vp.select on %m.
; The assertions expect a masked vmacc.vx (scalar in a0) under a ta, mu
; vsetvli, with the result copied into v8 by vmv.v.v.
1101 define <32 x i16> @vmacc_vx_nxv32i16_ta(<32 x i16> %a, i16 %b, <32 x i16> %c, <32 x i1> %m, i32 zeroext %evl) {
1102 ; CHECK-LABEL: vmacc_vx_nxv32i16_ta:
1104 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
1105 ; CHECK-NEXT: vmacc.vx v12, a0, v8, v0.t
1106 ; CHECK-NEXT: vmv.v.v v8, v12
1108 %elt.head = insertelement <32 x i16> poison, i16 %b, i32 0
1109 %vb = shufflevector <32 x i16> %elt.head, <32 x i16> poison, <32 x i32> zeroinitializer
1110 %splat = insertelement <32 x i1> poison, i1 -1, i32 0
1111 %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
1112 %x = call <32 x i16> @llvm.vp.mul.nxv32i16(<32 x i16> %a, <32 x i16> %vb, <32 x i1> %allones, i32 %evl)
1113 %y = call <32 x i16> @llvm.vp.add.nxv32i16(<32 x i16> %x, <32 x i16> %c, <32 x i1> %allones, i32 %evl)
1114 %u = call <32 x i16> @llvm.vp.select.nxv32i16(<32 x i1> %m, <32 x i16> %y, <32 x i16> %c, i32 %evl)
; VP intrinsic declarations (mul, add, merge, select) used by the <2 x i32>
; tests that follow.
; NOTE(review): the `nxv2i32` suffix does not match the fixed-length
; <2 x i32> operand types; presumably inherited naming -- confirm.
1118 declare <2 x i32> @llvm.vp.mul.nxv2i32(<2 x i32>, <2 x i32>, <2 x i1>, i32)
1119 declare <2 x i32> @llvm.vp.add.nxv2i32(<2 x i32>, <2 x i32>, <2 x i1>, i32)
1120 declare <2 x i32> @llvm.vp.merge.nxv2i32(<2 x i1>, <2 x i32>, <2 x i32>, i32)
1121 declare <2 x i32> @llvm.vp.select.nxv2i32(<2 x i1>, <2 x i32>, <2 x i32>, i32)
; Masked multiply-accumulate on <2 x i32>: vp.mul(%a, %b) then vp.add(%x, %c),
; both under an all-ones mask, followed by vp.merge on %m with %c as the
; on-false operand. The assertions expect a masked vmacc.vv under an e32,
; tu, mu vsetvli, followed by a vector move of the result into v8.
1123 define <2 x i32> @vmacc_vv_nxv2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x i1> %m, i32 zeroext %evl) {
1124 ; CHECK-LABEL: vmacc_vv_nxv2i32:
1126 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu
1127 ; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t
1128 ; CHECK-NEXT: vmv1r.v v8, v10
1130 %splat = insertelement <2 x i1> poison, i1 -1, i32 0
1131 %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
1132 %x = call <2 x i32> @llvm.vp.mul.nxv2i32(<2 x i32> %a, <2 x i32> %b, <2 x i1> %allones, i32 %evl)
1133 %y = call <2 x i32> @llvm.vp.add.nxv2i32(<2 x i32> %x, <2 x i32> %c, <2 x i1> %allones, i32 %evl)
1134 %u = call <2 x i32> @llvm.vp.merge.nxv2i32(<2 x i1> %m, <2 x i32> %y, <2 x i32> %c, i32 %evl)
; Unmasked variant on <2 x i32>: vp.mul, vp.add and the final vp.merge all use
; the all-ones mask, so %m is not referenced by the instructions shown. The
; assertions expect an unmasked vmacc.vv under a tu, ma vsetvli.
1138 define <2 x i32> @vmacc_vv_nxv2i32_unmasked(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x i1> %m, i32 zeroext %evl) {
1139 ; CHECK-LABEL: vmacc_vv_nxv2i32_unmasked:
1141 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, ma
1142 ; CHECK-NEXT: vmacc.vv v10, v8, v9
1143 ; CHECK-NEXT: vmv1r.v v8, v10
1145 %splat = insertelement <2 x i1> poison, i1 -1, i32 0
1146 %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
1147 %x = call <2 x i32> @llvm.vp.mul.nxv2i32(<2 x i32> %a, <2 x i32> %b, <2 x i1> %allones, i32 %evl)
1148 %y = call <2 x i32> @llvm.vp.add.nxv2i32(<2 x i32> %x, <2 x i32> %c, <2 x i1> %allones, i32 %evl)
1149 %u = call <2 x i32> @llvm.vp.merge.nxv2i32(<2 x i1> %allones, <2 x i32> %y, <2 x i32> %c, i32 %evl)
; Scalar-operand masked variant on <2 x i32>: the i32 %b is splatted to %vb
; (insertelement + shufflevector) before the all-ones vp.mul/vp.add; vp.merge
; then applies %m with %c as the on-false operand. The assertions expect a
; masked vmacc.vx reading the scalar from a0 under a tu, mu vsetvli.
1153 define <2 x i32> @vmacc_vx_nxv2i32(<2 x i32> %a, i32 %b, <2 x i32> %c, <2 x i1> %m, i32 zeroext %evl) {
1154 ; CHECK-LABEL: vmacc_vx_nxv2i32:
1156 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu
1157 ; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t
1158 ; CHECK-NEXT: vmv1r.v v8, v9
1160 %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0
1161 %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
1162 %splat = insertelement <2 x i1> poison, i1 -1, i32 0
1163 %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
1164 %x = call <2 x i32> @llvm.vp.mul.nxv2i32(<2 x i32> %a, <2 x i32> %vb, <2 x i1> %allones, i32 %evl)
1165 %y = call <2 x i32> @llvm.vp.add.nxv2i32(<2 x i32> %x, <2 x i32> %c, <2 x i1> %allones, i32 %evl)
1166 %u = call <2 x i32> @llvm.vp.merge.nxv2i32(<2 x i1> %m, <2 x i32> %y, <2 x i32> %c, i32 %evl)
; Scalar-operand unmasked variant on <2 x i32>: %b is splatted to %vb, and
; vp.mul, vp.add and vp.merge all use the all-ones mask (%m is not referenced
; by the instructions shown). The assertions expect an unmasked vmacc.vx
; reading the scalar from a0 under a tu, ma vsetvli.
1170 define <2 x i32> @vmacc_vx_nxv2i32_unmasked(<2 x i32> %a, i32 %b, <2 x i32> %c, <2 x i1> %m, i32 zeroext %evl) {
1171 ; CHECK-LABEL: vmacc_vx_nxv2i32_unmasked:
1173 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, ma
1174 ; CHECK-NEXT: vmacc.vx v9, a0, v8
1175 ; CHECK-NEXT: vmv1r.v v8, v9
1177 %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0
1178 %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
1179 %splat = insertelement <2 x i1> poison, i1 -1, i32 0
1180 %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
1181 %x = call <2 x i32> @llvm.vp.mul.nxv2i32(<2 x i32> %a, <2 x i32> %vb, <2 x i1> %allones, i32 %evl)
1182 %y = call <2 x i32> @llvm.vp.add.nxv2i32(<2 x i32> %x, <2 x i32> %c, <2 x i1> %allones, i32 %evl)
1183 %u = call <2 x i32> @llvm.vp.merge.nxv2i32(<2 x i1> %allones, <2 x i32> %y, <2 x i32> %c, i32 %evl)
; vp.select variant on <2 x i32>: same all-ones vp.mul/vp.add chain as the
; merge test, but the final blend on %m uses vp.select. The assertions expect
; a masked vmacc.vv under a ta, mu vsetvli (instead of tu, mu).
1187 define <2 x i32> @vmacc_vv_nxv2i32_ta(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x i1> %m, i32 zeroext %evl) {
1188 ; CHECK-LABEL: vmacc_vv_nxv2i32_ta:
1190 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
1191 ; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t
1192 ; CHECK-NEXT: vmv1r.v v8, v10
1194 %splat = insertelement <2 x i1> poison, i1 -1, i32 0
1195 %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
1196 %x = call <2 x i32> @llvm.vp.mul.nxv2i32(<2 x i32> %a, <2 x i32> %b, <2 x i1> %allones, i32 %evl)
1197 %y = call <2 x i32> @llvm.vp.add.nxv2i32(<2 x i32> %x, <2 x i32> %c, <2 x i1> %allones, i32 %evl)
1198 %u = call <2 x i32> @llvm.vp.select.nxv2i32(<2 x i1> %m, <2 x i32> %y, <2 x i32> %c, i32 %evl)
; Scalar-operand vp.select variant on <2 x i32>: %b is splatted to %vb, then
; the all-ones vp.mul/vp.add result is blended with %c via vp.select on %m.
; The assertions expect a masked vmacc.vx (scalar in a0) under a ta, mu
; vsetvli.
1202 define <2 x i32> @vmacc_vx_nxv2i32_ta(<2 x i32> %a, i32 %b, <2 x i32> %c, <2 x i1> %m, i32 zeroext %evl) {
1203 ; CHECK-LABEL: vmacc_vx_nxv2i32_ta:
1205 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
1206 ; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t
1207 ; CHECK-NEXT: vmv1r.v v8, v9
1209 %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0
1210 %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
1211 %splat = insertelement <2 x i1> poison, i1 -1, i32 0
1212 %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
1213 %x = call <2 x i32> @llvm.vp.mul.nxv2i32(<2 x i32> %a, <2 x i32> %vb, <2 x i1> %allones, i32 %evl)
1214 %y = call <2 x i32> @llvm.vp.add.nxv2i32(<2 x i32> %x, <2 x i32> %c, <2 x i1> %allones, i32 %evl)
1215 %u = call <2 x i32> @llvm.vp.select.nxv2i32(<2 x i1> %m, <2 x i32> %y, <2 x i32> %c, i32 %evl)
; VP intrinsic declarations (mul, add, merge, select) used by the <4 x i32>
; tests that follow.
; NOTE(review): the `nxv4i32` suffix does not match the fixed-length
; <4 x i32> operand types; presumably inherited naming -- confirm.
1219 declare <4 x i32> @llvm.vp.mul.nxv4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
1220 declare <4 x i32> @llvm.vp.add.nxv4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
1221 declare <4 x i32> @llvm.vp.merge.nxv4i32(<4 x i1>, <4 x i32>, <4 x i32>, i32)
1222 declare <4 x i32> @llvm.vp.select.nxv4i32(<4 x i1>, <4 x i32>, <4 x i32>, i32)
; Masked multiply-accumulate on <4 x i32>: vp.mul(%a, %b) then vp.add(%x, %c),
; both under an all-ones mask, followed by vp.merge on %m with %c as the
; on-false operand. The assertions expect a masked vmacc.vv under an e32, m1,
; tu, mu vsetvli, followed by a vector move of the result into v8.
1224 define <4 x i32> @vmacc_vv_nxv4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i1> %m, i32 zeroext %evl) {
1225 ; CHECK-LABEL: vmacc_vv_nxv4i32:
1227 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu
1228 ; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t
1229 ; CHECK-NEXT: vmv1r.v v8, v10
1231 %splat = insertelement <4 x i1> poison, i1 -1, i32 0
1232 %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
1233 %x = call <4 x i32> @llvm.vp.mul.nxv4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> %allones, i32 %evl)
1234 %y = call <4 x i32> @llvm.vp.add.nxv4i32(<4 x i32> %x, <4 x i32> %c, <4 x i1> %allones, i32 %evl)
1235 %u = call <4 x i32> @llvm.vp.merge.nxv4i32(<4 x i1> %m, <4 x i32> %y, <4 x i32> %c, i32 %evl)
; Unmasked variant on <4 x i32>: vp.mul, vp.add and the final vp.merge all use
; the all-ones mask, so %m is not referenced by the instructions shown. The
; assertions expect an unmasked vmacc.vv under a tu, ma vsetvli.
1239 define <4 x i32> @vmacc_vv_nxv4i32_unmasked(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i1> %m, i32 zeroext %evl) {
1240 ; CHECK-LABEL: vmacc_vv_nxv4i32_unmasked:
1242 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma
1243 ; CHECK-NEXT: vmacc.vv v10, v8, v9
1244 ; CHECK-NEXT: vmv1r.v v8, v10
1246 %splat = insertelement <4 x i1> poison, i1 -1, i32 0
1247 %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
1248 %x = call <4 x i32> @llvm.vp.mul.nxv4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> %allones, i32 %evl)
1249 %y = call <4 x i32> @llvm.vp.add.nxv4i32(<4 x i32> %x, <4 x i32> %c, <4 x i1> %allones, i32 %evl)
1250 %u = call <4 x i32> @llvm.vp.merge.nxv4i32(<4 x i1> %allones, <4 x i32> %y, <4 x i32> %c, i32 %evl)
; Masked vector-scalar multiply-accumulate on <4 x i32>: the i32 scalar %b is splatted
; (insertelement + shufflevector) before the all-ones-masked vp.mul/vp.add chain, and
; vp.merge selects under %m. The assertions expect a masked vmacc.vx (scalar in a0)
; under an `e32, m1, tu, mu` vsetvli.
1254 define <4 x i32> @vmacc_vx_nxv4i32(<4 x i32> %a, i32 %b, <4 x i32> %c, <4 x i1> %m, i32 zeroext %evl) {
1255 ; CHECK-LABEL: vmacc_vx_nxv4i32:
1257 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu
1258 ; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t
1259 ; CHECK-NEXT: vmv1r.v v8, v9
1261 %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0
1262 %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
1263 %splat = insertelement <4 x i1> poison, i1 -1, i32 0
1264 %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
1265 %x = call <4 x i32> @llvm.vp.mul.nxv4i32(<4 x i32> %a, <4 x i32> %vb, <4 x i1> %allones, i32 %evl)
1266 %y = call <4 x i32> @llvm.vp.add.nxv4i32(<4 x i32> %x, <4 x i32> %c, <4 x i1> %allones, i32 %evl)
1267 %u = call <4 x i32> @llvm.vp.merge.nxv4i32(<4 x i1> %m, <4 x i32> %y, <4 x i32> %c, i32 %evl)
; Unmasked vector-scalar multiply-accumulate on <4 x i32>: the splatted scalar %b goes
; through the vp.mul/vp.add chain and the final vp.merge uses the all-ones mask, so %m
; does not feed the merge. The assertions expect an unmasked vmacc.vx (scalar in a0)
; under an `e32, m1, tu, ma` vsetvli.
1271 define <4 x i32> @vmacc_vx_nxv4i32_unmasked(<4 x i32> %a, i32 %b, <4 x i32> %c, <4 x i1> %m, i32 zeroext %evl) {
1272 ; CHECK-LABEL: vmacc_vx_nxv4i32_unmasked:
1274 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma
1275 ; CHECK-NEXT: vmacc.vx v9, a0, v8
1276 ; CHECK-NEXT: vmv1r.v v8, v9
1278 %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0
1279 %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
1280 %splat = insertelement <4 x i1> poison, i1 -1, i32 0
1281 %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
1282 %x = call <4 x i32> @llvm.vp.mul.nxv4i32(<4 x i32> %a, <4 x i32> %vb, <4 x i1> %allones, i32 %evl)
1283 %y = call <4 x i32> @llvm.vp.add.nxv4i32(<4 x i32> %x, <4 x i32> %c, <4 x i1> %allones, i32 %evl)
1284 %u = call <4 x i32> @llvm.vp.merge.nxv4i32(<4 x i1> %allones, <4 x i32> %y, <4 x i32> %c, i32 %evl)
; vp.select variant of the masked vector-vector multiply-accumulate on <4 x i32>: the
; all-ones-masked vp.mul/vp.add result is chosen against %c per lane of %m with
; vp.select instead of vp.merge. The assertions expect a masked vmacc.vv under an
; `e32, m1, ta, mu` vsetvli, followed by vmv.v.v to place the result in v8.
1288 define <4 x i32> @vmacc_vv_nxv4i32_ta(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i1> %m, i32 zeroext %evl) {
1289 ; CHECK-LABEL: vmacc_vv_nxv4i32_ta:
1291 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
1292 ; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t
1293 ; CHECK-NEXT: vmv.v.v v8, v10
1295 %splat = insertelement <4 x i1> poison, i1 -1, i32 0
1296 %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
1297 %x = call <4 x i32> @llvm.vp.mul.nxv4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> %allones, i32 %evl)
1298 %y = call <4 x i32> @llvm.vp.add.nxv4i32(<4 x i32> %x, <4 x i32> %c, <4 x i1> %allones, i32 %evl)
1299 %u = call <4 x i32> @llvm.vp.select.nxv4i32(<4 x i1> %m, <4 x i32> %y, <4 x i32> %c, i32 %evl)
; vp.select variant of the masked vector-scalar multiply-accumulate on <4 x i32>: the
; i32 scalar %b is splatted, run through the all-ones-masked vp.mul/vp.add chain, and
; vp.select picks that sum or %c per lane of %m. The assertions expect a masked
; vmacc.vx (scalar in a0) under an `e32, m1, ta, mu` vsetvli, followed by vmv.v.v.
1303 define <4 x i32> @vmacc_vx_nxv4i32_ta(<4 x i32> %a, i32 %b, <4 x i32> %c, <4 x i1> %m, i32 zeroext %evl) {
1304 ; CHECK-LABEL: vmacc_vx_nxv4i32_ta:
1306 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
1307 ; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t
1308 ; CHECK-NEXT: vmv.v.v v8, v9
1310 %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0
1311 %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
1312 %splat = insertelement <4 x i1> poison, i1 -1, i32 0
1313 %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
1314 %x = call <4 x i32> @llvm.vp.mul.nxv4i32(<4 x i32> %a, <4 x i32> %vb, <4 x i1> %allones, i32 %evl)
1315 %y = call <4 x i32> @llvm.vp.add.nxv4i32(<4 x i32> %x, <4 x i32> %c, <4 x i1> %allones, i32 %evl)
1316 %u = call <4 x i32> @llvm.vp.select.nxv4i32(<4 x i1> %m, <4 x i32> %y, <4 x i32> %c, i32 %evl)
; Declarations of the VP intrinsics (mul, add, merge, select) called by the <8 x i32>
; tests that follow. Each takes an <8 x i1> mask and an i32 explicit vector length.
; TODO confirm: the `.nxv8i32` suffix reads like a scalable-vector mangling while the
; signatures use fixed <8 x i32>; presumably the IR parser remangles the name.
1320 declare <8 x i32> @llvm.vp.mul.nxv8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
1321 declare <8 x i32> @llvm.vp.add.nxv8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
1322 declare <8 x i32> @llvm.vp.merge.nxv8i32(<8 x i1>, <8 x i32>, <8 x i32>, i32)
1323 declare <8 x i32> @llvm.vp.select.nxv8i32(<8 x i1>, <8 x i32>, <8 x i32>, i32)
; Masked vector-vector multiply-accumulate on <8 x i32>: vp.mul(%a, %b) and
; vp.add(.., %c) run under an all-ones mask with %evl, then vp.merge picks that sum
; or %c per lane of %m. The assertions expect one masked vmacc.vv under an
; `e32, m2, tu, mu` vsetvli and a vmv2r.v to place the result in v8.
1325 define <8 x i32> @vmacc_vv_nxv8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i1> %m, i32 zeroext %evl) {
1326 ; CHECK-LABEL: vmacc_vv_nxv8i32:
1328 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu
1329 ; CHECK-NEXT: vmacc.vv v12, v8, v10, v0.t
1330 ; CHECK-NEXT: vmv2r.v v8, v12
1332 %splat = insertelement <8 x i1> poison, i1 -1, i32 0
1333 %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
1334 %x = call <8 x i32> @llvm.vp.mul.nxv8i32(<8 x i32> %a, <8 x i32> %b, <8 x i1> %allones, i32 %evl)
1335 %y = call <8 x i32> @llvm.vp.add.nxv8i32(<8 x i32> %x, <8 x i32> %c, <8 x i1> %allones, i32 %evl)
1336 %u = call <8 x i32> @llvm.vp.merge.nxv8i32(<8 x i1> %m, <8 x i32> %y, <8 x i32> %c, i32 %evl)
; Unmasked vector-vector multiply-accumulate on <8 x i32>: same vp.mul/vp.add chain,
; but the final vp.merge uses the all-ones mask, so %m does not feed the merge.
; The assertions expect an unmasked vmacc.vv under an `e32, m2, tu, ma` vsetvli.
1340 define <8 x i32> @vmacc_vv_nxv8i32_unmasked(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i1> %m, i32 zeroext %evl) {
1341 ; CHECK-LABEL: vmacc_vv_nxv8i32_unmasked:
1343 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma
1344 ; CHECK-NEXT: vmacc.vv v12, v8, v10
1345 ; CHECK-NEXT: vmv2r.v v8, v12
1347 %splat = insertelement <8 x i1> poison, i1 -1, i32 0
1348 %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
1349 %x = call <8 x i32> @llvm.vp.mul.nxv8i32(<8 x i32> %a, <8 x i32> %b, <8 x i1> %allones, i32 %evl)
1350 %y = call <8 x i32> @llvm.vp.add.nxv8i32(<8 x i32> %x, <8 x i32> %c, <8 x i1> %allones, i32 %evl)
1351 %u = call <8 x i32> @llvm.vp.merge.nxv8i32(<8 x i1> %allones, <8 x i32> %y, <8 x i32> %c, i32 %evl)
; Masked vector-scalar multiply-accumulate on <8 x i32>: the i32 scalar %b is splatted
; (insertelement + shufflevector) before the all-ones-masked vp.mul/vp.add chain, and
; vp.merge selects under %m. The assertions expect a masked vmacc.vx (scalar in a0)
; under an `e32, m2, tu, mu` vsetvli.
1355 define <8 x i32> @vmacc_vx_nxv8i32(<8 x i32> %a, i32 %b, <8 x i32> %c, <8 x i1> %m, i32 zeroext %evl) {
1356 ; CHECK-LABEL: vmacc_vx_nxv8i32:
1358 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu
1359 ; CHECK-NEXT: vmacc.vx v10, a0, v8, v0.t
1360 ; CHECK-NEXT: vmv2r.v v8, v10
1362 %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
1363 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
1364 %splat = insertelement <8 x i1> poison, i1 -1, i32 0
1365 %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
1366 %x = call <8 x i32> @llvm.vp.mul.nxv8i32(<8 x i32> %a, <8 x i32> %vb, <8 x i1> %allones, i32 %evl)
1367 %y = call <8 x i32> @llvm.vp.add.nxv8i32(<8 x i32> %x, <8 x i32> %c, <8 x i1> %allones, i32 %evl)
1368 %u = call <8 x i32> @llvm.vp.merge.nxv8i32(<8 x i1> %m, <8 x i32> %y, <8 x i32> %c, i32 %evl)
; Unmasked vector-scalar multiply-accumulate on <8 x i32>: the splatted scalar %b goes
; through the vp.mul/vp.add chain and the final vp.merge uses the all-ones mask, so %m
; does not feed the merge. The assertions expect an unmasked vmacc.vx (scalar in a0)
; under an `e32, m2, tu, ma` vsetvli.
1372 define <8 x i32> @vmacc_vx_nxv8i32_unmasked(<8 x i32> %a, i32 %b, <8 x i32> %c, <8 x i1> %m, i32 zeroext %evl) {
1373 ; CHECK-LABEL: vmacc_vx_nxv8i32_unmasked:
1375 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, ma
1376 ; CHECK-NEXT: vmacc.vx v10, a0, v8
1377 ; CHECK-NEXT: vmv2r.v v8, v10
1379 %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
1380 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
1381 %splat = insertelement <8 x i1> poison, i1 -1, i32 0
1382 %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
1383 %x = call <8 x i32> @llvm.vp.mul.nxv8i32(<8 x i32> %a, <8 x i32> %vb, <8 x i1> %allones, i32 %evl)
1384 %y = call <8 x i32> @llvm.vp.add.nxv8i32(<8 x i32> %x, <8 x i32> %c, <8 x i1> %allones, i32 %evl)
1385 %u = call <8 x i32> @llvm.vp.merge.nxv8i32(<8 x i1> %allones, <8 x i32> %y, <8 x i32> %c, i32 %evl)
; vp.select variant of the masked vector-vector multiply-accumulate on <8 x i32>: the
; all-ones-masked vp.mul/vp.add result is chosen against %c per lane of %m with
; vp.select instead of vp.merge. The assertions expect a masked vmacc.vv under an
; `e32, m2, ta, mu` vsetvli, followed by vmv.v.v to place the result in v8.
1389 define <8 x i32> @vmacc_vv_nxv8i32_ta(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i1> %m, i32 zeroext %evl) {
1390 ; CHECK-LABEL: vmacc_vv_nxv8i32_ta:
1392 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
1393 ; CHECK-NEXT: vmacc.vv v12, v8, v10, v0.t
1394 ; CHECK-NEXT: vmv.v.v v8, v12
1396 %splat = insertelement <8 x i1> poison, i1 -1, i32 0
1397 %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
1398 %x = call <8 x i32> @llvm.vp.mul.nxv8i32(<8 x i32> %a, <8 x i32> %b, <8 x i1> %allones, i32 %evl)
1399 %y = call <8 x i32> @llvm.vp.add.nxv8i32(<8 x i32> %x, <8 x i32> %c, <8 x i1> %allones, i32 %evl)
1400 %u = call <8 x i32> @llvm.vp.select.nxv8i32(<8 x i1> %m, <8 x i32> %y, <8 x i32> %c, i32 %evl)
; vp.select variant of the masked vector-scalar multiply-accumulate on <8 x i32>: the
; i32 scalar %b is splatted, run through the all-ones-masked vp.mul/vp.add chain, and
; vp.select picks that sum or %c per lane of %m. The assertions expect a masked
; vmacc.vx (scalar in a0) under an `e32, m2, ta, mu` vsetvli, followed by vmv.v.v.
1404 define <8 x i32> @vmacc_vx_nxv8i32_ta(<8 x i32> %a, i32 %b, <8 x i32> %c, <8 x i1> %m, i32 zeroext %evl) {
1405 ; CHECK-LABEL: vmacc_vx_nxv8i32_ta:
1407 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
1408 ; CHECK-NEXT: vmacc.vx v10, a0, v8, v0.t
1409 ; CHECK-NEXT: vmv.v.v v8, v10
1411 %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
1412 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
1413 %splat = insertelement <8 x i1> poison, i1 -1, i32 0
1414 %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
1415 %x = call <8 x i32> @llvm.vp.mul.nxv8i32(<8 x i32> %a, <8 x i32> %vb, <8 x i1> %allones, i32 %evl)
1416 %y = call <8 x i32> @llvm.vp.add.nxv8i32(<8 x i32> %x, <8 x i32> %c, <8 x i1> %allones, i32 %evl)
1417 %u = call <8 x i32> @llvm.vp.select.nxv8i32(<8 x i1> %m, <8 x i32> %y, <8 x i32> %c, i32 %evl)
; Declarations of the VP intrinsics (mul, add, merge, select) called by the <16 x i32>
; tests that follow. Each takes a <16 x i1> mask and an i32 explicit vector length.
; TODO confirm: the `.nxv16i32` suffix reads like a scalable-vector mangling while the
; signatures use fixed <16 x i32>; presumably the IR parser remangles the name.
1421 declare <16 x i32> @llvm.vp.mul.nxv16i32(<16 x i32>, <16 x i32>, <16 x i1>, i32)
1422 declare <16 x i32> @llvm.vp.add.nxv16i32(<16 x i32>, <16 x i32>, <16 x i1>, i32)
1423 declare <16 x i32> @llvm.vp.merge.nxv16i32(<16 x i1>, <16 x i32>, <16 x i32>, i32)
1424 declare <16 x i32> @llvm.vp.select.nxv16i32(<16 x i1>, <16 x i32>, <16 x i32>, i32)
; Masked vector-vector multiply-accumulate on <16 x i32>: vp.mul(%a, %b) and
; vp.add(.., %c) run under an all-ones mask with %evl, then vp.merge picks that sum
; or %c per lane of %m. The assertions expect one masked vmacc.vv under an
; `e32, m4, tu, mu` vsetvli and a vmv4r.v to place the result in v8.
1426 define <16 x i32> @vmacc_vv_nxv16i32(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c, <16 x i1> %m, i32 zeroext %evl) {
1427 ; CHECK-LABEL: vmacc_vv_nxv16i32:
1429 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu
1430 ; CHECK-NEXT: vmacc.vv v16, v8, v12, v0.t
1431 ; CHECK-NEXT: vmv4r.v v8, v16
1433 %splat = insertelement <16 x i1> poison, i1 -1, i32 0
1434 %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
1435 %x = call <16 x i32> @llvm.vp.mul.nxv16i32(<16 x i32> %a, <16 x i32> %b, <16 x i1> %allones, i32 %evl)
1436 %y = call <16 x i32> @llvm.vp.add.nxv16i32(<16 x i32> %x, <16 x i32> %c, <16 x i1> %allones, i32 %evl)
1437 %u = call <16 x i32> @llvm.vp.merge.nxv16i32(<16 x i1> %m, <16 x i32> %y, <16 x i32> %c, i32 %evl)
; Unmasked vector-vector multiply-accumulate on <16 x i32>: same vp.mul/vp.add chain,
; but the final vp.merge uses the all-ones mask, so %m does not feed the merge.
; The assertions expect an unmasked vmacc.vv under an `e32, m4, tu, ma` vsetvli.
1441 define <16 x i32> @vmacc_vv_nxv16i32_unmasked(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c, <16 x i1> %m, i32 zeroext %evl) {
1442 ; CHECK-LABEL: vmacc_vv_nxv16i32_unmasked:
1444 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, ma
1445 ; CHECK-NEXT: vmacc.vv v16, v8, v12
1446 ; CHECK-NEXT: vmv4r.v v8, v16
1448 %splat = insertelement <16 x i1> poison, i1 -1, i32 0
1449 %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
1450 %x = call <16 x i32> @llvm.vp.mul.nxv16i32(<16 x i32> %a, <16 x i32> %b, <16 x i1> %allones, i32 %evl)
1451 %y = call <16 x i32> @llvm.vp.add.nxv16i32(<16 x i32> %x, <16 x i32> %c, <16 x i1> %allones, i32 %evl)
1452 %u = call <16 x i32> @llvm.vp.merge.nxv16i32(<16 x i1> %allones, <16 x i32> %y, <16 x i32> %c, i32 %evl)
; Masked vector-scalar multiply-accumulate on <16 x i32>: the i32 scalar %b is splatted
; (insertelement + shufflevector) before the all-ones-masked vp.mul/vp.add chain, and
; vp.merge selects under %m. The assertions expect a masked vmacc.vx (scalar in a0)
; under an `e32, m4, tu, mu` vsetvli.
1456 define <16 x i32> @vmacc_vx_nxv16i32(<16 x i32> %a, i32 %b, <16 x i32> %c, <16 x i1> %m, i32 zeroext %evl) {
1457 ; CHECK-LABEL: vmacc_vx_nxv16i32:
1459 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu
1460 ; CHECK-NEXT: vmacc.vx v12, a0, v8, v0.t
1461 ; CHECK-NEXT: vmv4r.v v8, v12
1463 %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0
1464 %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
1465 %splat = insertelement <16 x i1> poison, i1 -1, i32 0
1466 %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
1467 %x = call <16 x i32> @llvm.vp.mul.nxv16i32(<16 x i32> %a, <16 x i32> %vb, <16 x i1> %allones, i32 %evl)
1468 %y = call <16 x i32> @llvm.vp.add.nxv16i32(<16 x i32> %x, <16 x i32> %c, <16 x i1> %allones, i32 %evl)
1469 %u = call <16 x i32> @llvm.vp.merge.nxv16i32(<16 x i1> %m, <16 x i32> %y, <16 x i32> %c, i32 %evl)
; Unmasked vector-scalar multiply-accumulate on <16 x i32>: the splatted scalar %b goes
; through the vp.mul/vp.add chain and the final vp.merge uses the all-ones mask, so %m
; does not feed the merge. The assertions expect an unmasked vmacc.vx (scalar in a0)
; under an `e32, m4, tu, ma` vsetvli.
1473 define <16 x i32> @vmacc_vx_nxv16i32_unmasked(<16 x i32> %a, i32 %b, <16 x i32> %c, <16 x i1> %m, i32 zeroext %evl) {
1474 ; CHECK-LABEL: vmacc_vx_nxv16i32_unmasked:
1476 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, ma
1477 ; CHECK-NEXT: vmacc.vx v12, a0, v8
1478 ; CHECK-NEXT: vmv4r.v v8, v12
1480 %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0
1481 %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
1482 %splat = insertelement <16 x i1> poison, i1 -1, i32 0
1483 %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
1484 %x = call <16 x i32> @llvm.vp.mul.nxv16i32(<16 x i32> %a, <16 x i32> %vb, <16 x i1> %allones, i32 %evl)
1485 %y = call <16 x i32> @llvm.vp.add.nxv16i32(<16 x i32> %x, <16 x i32> %c, <16 x i1> %allones, i32 %evl)
1486 %u = call <16 x i32> @llvm.vp.merge.nxv16i32(<16 x i1> %allones, <16 x i32> %y, <16 x i32> %c, i32 %evl)
; vp.select variant of the masked vector-vector multiply-accumulate on <16 x i32>: the
; all-ones-masked vp.mul/vp.add result is chosen against %c per lane of %m with
; vp.select instead of vp.merge. The assertions expect a masked vmacc.vv under an
; `e32, m4, ta, mu` vsetvli, followed by vmv.v.v to place the result in v8.
1490 define <16 x i32> @vmacc_vv_nxv16i32_ta(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c, <16 x i1> %m, i32 zeroext %evl) {
1491 ; CHECK-LABEL: vmacc_vv_nxv16i32_ta:
1493 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
1494 ; CHECK-NEXT: vmacc.vv v16, v8, v12, v0.t
1495 ; CHECK-NEXT: vmv.v.v v8, v16
1497 %splat = insertelement <16 x i1> poison, i1 -1, i32 0
1498 %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
1499 %x = call <16 x i32> @llvm.vp.mul.nxv16i32(<16 x i32> %a, <16 x i32> %b, <16 x i1> %allones, i32 %evl)
1500 %y = call <16 x i32> @llvm.vp.add.nxv16i32(<16 x i32> %x, <16 x i32> %c, <16 x i1> %allones, i32 %evl)
1501 %u = call <16 x i32> @llvm.vp.select.nxv16i32(<16 x i1> %m, <16 x i32> %y, <16 x i32> %c, i32 %evl)
; vp.select variant of the masked vector-scalar multiply-accumulate on <16 x i32>: the
; i32 scalar %b is splatted, run through the all-ones-masked vp.mul/vp.add chain, and
; vp.select picks that sum or %c per lane of %m. The assertions expect a masked
; vmacc.vx (scalar in a0) under an `e32, m4, ta, mu` vsetvli, followed by vmv.v.v.
1505 define <16 x i32> @vmacc_vx_nxv16i32_ta(<16 x i32> %a, i32 %b, <16 x i32> %c, <16 x i1> %m, i32 zeroext %evl) {
1506 ; CHECK-LABEL: vmacc_vx_nxv16i32_ta:
1508 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
1509 ; CHECK-NEXT: vmacc.vx v12, a0, v8, v0.t
1510 ; CHECK-NEXT: vmv.v.v v8, v12
1512 %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0
1513 %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
1514 %splat = insertelement <16 x i1> poison, i1 -1, i32 0
1515 %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
1516 %x = call <16 x i32> @llvm.vp.mul.nxv16i32(<16 x i32> %a, <16 x i32> %vb, <16 x i1> %allones, i32 %evl)
1517 %y = call <16 x i32> @llvm.vp.add.nxv16i32(<16 x i32> %x, <16 x i32> %c, <16 x i1> %allones, i32 %evl)
1518 %u = call <16 x i32> @llvm.vp.select.nxv16i32(<16 x i1> %m, <16 x i32> %y, <16 x i32> %c, i32 %evl)
; Declarations of the VP intrinsics (mul, add, merge, select) called by the <2 x i64>
; tests that follow. Each takes a <2 x i1> mask and an i32 explicit vector length.
; TODO confirm: the `.nxv2i64` suffix reads like a scalable-vector mangling while the
; signatures use fixed <2 x i64>; presumably the IR parser remangles the name.
1522 declare <2 x i64> @llvm.vp.mul.nxv2i64(<2 x i64>, <2 x i64>, <2 x i1>, i32)
1523 declare <2 x i64> @llvm.vp.add.nxv2i64(<2 x i64>, <2 x i64>, <2 x i1>, i32)
1524 declare <2 x i64> @llvm.vp.merge.nxv2i64(<2 x i1>, <2 x i64>, <2 x i64>, i32)
1525 declare <2 x i64> @llvm.vp.select.nxv2i64(<2 x i1>, <2 x i64>, <2 x i64>, i32)
; Masked vector-vector multiply-accumulate on <2 x i64>: vp.mul(%a, %b) and
; vp.add(.., %c) run under an all-ones mask with %evl, then vp.merge picks that sum
; or %c per lane of %m. The assertions (shared by both targets) expect one masked
; vmacc.vv under an `e64, m1, tu, mu` vsetvli.
1527 define <2 x i64> @vmacc_vv_nxv2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i1> %m, i32 zeroext %evl) {
1528 ; CHECK-LABEL: vmacc_vv_nxv2i64:
1530 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu
1531 ; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t
1532 ; CHECK-NEXT: vmv1r.v v8, v10
1534 %splat = insertelement <2 x i1> poison, i1 -1, i32 0
1535 %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
1536 %x = call <2 x i64> @llvm.vp.mul.nxv2i64(<2 x i64> %a, <2 x i64> %b, <2 x i1> %allones, i32 %evl)
1537 %y = call <2 x i64> @llvm.vp.add.nxv2i64(<2 x i64> %x, <2 x i64> %c, <2 x i1> %allones, i32 %evl)
1538 %u = call <2 x i64> @llvm.vp.merge.nxv2i64(<2 x i1> %m, <2 x i64> %y, <2 x i64> %c, i32 %evl)
; Unmasked vector-vector multiply-accumulate on <2 x i64>: same vp.mul/vp.add chain,
; but the final vp.merge uses the all-ones mask, so %m does not feed the merge.
; The assertions expect an unmasked vmacc.vv under an `e64, m1, tu, ma` vsetvli.
1542 define <2 x i64> @vmacc_vv_nxv2i64_unmasked(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i1> %m, i32 zeroext %evl) {
1543 ; CHECK-LABEL: vmacc_vv_nxv2i64_unmasked:
1545 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, ma
1546 ; CHECK-NEXT: vmacc.vv v10, v8, v9
1547 ; CHECK-NEXT: vmv1r.v v8, v10
1549 %splat = insertelement <2 x i1> poison, i1 -1, i32 0
1550 %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
1551 %x = call <2 x i64> @llvm.vp.mul.nxv2i64(<2 x i64> %a, <2 x i64> %b, <2 x i1> %allones, i32 %evl)
1552 %y = call <2 x i64> @llvm.vp.add.nxv2i64(<2 x i64> %x, <2 x i64> %c, <2 x i1> %allones, i32 %evl)
1553 %u = call <2 x i64> @llvm.vp.merge.nxv2i64(<2 x i1> %allones, <2 x i64> %y, <2 x i64> %c, i32 %evl)
; Masked vector-scalar multiply-accumulate on <2 x i64>: the i64 scalar %b is splatted,
; then vp.mul/vp.add run under an all-ones mask with %evl and vp.merge selects under %m.
; riscv32 assertions: a0/a1 are stored to the stack (presumably the two halves of %b),
; splatted with a zero-stride vlse64.v, and a masked vmacc.vv is used (`tu, mu`).
; riscv64 assertions: a masked vmacc.vx takes the scalar directly from a0 (`tu, mu`).
1557 define <2 x i64> @vmacc_vx_nxv2i64(<2 x i64> %a, i64 %b, <2 x i64> %c, <2 x i1> %m, i32 zeroext %evl) {
1558 ; RV32-LABEL: vmacc_vx_nxv2i64:
1560 ; RV32-NEXT: addi sp, sp, -16
1561 ; RV32-NEXT: .cfi_def_cfa_offset 16
1562 ; RV32-NEXT: sw a1, 12(sp)
1563 ; RV32-NEXT: sw a0, 8(sp)
1564 ; RV32-NEXT: addi a0, sp, 8
1565 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
1566 ; RV32-NEXT: vlse64.v v10, (a0), zero
1567 ; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, mu
1568 ; RV32-NEXT: vmacc.vv v9, v8, v10, v0.t
1569 ; RV32-NEXT: vmv1r.v v8, v9
1570 ; RV32-NEXT: addi sp, sp, 16
1573 ; RV64-LABEL: vmacc_vx_nxv2i64:
1575 ; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, mu
1576 ; RV64-NEXT: vmacc.vx v9, a0, v8, v0.t
1577 ; RV64-NEXT: vmv1r.v v8, v9
1579 %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0
1580 %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
1581 %splat = insertelement <2 x i1> poison, i1 -1, i32 0
1582 %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
1583 %x = call <2 x i64> @llvm.vp.mul.nxv2i64(<2 x i64> %a, <2 x i64> %vb, <2 x i1> %allones, i32 %evl)
1584 %y = call <2 x i64> @llvm.vp.add.nxv2i64(<2 x i64> %x, <2 x i64> %c, <2 x i1> %allones, i32 %evl)
1585 %u = call <2 x i64> @llvm.vp.merge.nxv2i64(<2 x i1> %m, <2 x i64> %y, <2 x i64> %c, i32 %evl)
; Unmasked vector-scalar multiply-accumulate on <2 x i64>: the splatted i64 scalar %b
; goes through the vp.mul/vp.add chain and the final vp.merge uses the all-ones mask,
; so %m does not feed the merge.
; riscv32 assertions: a0/a1 are stored to the stack (presumably the two halves of %b),
; splatted with a zero-stride vlse64.v, and an unmasked vmacc.vv is used (`tu, ma`).
; riscv64 assertions: an unmasked vmacc.vx takes the scalar directly from a0 (`tu, ma`).
1589 define <2 x i64> @vmacc_vx_nxv2i64_unmasked(<2 x i64> %a, i64 %b, <2 x i64> %c, <2 x i1> %m, i32 zeroext %evl) {
1590 ; RV32-LABEL: vmacc_vx_nxv2i64_unmasked:
1592 ; RV32-NEXT: addi sp, sp, -16
1593 ; RV32-NEXT: .cfi_def_cfa_offset 16
1594 ; RV32-NEXT: sw a1, 12(sp)
1595 ; RV32-NEXT: sw a0, 8(sp)
1596 ; RV32-NEXT: addi a0, sp, 8
1597 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
1598 ; RV32-NEXT: vlse64.v v10, (a0), zero
1599 ; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, ma
1600 ; RV32-NEXT: vmacc.vv v9, v8, v10
1601 ; RV32-NEXT: vmv1r.v v8, v9
1602 ; RV32-NEXT: addi sp, sp, 16
1605 ; RV64-LABEL: vmacc_vx_nxv2i64_unmasked:
1607 ; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, ma
1608 ; RV64-NEXT: vmacc.vx v9, a0, v8
1609 ; RV64-NEXT: vmv1r.v v8, v9
1611 %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0
1612 %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
1613 %splat = insertelement <2 x i1> poison, i1 -1, i32 0
1614 %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
1615 %x = call <2 x i64> @llvm.vp.mul.nxv2i64(<2 x i64> %a, <2 x i64> %vb, <2 x i1> %allones, i32 %evl)
1616 %y = call <2 x i64> @llvm.vp.add.nxv2i64(<2 x i64> %x, <2 x i64> %c, <2 x i1> %allones, i32 %evl)
1617 %u = call <2 x i64> @llvm.vp.merge.nxv2i64(<2 x i1> %allones, <2 x i64> %y, <2 x i64> %c, i32 %evl)
; vp.select variant of the masked vector-vector multiply-accumulate on <2 x i64>: the
; all-ones-masked vp.mul/vp.add result is chosen against %c per lane of %m with
; vp.select instead of vp.merge. The assertions expect a masked vmacc.vv under an
; `e64, m1, ta, mu` vsetvli, followed by vmv.v.v to place the result in v8.
1621 define <2 x i64> @vmacc_vv_nxv2i64_ta(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i1> %m, i32 zeroext %evl) {
1622 ; CHECK-LABEL: vmacc_vv_nxv2i64_ta:
1624 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
1625 ; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t
1626 ; CHECK-NEXT: vmv.v.v v8, v10
1628 %splat = insertelement <2 x i1> poison, i1 -1, i32 0
1629 %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
1630 %x = call <2 x i64> @llvm.vp.mul.nxv2i64(<2 x i64> %a, <2 x i64> %b, <2 x i1> %allones, i32 %evl)
1631 %y = call <2 x i64> @llvm.vp.add.nxv2i64(<2 x i64> %x, <2 x i64> %c, <2 x i1> %allones, i32 %evl)
1632 %u = call <2 x i64> @llvm.vp.select.nxv2i64(<2 x i1> %m, <2 x i64> %y, <2 x i64> %c, i32 %evl)
; vp.select variant of the masked vector-scalar multiply-accumulate on <2 x i64>: the
; i64 scalar %b is splatted, run through the all-ones-masked vp.mul/vp.add chain, and
; vp.select picks that sum or %c per lane of %m.
; riscv32 assertions: a0/a1 are stored to the stack (presumably the two halves of %b),
; splatted with a zero-stride vlse64.v, and a masked vmacc.vv is used (`ta, mu`).
; riscv64 assertions: a masked vmacc.vx takes the scalar directly from a0 (`ta, mu`).
1636 define <2 x i64> @vmacc_vx_nxv2i64_ta(<2 x i64> %a, i64 %b, <2 x i64> %c, <2 x i1> %m, i32 zeroext %evl) {
1637 ; RV32-LABEL: vmacc_vx_nxv2i64_ta:
1639 ; RV32-NEXT: addi sp, sp, -16
1640 ; RV32-NEXT: .cfi_def_cfa_offset 16
1641 ; RV32-NEXT: sw a1, 12(sp)
1642 ; RV32-NEXT: sw a0, 8(sp)
1643 ; RV32-NEXT: addi a0, sp, 8
1644 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
1645 ; RV32-NEXT: vlse64.v v10, (a0), zero
1646 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu
1647 ; RV32-NEXT: vmacc.vv v9, v8, v10, v0.t
1648 ; RV32-NEXT: vmv.v.v v8, v9
1649 ; RV32-NEXT: addi sp, sp, 16
1652 ; RV64-LABEL: vmacc_vx_nxv2i64_ta:
1654 ; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu
1655 ; RV64-NEXT: vmacc.vx v9, a0, v8, v0.t
1656 ; RV64-NEXT: vmv.v.v v8, v9
1658 %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0
1659 %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
1660 %splat = insertelement <2 x i1> poison, i1 -1, i32 0
1661 %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
1662 %x = call <2 x i64> @llvm.vp.mul.nxv2i64(<2 x i64> %a, <2 x i64> %vb, <2 x i1> %allones, i32 %evl)
1663 %y = call <2 x i64> @llvm.vp.add.nxv2i64(<2 x i64> %x, <2 x i64> %c, <2 x i1> %allones, i32 %evl)
1664 %u = call <2 x i64> @llvm.vp.select.nxv2i64(<2 x i1> %m, <2 x i64> %y, <2 x i64> %c, i32 %evl)
; Declarations of the VP intrinsics (mul, add, merge, select) called by the <4 x i64>
; tests that follow. Each takes a <4 x i1> mask and an i32 explicit vector length.
; TODO confirm: the `.nxv4i64` suffix reads like a scalable-vector mangling while the
; signatures use fixed <4 x i64>; presumably the IR parser remangles the name.
1668 declare <4 x i64> @llvm.vp.mul.nxv4i64(<4 x i64>, <4 x i64>, <4 x i1>, i32)
1669 declare <4 x i64> @llvm.vp.add.nxv4i64(<4 x i64>, <4 x i64>, <4 x i1>, i32)
1670 declare <4 x i64> @llvm.vp.merge.nxv4i64(<4 x i1>, <4 x i64>, <4 x i64>, i32)
1671 declare <4 x i64> @llvm.vp.select.nxv4i64(<4 x i1>, <4 x i64>, <4 x i64>, i32)
; Masked vector-vector multiply-accumulate on <4 x i64>: vp.mul(%a, %b) and
; vp.add(.., %c) run under an all-ones mask with %evl, then vp.merge picks that sum
; or %c per lane of %m. The assertions expect one masked vmacc.vv under an
; `e64, m2, tu, mu` vsetvli and a vmv2r.v to place the result in v8.
1673 define <4 x i64> @vmacc_vv_nxv4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i1> %m, i32 zeroext %evl) {
1674 ; CHECK-LABEL: vmacc_vv_nxv4i64:
1676 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu
1677 ; CHECK-NEXT: vmacc.vv v12, v8, v10, v0.t
1678 ; CHECK-NEXT: vmv2r.v v8, v12
1680 %splat = insertelement <4 x i1> poison, i1 -1, i32 0
1681 %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
1682 %x = call <4 x i64> @llvm.vp.mul.nxv4i64(<4 x i64> %a, <4 x i64> %b, <4 x i1> %allones, i32 %evl)
1683 %y = call <4 x i64> @llvm.vp.add.nxv4i64(<4 x i64> %x, <4 x i64> %c, <4 x i1> %allones, i32 %evl)
1684 %u = call <4 x i64> @llvm.vp.merge.nxv4i64(<4 x i1> %m, <4 x i64> %y, <4 x i64> %c, i32 %evl)
; Unmasked vector-vector multiply-accumulate on <4 x i64>: same vp.mul/vp.add chain,
; but the final vp.merge uses the all-ones mask, so %m does not feed the merge.
; The assertions expect an unmasked vmacc.vv under an `e64, m2, tu, ma` vsetvli.
1688 define <4 x i64> @vmacc_vv_nxv4i64_unmasked(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i1> %m, i32 zeroext %evl) {
1689 ; CHECK-LABEL: vmacc_vv_nxv4i64_unmasked:
1691 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, ma
1692 ; CHECK-NEXT: vmacc.vv v12, v8, v10
1693 ; CHECK-NEXT: vmv2r.v v8, v12
1695 %splat = insertelement <4 x i1> poison, i1 -1, i32 0
1696 %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
1697 %x = call <4 x i64> @llvm.vp.mul.nxv4i64(<4 x i64> %a, <4 x i64> %b, <4 x i1> %allones, i32 %evl)
1698 %y = call <4 x i64> @llvm.vp.add.nxv4i64(<4 x i64> %x, <4 x i64> %c, <4 x i1> %allones, i32 %evl)
1699 %u = call <4 x i64> @llvm.vp.merge.nxv4i64(<4 x i1> %allones, <4 x i64> %y, <4 x i64> %c, i32 %evl)
; Masked vector-scalar multiply-accumulate on <4 x i64>: the i64 scalar %b is splatted,
; then vp.mul/vp.add run under an all-ones mask with %evl and vp.merge selects under %m.
; riscv32 assertions: a0/a1 are stored to the stack (presumably the two halves of %b),
; splatted with a zero-stride vlse64.v, and a masked vmacc.vv is used (`m2, tu, mu`).
; riscv64 assertions: a masked vmacc.vx takes the scalar directly from a0 (`m2, tu, mu`).
1703 define <4 x i64> @vmacc_vx_nxv4i64(<4 x i64> %a, i64 %b, <4 x i64> %c, <4 x i1> %m, i32 zeroext %evl) {
1704 ; RV32-LABEL: vmacc_vx_nxv4i64:
1706 ; RV32-NEXT: addi sp, sp, -16
1707 ; RV32-NEXT: .cfi_def_cfa_offset 16
1708 ; RV32-NEXT: sw a1, 12(sp)
1709 ; RV32-NEXT: sw a0, 8(sp)
1710 ; RV32-NEXT: addi a0, sp, 8
1711 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
1712 ; RV32-NEXT: vlse64.v v12, (a0), zero
1713 ; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, mu
1714 ; RV32-NEXT: vmacc.vv v10, v8, v12, v0.t
1715 ; RV32-NEXT: vmv2r.v v8, v10
1716 ; RV32-NEXT: addi sp, sp, 16
1719 ; RV64-LABEL: vmacc_vx_nxv4i64:
1721 ; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, mu
1722 ; RV64-NEXT: vmacc.vx v10, a0, v8, v0.t
1723 ; RV64-NEXT: vmv2r.v v8, v10
1725 %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0
1726 %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
1727 %splat = insertelement <4 x i1> poison, i1 -1, i32 0
1728 %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
1729 %x = call <4 x i64> @llvm.vp.mul.nxv4i64(<4 x i64> %a, <4 x i64> %vb, <4 x i1> %allones, i32 %evl)
1730 %y = call <4 x i64> @llvm.vp.add.nxv4i64(<4 x i64> %x, <4 x i64> %c, <4 x i1> %allones, i32 %evl)
1731 %u = call <4 x i64> @llvm.vp.merge.nxv4i64(<4 x i1> %m, <4 x i64> %y, <4 x i64> %c, i32 %evl)
; Unmasked vector-scalar multiply-accumulate on <4 x i64>: the splatted i64 scalar %b
; goes through the vp.mul/vp.add chain and the final vp.merge uses the all-ones mask,
; so %m does not feed the merge.
; riscv32 assertions: a0/a1 are stored to the stack (presumably the two halves of %b),
; splatted with a zero-stride vlse64.v, and an unmasked vmacc.vv is used (`m2, tu, ma`).
; riscv64 assertions: an unmasked vmacc.vx takes the scalar from a0 (`m2, tu, ma`).
1735 define <4 x i64> @vmacc_vx_nxv4i64_unmasked(<4 x i64> %a, i64 %b, <4 x i64> %c, <4 x i1> %m, i32 zeroext %evl) {
1736 ; RV32-LABEL: vmacc_vx_nxv4i64_unmasked:
1738 ; RV32-NEXT: addi sp, sp, -16
1739 ; RV32-NEXT: .cfi_def_cfa_offset 16
1740 ; RV32-NEXT: sw a1, 12(sp)
1741 ; RV32-NEXT: sw a0, 8(sp)
1742 ; RV32-NEXT: addi a0, sp, 8
1743 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
1744 ; RV32-NEXT: vlse64.v v12, (a0), zero
1745 ; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, ma
1746 ; RV32-NEXT: vmacc.vv v10, v8, v12
1747 ; RV32-NEXT: vmv2r.v v8, v10
1748 ; RV32-NEXT: addi sp, sp, 16
1751 ; RV64-LABEL: vmacc_vx_nxv4i64_unmasked:
1753 ; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, ma
1754 ; RV64-NEXT: vmacc.vx v10, a0, v8
1755 ; RV64-NEXT: vmv2r.v v8, v10
1757 %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0
1758 %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
1759 %splat = insertelement <4 x i1> poison, i1 -1, i32 0
1760 %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
1761 %x = call <4 x i64> @llvm.vp.mul.nxv4i64(<4 x i64> %a, <4 x i64> %vb, <4 x i1> %allones, i32 %evl)
1762 %y = call <4 x i64> @llvm.vp.add.nxv4i64(<4 x i64> %x, <4 x i64> %c, <4 x i1> %allones, i32 %evl)
1763 %u = call <4 x i64> @llvm.vp.merge.nxv4i64(<4 x i1> %allones, <4 x i64> %y, <4 x i64> %c, i32 %evl)
; vp.select variant of the masked vector-vector multiply-accumulate on <4 x i64>: the
; all-ones-masked vp.mul/vp.add result is chosen against %c per lane of %m with
; vp.select instead of vp.merge. The assertions expect a masked vmacc.vv under an
; `e64, m2, ta, mu` vsetvli, followed by vmv.v.v to place the result in v8.
1767 define <4 x i64> @vmacc_vv_nxv4i64_ta(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i1> %m, i32 zeroext %evl) {
1768 ; CHECK-LABEL: vmacc_vv_nxv4i64_ta:
1770 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
1771 ; CHECK-NEXT: vmacc.vv v12, v8, v10, v0.t
1772 ; CHECK-NEXT: vmv.v.v v8, v12
1774 %splat = insertelement <4 x i1> poison, i1 -1, i32 0
1775 %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
1776 %x = call <4 x i64> @llvm.vp.mul.nxv4i64(<4 x i64> %a, <4 x i64> %b, <4 x i1> %allones, i32 %evl)
1777 %y = call <4 x i64> @llvm.vp.add.nxv4i64(<4 x i64> %x, <4 x i64> %c, <4 x i1> %allones, i32 %evl)
1778 %u = call <4 x i64> @llvm.vp.select.nxv4i64(<4 x i1> %m, <4 x i64> %y, <4 x i64> %c, i32 %evl)
; vp.select variant of the masked vector-scalar multiply-accumulate on <4 x i64>: the
; i64 scalar %b is splatted, run through the all-ones-masked vp.mul/vp.add chain, and
; vp.select picks that sum or %c per lane of %m.
; riscv32 assertions: a0/a1 are stored to the stack (presumably the two halves of %b),
; splatted with a zero-stride vlse64.v, and a masked vmacc.vv is used (`m2, ta, mu`).
; riscv64 assertions: a masked vmacc.vx takes the scalar directly from a0 (`m2, ta, mu`).
1782 define <4 x i64> @vmacc_vx_nxv4i64_ta(<4 x i64> %a, i64 %b, <4 x i64> %c, <4 x i1> %m, i32 zeroext %evl) {
1783 ; RV32-LABEL: vmacc_vx_nxv4i64_ta:
1785 ; RV32-NEXT: addi sp, sp, -16
1786 ; RV32-NEXT: .cfi_def_cfa_offset 16
1787 ; RV32-NEXT: sw a1, 12(sp)
1788 ; RV32-NEXT: sw a0, 8(sp)
1789 ; RV32-NEXT: addi a0, sp, 8
1790 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
1791 ; RV32-NEXT: vlse64.v v12, (a0), zero
1792 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu
1793 ; RV32-NEXT: vmacc.vv v10, v8, v12, v0.t
1794 ; RV32-NEXT: vmv.v.v v8, v10
1795 ; RV32-NEXT: addi sp, sp, 16
1798 ; RV64-LABEL: vmacc_vx_nxv4i64_ta:
1800 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu
1801 ; RV64-NEXT: vmacc.vx v10, a0, v8, v0.t
1802 ; RV64-NEXT: vmv.v.v v8, v10
1804 %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0
1805 %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
1806 %splat = insertelement <4 x i1> poison, i1 -1, i32 0
1807 %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
1808 %x = call <4 x i64> @llvm.vp.mul.nxv4i64(<4 x i64> %a, <4 x i64> %vb, <4 x i1> %allones, i32 %evl)
1809 %y = call <4 x i64> @llvm.vp.add.nxv4i64(<4 x i64> %x, <4 x i64> %c, <4 x i1> %allones, i32 %evl)
1810 %u = call <4 x i64> @llvm.vp.select.nxv4i64(<4 x i1> %m, <4 x i64> %y, <4 x i64> %c, i32 %evl)
; Declarations of the VP intrinsics (mul, add, merge, select) called by the <8 x i64>
; tests that follow. Each takes an <8 x i1> mask and an i32 explicit vector length.
; TODO confirm: the `.nxv8i64` suffix reads like a scalable-vector mangling while the
; signatures use fixed <8 x i64>; presumably the IR parser remangles the name.
1814 declare <8 x i64> @llvm.vp.mul.nxv8i64(<8 x i64>, <8 x i64>, <8 x i1>, i32)
1815 declare <8 x i64> @llvm.vp.add.nxv8i64(<8 x i64>, <8 x i64>, <8 x i1>, i32)
1816 declare <8 x i64> @llvm.vp.merge.nxv8i64(<8 x i1>, <8 x i64>, <8 x i64>, i32)
1817 declare <8 x i64> @llvm.vp.select.nxv8i64(<8 x i1>, <8 x i64>, <8 x i64>, i32)
; Masked vector-vector multiply-accumulate on <8 x i64>: vp.mul(%a, %b) and
; vp.add(.., %c) run under an all-ones mask with %evl, then vp.merge picks that sum
; or %c per lane of %m. The assertions expect one masked vmacc.vv under an
; `e64, m4, tu, mu` vsetvli and a vmv4r.v to place the result in v8.
1819 define <8 x i64> @vmacc_vv_nxv8i64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i1> %m, i32 zeroext %evl) {
1820 ; CHECK-LABEL: vmacc_vv_nxv8i64:
1822 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu
1823 ; CHECK-NEXT: vmacc.vv v16, v8, v12, v0.t
1824 ; CHECK-NEXT: vmv4r.v v8, v16
1826 %splat = insertelement <8 x i1> poison, i1 -1, i32 0
1827 %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
1828 %x = call <8 x i64> @llvm.vp.mul.nxv8i64(<8 x i64> %a, <8 x i64> %b, <8 x i1> %allones, i32 %evl)
1829 %y = call <8 x i64> @llvm.vp.add.nxv8i64(<8 x i64> %x, <8 x i64> %c, <8 x i1> %allones, i32 %evl)
1830 %u = call <8 x i64> @llvm.vp.merge.nxv8i64(<8 x i1> %m, <8 x i64> %y, <8 x i64> %c, i32 %evl)
; Unmasked vector-vector multiply-accumulate on <8 x i64>: same vp.mul/vp.add chain,
; but the final vp.merge uses the all-ones mask, so %m does not feed the merge.
; The assertions expect an unmasked vmacc.vv under an `e64, m4, tu, ma` vsetvli.
1834 define <8 x i64> @vmacc_vv_nxv8i64_unmasked(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i1> %m, i32 zeroext %evl) {
1835 ; CHECK-LABEL: vmacc_vv_nxv8i64_unmasked:
1837 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, ma
1838 ; CHECK-NEXT: vmacc.vv v16, v8, v12
1839 ; CHECK-NEXT: vmv4r.v v8, v16
1841 %splat = insertelement <8 x i1> poison, i1 -1, i32 0
1842 %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
1843 %x = call <8 x i64> @llvm.vp.mul.nxv8i64(<8 x i64> %a, <8 x i64> %b, <8 x i1> %allones, i32 %evl)
1844 %y = call <8 x i64> @llvm.vp.add.nxv8i64(<8 x i64> %x, <8 x i64> %c, <8 x i1> %allones, i32 %evl)
1845 %u = call <8 x i64> @llvm.vp.merge.nxv8i64(<8 x i1> %allones, <8 x i64> %y, <8 x i64> %c, i32 %evl)
; Masked vector-scalar multiply-accumulate on <8 x i64>: the i64 scalar %b is splatted,
; then vp.mul/vp.add run under an all-ones mask with %evl and vp.merge selects under %m.
; riscv32 assertions: a0/a1 are stored to the stack (presumably the two halves of %b),
; splatted with a zero-stride vlse64.v, and a masked vmacc.vv is used (`m4, tu, mu`).
; riscv64 assertions: a masked vmacc.vx takes the scalar directly from a0 (`m4, tu, mu`).
1849 define <8 x i64> @vmacc_vx_nxv8i64(<8 x i64> %a, i64 %b, <8 x i64> %c, <8 x i1> %m, i32 zeroext %evl) {
1850 ; RV32-LABEL: vmacc_vx_nxv8i64:
1852 ; RV32-NEXT: addi sp, sp, -16
1853 ; RV32-NEXT: .cfi_def_cfa_offset 16
1854 ; RV32-NEXT: sw a1, 12(sp)
1855 ; RV32-NEXT: sw a0, 8(sp)
1856 ; RV32-NEXT: addi a0, sp, 8
1857 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1858 ; RV32-NEXT: vlse64.v v16, (a0), zero
1859 ; RV32-NEXT: vsetvli zero, a2, e64, m4, tu, mu
1860 ; RV32-NEXT: vmacc.vv v12, v8, v16, v0.t
1861 ; RV32-NEXT: vmv4r.v v8, v12
1862 ; RV32-NEXT: addi sp, sp, 16
1865 ; RV64-LABEL: vmacc_vx_nxv8i64:
1867 ; RV64-NEXT: vsetvli zero, a1, e64, m4, tu, mu
1868 ; RV64-NEXT: vmacc.vx v12, a0, v8, v0.t
1869 ; RV64-NEXT: vmv4r.v v8, v12
1871 %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
1872 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
1873 %splat = insertelement <8 x i1> poison, i1 -1, i32 0
1874 %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
1875 %x = call <8 x i64> @llvm.vp.mul.nxv8i64(<8 x i64> %a, <8 x i64> %vb, <8 x i1> %allones, i32 %evl)
1876 %y = call <8 x i64> @llvm.vp.add.nxv8i64(<8 x i64> %x, <8 x i64> %c, <8 x i1> %allones, i32 %evl)
1877 %u = call <8 x i64> @llvm.vp.merge.nxv8i64(<8 x i1> %m, <8 x i64> %y, <8 x i64> %c, i32 %evl)
; Unmasked vector-scalar multiply-accumulate on <8 x i64>: the splatted i64 scalar %b
; goes through the vp.mul/vp.add chain and the final vp.merge uses the all-ones mask,
; so %m does not feed the merge.
; riscv32 assertions: a0/a1 are stored to the stack (presumably the two halves of %b),
; splatted with a zero-stride vlse64.v, and an unmasked vmacc.vv is used (`m4, tu, ma`).
; riscv64 assertions: an unmasked vmacc.vx takes the scalar from a0 (`m4, tu, ma`).
1881 define <8 x i64> @vmacc_vx_nxv8i64_unmasked(<8 x i64> %a, i64 %b, <8 x i64> %c, <8 x i1> %m, i32 zeroext %evl) {
1882 ; RV32-LABEL: vmacc_vx_nxv8i64_unmasked:
1884 ; RV32-NEXT: addi sp, sp, -16
1885 ; RV32-NEXT: .cfi_def_cfa_offset 16
1886 ; RV32-NEXT: sw a1, 12(sp)
1887 ; RV32-NEXT: sw a0, 8(sp)
1888 ; RV32-NEXT: addi a0, sp, 8
1889 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1890 ; RV32-NEXT: vlse64.v v16, (a0), zero
1891 ; RV32-NEXT: vsetvli zero, a2, e64, m4, tu, ma
1892 ; RV32-NEXT: vmacc.vv v12, v8, v16
1893 ; RV32-NEXT: vmv4r.v v8, v12
1894 ; RV32-NEXT: addi sp, sp, 16
1897 ; RV64-LABEL: vmacc_vx_nxv8i64_unmasked:
1899 ; RV64-NEXT: vsetvli zero, a1, e64, m4, tu, ma
1900 ; RV64-NEXT: vmacc.vx v12, a0, v8
1901 ; RV64-NEXT: vmv4r.v v8, v12
1903 %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
1904 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
1905 %splat = insertelement <8 x i1> poison, i1 -1, i32 0
1906 %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
1907 %x = call <8 x i64> @llvm.vp.mul.nxv8i64(<8 x i64> %a, <8 x i64> %vb, <8 x i1> %allones, i32 %evl)
1908 %y = call <8 x i64> @llvm.vp.add.nxv8i64(<8 x i64> %x, <8 x i64> %c, <8 x i1> %allones, i32 %evl)
1909 %u = call <8 x i64> @llvm.vp.merge.nxv8i64(<8 x i1> %allones, <8 x i64> %y, <8 x i64> %c, i32 %evl)
; Vector-vector multiply-accumulate whose final combine is vp.select rather
; than vp.merge: %a * %b + %c is computed under an all-ones mask, then %m
; selects between that result and %c.
; Both run lines share the same assertions: a masked vmacc.vv under a "ta, mu"
; vsetvli, with the result copied back by vmv.v.v.
1913 define <8 x i64> @vmacc_vv_nxv8i64_ta(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i1> %m, i32 zeroext %evl) {
1914 ; CHECK-LABEL: vmacc_vv_nxv8i64_ta:
1916 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
1917 ; CHECK-NEXT: vmacc.vv v16, v8, v12, v0.t
1918 ; CHECK-NEXT: vmv.v.v v8, v16
; Build the all-ones mask used by the multiply and the add.
1920 %splat = insertelement <8 x i1> poison, i1 -1, i32 0
1921 %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
1922 %x = call <8 x i64> @llvm.vp.mul.nxv8i64(<8 x i64> %a, <8 x i64> %b, <8 x i1> %allones, i32 %evl)
1923 %y = call <8 x i64> @llvm.vp.add.nxv8i64(<8 x i64> %x, <8 x i64> %c, <8 x i1> %allones, i32 %evl)
1924 %u = call <8 x i64> @llvm.vp.select.nxv8i64(<8 x i1> %m, <8 x i64> %y, <8 x i64> %c, i32 %evl)
; Scalar-splat multiply-accumulate whose final combine is vp.select rather than
; vp.merge: splat(%b) * %a + %c is computed under an all-ones mask, then %m
; selects between that result and %c.
; The riscv64 assertions expect a masked vmacc.vx under a "ta, mu" vsetvli.
; The riscv32 assertions expect a0/a1 to be stored to the stack (presumably the
; two halves of the i64 %b -- confirm against the ilp32d calling convention),
; splatted with a zero-stride vlse64.v, and used by a masked vmacc.vv.
; In both cases the result is copied back with vmv.v.v.
1928 define <8 x i64> @vmacc_vx_nxv8i64_ta(<8 x i64> %a, i64 %b, <8 x i64> %c, <8 x i1> %m, i32 zeroext %evl) {
1929 ; RV32-LABEL: vmacc_vx_nxv8i64_ta:
1931 ; RV32-NEXT: addi sp, sp, -16
1932 ; RV32-NEXT: .cfi_def_cfa_offset 16
1933 ; RV32-NEXT: sw a1, 12(sp)
1934 ; RV32-NEXT: sw a0, 8(sp)
1935 ; RV32-NEXT: addi a0, sp, 8
1936 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1937 ; RV32-NEXT: vlse64.v v16, (a0), zero
1938 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
1939 ; RV32-NEXT: vmacc.vv v12, v8, v16, v0.t
1940 ; RV32-NEXT: vmv.v.v v8, v12
1941 ; RV32-NEXT: addi sp, sp, 16
1944 ; RV64-LABEL: vmacc_vx_nxv8i64_ta:
1946 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu
1947 ; RV64-NEXT: vmacc.vx v12, a0, v8, v0.t
1948 ; RV64-NEXT: vmv.v.v v8, v12
; Splat %b across all 8 lanes, then build the all-ones mask the same way.
1950 %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
1951 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
1952 %splat = insertelement <8 x i1> poison, i1 -1, i32 0
1953 %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
; %y = %a * splat(%b) + %c; the select then picks %y or %c per lane using %m.
1954 %x = call <8 x i64> @llvm.vp.mul.nxv8i64(<8 x i64> %a, <8 x i64> %vb, <8 x i1> %allones, i32 %evl)
1955 %y = call <8 x i64> @llvm.vp.add.nxv8i64(<8 x i64> %x, <8 x i64> %c, <8 x i1> %allones, i32 %evl)
1956 %u = call <8 x i64> @llvm.vp.select.nxv8i64(<8 x i1> %m, <8 x i64> %y, <8 x i64> %c, i32 %evl)