1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \
3 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
4 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \
5 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
7 declare <vscale x 1 x i8> @llvm.vp.mul.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i1>, i32)
8 declare <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i1>, i32)
9 declare <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1>, <vscale x 1 x i8>, <vscale x 1 x i8>, i32)
10 declare <vscale x 1 x i8> @llvm.vp.select.nxv1i8(<vscale x 1 x i1>, <vscale x 1 x i8>, <vscale x 1 x i8>, i32)
; Masked vector-vector multiply-accumulate on <vscale x 1 x i8>.
; vp.mul(%a, %b) feeds vp.add with %c (both under an all-ones mask); vp.merge
; then takes that sum under mask %m with %c as the other operand.
; The assertions expect one masked vmacc.vv (v0.t) under an "e8, mf8, tu, mu"
; vsetvli whose AVL is a0.
12 define <vscale x 1 x i8> @vmacc_vv_nxv1i8(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b, <vscale x 1 x i8> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
13 ; CHECK-LABEL: vmacc_vv_nxv1i8:
15 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu
16 ; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t
17 ; CHECK-NEXT: vmv1r.v v8, v10
19 %x = call <vscale x 1 x i8> @llvm.vp.mul.nxv1i8(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
20 %y = call <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8> %x, <vscale x 1 x i8> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
21 %u = call <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1> %m, <vscale x 1 x i8> %y, <vscale x 1 x i8> %c, i32 %evl)
22 ret <vscale x 1 x i8> %u
; Unmasked vector-vector multiply-accumulate on <vscale x 1 x i8>.
; Same mul/add chain, but the vp.merge mask is the all-ones splat; the %m
; argument is accepted and never referenced in the body.
; The assertions expect an unmasked vmacc.vv under an "e8, mf8, tu, ma" vsetvli.
25 define <vscale x 1 x i8> @vmacc_vv_nxv1i8_unmasked(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b, <vscale x 1 x i8> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
26 ; CHECK-LABEL: vmacc_vv_nxv1i8_unmasked:
28 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma
29 ; CHECK-NEXT: vmacc.vv v10, v8, v9
30 ; CHECK-NEXT: vmv1r.v v8, v10
32 %x = call <vscale x 1 x i8> @llvm.vp.mul.nxv1i8(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
33 %y = call <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8> %x, <vscale x 1 x i8> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
34 %u = call <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1> splat (i1 -1), <vscale x 1 x i8> %y, <vscale x 1 x i8> %c, i32 %evl)
35 ret <vscale x 1 x i8> %u
; Masked vector-scalar multiply-accumulate on <vscale x 1 x i8>.
; The i8 scalar %b is splatted, multiplied with %a, added to %c, and merged
; with %c under mask %m.
; The assertions expect a masked vmacc.vx reading the scalar from a0, with the
; "e8, mf8, tu, mu" vsetvli taking its AVL from a1.
38 define <vscale x 1 x i8> @vmacc_vx_nxv1i8(<vscale x 1 x i8> %a, i8 %b, <vscale x 1 x i8> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
39 ; CHECK-LABEL: vmacc_vx_nxv1i8:
41 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, mu
42 ; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t
43 ; CHECK-NEXT: vmv1r.v v8, v9
; Broadcast %b to every lane: insert into lane 0, then shuffle with a zero index vector.
45 %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
46 %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
47 %x = call <vscale x 1 x i8> @llvm.vp.mul.nxv1i8(<vscale x 1 x i8> %a, <vscale x 1 x i8> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
48 %y = call <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8> %x, <vscale x 1 x i8> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
49 %u = call <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1> %m, <vscale x 1 x i8> %y, <vscale x 1 x i8> %c, i32 %evl)
50 ret <vscale x 1 x i8> %u
; Unmasked vector-scalar multiply-accumulate on <vscale x 1 x i8>.
; The splat of %b is multiplied with %a and added to %c; the vp.merge mask is
; the all-ones splat and the %m argument is never referenced in the body.
; The assertions expect an unmasked vmacc.vx under an "e8, mf8, tu, ma" vsetvli.
53 define <vscale x 1 x i8> @vmacc_vx_nxv1i8_unmasked(<vscale x 1 x i8> %a, i8 %b, <vscale x 1 x i8> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
54 ; CHECK-LABEL: vmacc_vx_nxv1i8_unmasked:
56 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, ma
57 ; CHECK-NEXT: vmacc.vx v9, a0, v8
58 ; CHECK-NEXT: vmv1r.v v8, v9
; Broadcast %b to every lane: insert into lane 0, then shuffle with a zero index vector.
60 %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
61 %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
62 %x = call <vscale x 1 x i8> @llvm.vp.mul.nxv1i8(<vscale x 1 x i8> %a, <vscale x 1 x i8> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
63 %y = call <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8> %x, <vscale x 1 x i8> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
64 %u = call <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1> splat (i1 -1), <vscale x 1 x i8> %y, <vscale x 1 x i8> %c, i32 %evl)
65 ret <vscale x 1 x i8> %u
; Tail-agnostic form of the masked vector-vector multiply-accumulate on
; <vscale x 1 x i8>: the final blend uses vp.select instead of vp.merge.
; The assertions expect a masked vmacc.vv under an "e8, mf8, ta, mu" vsetvli.
68 define <vscale x 1 x i8> @vmacc_vv_nxv1i8_ta(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b, <vscale x 1 x i8> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
69 ; CHECK-LABEL: vmacc_vv_nxv1i8_ta:
71 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
72 ; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t
73 ; CHECK-NEXT: vmv1r.v v8, v10
75 %x = call <vscale x 1 x i8> @llvm.vp.mul.nxv1i8(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
76 %y = call <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8> %x, <vscale x 1 x i8> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
77 %u = call <vscale x 1 x i8> @llvm.vp.select.nxv1i8(<vscale x 1 x i1> %m, <vscale x 1 x i8> %y, <vscale x 1 x i8> %c, i32 %evl)
78 ret <vscale x 1 x i8> %u
; Tail-agnostic form of the masked vector-scalar multiply-accumulate on
; <vscale x 1 x i8>: %b is splatted and the final blend uses vp.select.
; The assertions expect a masked vmacc.vx (scalar in a0) under an
; "e8, mf8, ta, mu" vsetvli whose AVL is a1.
81 define <vscale x 1 x i8> @vmacc_vx_nxv1i8_ta(<vscale x 1 x i8> %a, i8 %b, <vscale x 1 x i8> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
82 ; CHECK-LABEL: vmacc_vx_nxv1i8_ta:
84 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu
85 ; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t
86 ; CHECK-NEXT: vmv1r.v v8, v9
; Broadcast %b to every lane: insert into lane 0, then shuffle with a zero index vector.
88 %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
89 %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
90 %x = call <vscale x 1 x i8> @llvm.vp.mul.nxv1i8(<vscale x 1 x i8> %a, <vscale x 1 x i8> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
91 %y = call <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8> %x, <vscale x 1 x i8> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
92 %u = call <vscale x 1 x i8> @llvm.vp.select.nxv1i8(<vscale x 1 x i1> %m, <vscale x 1 x i8> %y, <vscale x 1 x i8> %c, i32 %evl)
93 ret <vscale x 1 x i8> %u
96 declare <vscale x 2 x i8> @llvm.vp.mul.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>, <vscale x 2 x i1>, i32)
97 declare <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>, <vscale x 2 x i1>, i32)
98 declare <vscale x 2 x i8> @llvm.vp.merge.nxv2i8(<vscale x 2 x i1>, <vscale x 2 x i8>, <vscale x 2 x i8>, i32)
99 declare <vscale x 2 x i8> @llvm.vp.select.nxv2i8(<vscale x 2 x i1>, <vscale x 2 x i8>, <vscale x 2 x i8>, i32)
; Masked vector-vector multiply-accumulate on <vscale x 2 x i8>.
; vp.mul(%a, %b) feeds vp.add with %c (both under an all-ones mask); vp.merge
; then takes that sum under mask %m with %c as the other operand.
; The assertions expect one masked vmacc.vv (v0.t) under an "e8, mf4, tu, mu"
; vsetvli whose AVL is a0.
101 define <vscale x 2 x i8> @vmacc_vv_nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b, <vscale x 2 x i8> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
102 ; CHECK-LABEL: vmacc_vv_nxv2i8:
104 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, mu
105 ; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t
106 ; CHECK-NEXT: vmv1r.v v8, v10
108 %x = call <vscale x 2 x i8> @llvm.vp.mul.nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
109 %y = call <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i8> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
110 %u = call <vscale x 2 x i8> @llvm.vp.merge.nxv2i8(<vscale x 2 x i1> %m, <vscale x 2 x i8> %y, <vscale x 2 x i8> %c, i32 %evl)
111 ret <vscale x 2 x i8> %u
; Unmasked vector-vector multiply-accumulate on <vscale x 2 x i8>.
; Same mul/add chain, but the vp.merge mask is the all-ones splat; the %m
; argument is accepted and never referenced in the body.
; The assertions expect an unmasked vmacc.vv under an "e8, mf4, tu, ma" vsetvli.
114 define <vscale x 2 x i8> @vmacc_vv_nxv2i8_unmasked(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b, <vscale x 2 x i8> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
115 ; CHECK-LABEL: vmacc_vv_nxv2i8_unmasked:
117 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, ma
118 ; CHECK-NEXT: vmacc.vv v10, v8, v9
119 ; CHECK-NEXT: vmv1r.v v8, v10
121 %x = call <vscale x 2 x i8> @llvm.vp.mul.nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
122 %y = call <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i8> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
123 %u = call <vscale x 2 x i8> @llvm.vp.merge.nxv2i8(<vscale x 2 x i1> splat (i1 -1), <vscale x 2 x i8> %y, <vscale x 2 x i8> %c, i32 %evl)
124 ret <vscale x 2 x i8> %u
; Masked vector-scalar multiply-accumulate on <vscale x 2 x i8>.
; The i8 scalar %b is splatted, multiplied with %a, added to %c, and merged
; with %c under mask %m.
; The assertions expect a masked vmacc.vx reading the scalar from a0, with the
; "e8, mf4, tu, mu" vsetvli taking its AVL from a1.
127 define <vscale x 2 x i8> @vmacc_vx_nxv2i8(<vscale x 2 x i8> %a, i8 %b, <vscale x 2 x i8> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
128 ; CHECK-LABEL: vmacc_vx_nxv2i8:
130 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, mu
131 ; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t
132 ; CHECK-NEXT: vmv1r.v v8, v9
; Broadcast %b to every lane: insert into lane 0, then shuffle with a zero index vector.
134 %elt.head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
135 %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
136 %x = call <vscale x 2 x i8> @llvm.vp.mul.nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
137 %y = call <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i8> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
138 %u = call <vscale x 2 x i8> @llvm.vp.merge.nxv2i8(<vscale x 2 x i1> %m, <vscale x 2 x i8> %y, <vscale x 2 x i8> %c, i32 %evl)
139 ret <vscale x 2 x i8> %u
; Unmasked vector-scalar multiply-accumulate on <vscale x 2 x i8>.
; The splat of %b is multiplied with %a and added to %c; the vp.merge mask is
; the all-ones splat and the %m argument is never referenced in the body.
; The assertions expect an unmasked vmacc.vx under an "e8, mf4, tu, ma" vsetvli.
142 define <vscale x 2 x i8> @vmacc_vx_nxv2i8_unmasked(<vscale x 2 x i8> %a, i8 %b, <vscale x 2 x i8> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
143 ; CHECK-LABEL: vmacc_vx_nxv2i8_unmasked:
145 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, ma
146 ; CHECK-NEXT: vmacc.vx v9, a0, v8
147 ; CHECK-NEXT: vmv1r.v v8, v9
; Broadcast %b to every lane: insert into lane 0, then shuffle with a zero index vector.
149 %elt.head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
150 %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
151 %x = call <vscale x 2 x i8> @llvm.vp.mul.nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
152 %y = call <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i8> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
153 %u = call <vscale x 2 x i8> @llvm.vp.merge.nxv2i8(<vscale x 2 x i1> splat (i1 -1), <vscale x 2 x i8> %y, <vscale x 2 x i8> %c, i32 %evl)
154 ret <vscale x 2 x i8> %u
; Tail-agnostic form of the masked vector-vector multiply-accumulate on
; <vscale x 2 x i8>: the final blend uses vp.select instead of vp.merge.
; The assertions expect a masked vmacc.vv under an "e8, mf4, ta, mu" vsetvli.
157 define <vscale x 2 x i8> @vmacc_vv_nxv2i8_ta(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b, <vscale x 2 x i8> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
158 ; CHECK-LABEL: vmacc_vv_nxv2i8_ta:
160 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
161 ; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t
162 ; CHECK-NEXT: vmv1r.v v8, v10
164 %x = call <vscale x 2 x i8> @llvm.vp.mul.nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
165 %y = call <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i8> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
166 %u = call <vscale x 2 x i8> @llvm.vp.select.nxv2i8(<vscale x 2 x i1> %m, <vscale x 2 x i8> %y, <vscale x 2 x i8> %c, i32 %evl)
167 ret <vscale x 2 x i8> %u
; Tail-agnostic form of the masked vector-scalar multiply-accumulate on
; <vscale x 2 x i8>: %b is splatted and the final blend uses vp.select.
; The assertions expect a masked vmacc.vx (scalar in a0) under an
; "e8, mf4, ta, mu" vsetvli whose AVL is a1.
170 define <vscale x 2 x i8> @vmacc_vx_nxv2i8_ta(<vscale x 2 x i8> %a, i8 %b, <vscale x 2 x i8> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
171 ; CHECK-LABEL: vmacc_vx_nxv2i8_ta:
173 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu
174 ; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t
175 ; CHECK-NEXT: vmv1r.v v8, v9
; Broadcast %b to every lane: insert into lane 0, then shuffle with a zero index vector.
177 %elt.head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
178 %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
179 %x = call <vscale x 2 x i8> @llvm.vp.mul.nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
180 %y = call <vscale x 2 x i8> @llvm.vp.add.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i8> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
181 %u = call <vscale x 2 x i8> @llvm.vp.select.nxv2i8(<vscale x 2 x i1> %m, <vscale x 2 x i8> %y, <vscale x 2 x i8> %c, i32 %evl)
182 ret <vscale x 2 x i8> %u
185 declare <vscale x 4 x i8> @llvm.vp.mul.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>, <vscale x 4 x i1>, i32)
186 declare <vscale x 4 x i8> @llvm.vp.add.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>, <vscale x 4 x i1>, i32)
187 declare <vscale x 4 x i8> @llvm.vp.merge.nxv4i8(<vscale x 4 x i1>, <vscale x 4 x i8>, <vscale x 4 x i8>, i32)
188 declare <vscale x 4 x i8> @llvm.vp.select.nxv4i8(<vscale x 4 x i1>, <vscale x 4 x i8>, <vscale x 4 x i8>, i32)
; Masked vector-vector multiply-accumulate on <vscale x 4 x i8>.
; vp.mul(%a, %b) feeds vp.add with %c (both under an all-ones mask); vp.merge
; then takes that sum under mask %m with %c as the other operand.
; The assertions expect one masked vmacc.vv (v0.t) under an "e8, mf2, tu, mu"
; vsetvli whose AVL is a0.
190 define <vscale x 4 x i8> @vmacc_vv_nxv4i8(<vscale x 4 x i8> %a, <vscale x 4 x i8> %b, <vscale x 4 x i8> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
191 ; CHECK-LABEL: vmacc_vv_nxv4i8:
193 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, mu
194 ; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t
195 ; CHECK-NEXT: vmv1r.v v8, v10
197 %x = call <vscale x 4 x i8> @llvm.vp.mul.nxv4i8(<vscale x 4 x i8> %a, <vscale x 4 x i8> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
198 %y = call <vscale x 4 x i8> @llvm.vp.add.nxv4i8(<vscale x 4 x i8> %x, <vscale x 4 x i8> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
199 %u = call <vscale x 4 x i8> @llvm.vp.merge.nxv4i8(<vscale x 4 x i1> %m, <vscale x 4 x i8> %y, <vscale x 4 x i8> %c, i32 %evl)
200 ret <vscale x 4 x i8> %u
; Unmasked vector-vector multiply-accumulate on <vscale x 4 x i8>.
; Same mul/add chain, but the vp.merge mask is the all-ones splat; the %m
; argument is accepted and never referenced in the body.
; The assertions expect an unmasked vmacc.vv under an "e8, mf2, tu, ma" vsetvli.
203 define <vscale x 4 x i8> @vmacc_vv_nxv4i8_unmasked(<vscale x 4 x i8> %a, <vscale x 4 x i8> %b, <vscale x 4 x i8> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
204 ; CHECK-LABEL: vmacc_vv_nxv4i8_unmasked:
206 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma
207 ; CHECK-NEXT: vmacc.vv v10, v8, v9
208 ; CHECK-NEXT: vmv1r.v v8, v10
210 %x = call <vscale x 4 x i8> @llvm.vp.mul.nxv4i8(<vscale x 4 x i8> %a, <vscale x 4 x i8> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
211 %y = call <vscale x 4 x i8> @llvm.vp.add.nxv4i8(<vscale x 4 x i8> %x, <vscale x 4 x i8> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
212 %u = call <vscale x 4 x i8> @llvm.vp.merge.nxv4i8(<vscale x 4 x i1> splat (i1 -1), <vscale x 4 x i8> %y, <vscale x 4 x i8> %c, i32 %evl)
213 ret <vscale x 4 x i8> %u
; Masked vector-scalar multiply-accumulate on <vscale x 4 x i8>.
; The i8 scalar %b is splatted, multiplied with %a, added to %c, and merged
; with %c under mask %m.
; The assertions expect a masked vmacc.vx reading the scalar from a0, with the
; "e8, mf2, tu, mu" vsetvli taking its AVL from a1.
216 define <vscale x 4 x i8> @vmacc_vx_nxv4i8(<vscale x 4 x i8> %a, i8 %b, <vscale x 4 x i8> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
217 ; CHECK-LABEL: vmacc_vx_nxv4i8:
219 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, mu
220 ; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t
221 ; CHECK-NEXT: vmv1r.v v8, v9
; Broadcast %b to every lane: insert into lane 0, then shuffle with a zero index vector.
223 %elt.head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0
224 %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
225 %x = call <vscale x 4 x i8> @llvm.vp.mul.nxv4i8(<vscale x 4 x i8> %a, <vscale x 4 x i8> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
226 %y = call <vscale x 4 x i8> @llvm.vp.add.nxv4i8(<vscale x 4 x i8> %x, <vscale x 4 x i8> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
227 %u = call <vscale x 4 x i8> @llvm.vp.merge.nxv4i8(<vscale x 4 x i1> %m, <vscale x 4 x i8> %y, <vscale x 4 x i8> %c, i32 %evl)
228 ret <vscale x 4 x i8> %u
; Unmasked vector-scalar multiply-accumulate on <vscale x 4 x i8>.
; The splat of %b is multiplied with %a and added to %c; the vp.merge mask is
; the all-ones splat and the %m argument is never referenced in the body.
; The assertions expect an unmasked vmacc.vx under an "e8, mf2, tu, ma" vsetvli.
231 define <vscale x 4 x i8> @vmacc_vx_nxv4i8_unmasked(<vscale x 4 x i8> %a, i8 %b, <vscale x 4 x i8> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
232 ; CHECK-LABEL: vmacc_vx_nxv4i8_unmasked:
234 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma
235 ; CHECK-NEXT: vmacc.vx v9, a0, v8
236 ; CHECK-NEXT: vmv1r.v v8, v9
; Broadcast %b to every lane: insert into lane 0, then shuffle with a zero index vector.
238 %elt.head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0
239 %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
240 %x = call <vscale x 4 x i8> @llvm.vp.mul.nxv4i8(<vscale x 4 x i8> %a, <vscale x 4 x i8> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
241 %y = call <vscale x 4 x i8> @llvm.vp.add.nxv4i8(<vscale x 4 x i8> %x, <vscale x 4 x i8> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
242 %u = call <vscale x 4 x i8> @llvm.vp.merge.nxv4i8(<vscale x 4 x i1> splat (i1 -1), <vscale x 4 x i8> %y, <vscale x 4 x i8> %c, i32 %evl)
243 ret <vscale x 4 x i8> %u
; Tail-agnostic form of the masked vector-vector multiply-accumulate on
; <vscale x 4 x i8>: the final blend uses vp.select instead of vp.merge.
; The assertions expect a masked vmacc.vv under an "e8, mf2, ta, mu" vsetvli.
246 define <vscale x 4 x i8> @vmacc_vv_nxv4i8_ta(<vscale x 4 x i8> %a, <vscale x 4 x i8> %b, <vscale x 4 x i8> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
247 ; CHECK-LABEL: vmacc_vv_nxv4i8_ta:
249 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
250 ; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t
251 ; CHECK-NEXT: vmv1r.v v8, v10
253 %x = call <vscale x 4 x i8> @llvm.vp.mul.nxv4i8(<vscale x 4 x i8> %a, <vscale x 4 x i8> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
254 %y = call <vscale x 4 x i8> @llvm.vp.add.nxv4i8(<vscale x 4 x i8> %x, <vscale x 4 x i8> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
255 %u = call <vscale x 4 x i8> @llvm.vp.select.nxv4i8(<vscale x 4 x i1> %m, <vscale x 4 x i8> %y, <vscale x 4 x i8> %c, i32 %evl)
256 ret <vscale x 4 x i8> %u
; Tail-agnostic form of the masked vector-scalar multiply-accumulate on
; <vscale x 4 x i8>: %b is splatted and the final blend uses vp.select.
; The assertions expect a masked vmacc.vx (scalar in a0) under an
; "e8, mf2, ta, mu" vsetvli whose AVL is a1.
259 define <vscale x 4 x i8> @vmacc_vx_nxv4i8_ta(<vscale x 4 x i8> %a, i8 %b, <vscale x 4 x i8> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
260 ; CHECK-LABEL: vmacc_vx_nxv4i8_ta:
262 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu
263 ; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t
264 ; CHECK-NEXT: vmv1r.v v8, v9
; Broadcast %b to every lane: insert into lane 0, then shuffle with a zero index vector.
266 %elt.head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0
267 %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
268 %x = call <vscale x 4 x i8> @llvm.vp.mul.nxv4i8(<vscale x 4 x i8> %a, <vscale x 4 x i8> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
269 %y = call <vscale x 4 x i8> @llvm.vp.add.nxv4i8(<vscale x 4 x i8> %x, <vscale x 4 x i8> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
270 %u = call <vscale x 4 x i8> @llvm.vp.select.nxv4i8(<vscale x 4 x i1> %m, <vscale x 4 x i8> %y, <vscale x 4 x i8> %c, i32 %evl)
271 ret <vscale x 4 x i8> %u
274 declare <vscale x 8 x i8> @llvm.vp.mul.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i1>, i32)
275 declare <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i1>, i32)
276 declare <vscale x 8 x i8> @llvm.vp.merge.nxv8i8(<vscale x 8 x i1>, <vscale x 8 x i8>, <vscale x 8 x i8>, i32)
277 declare <vscale x 8 x i8> @llvm.vp.select.nxv8i8(<vscale x 8 x i1>, <vscale x 8 x i8>, <vscale x 8 x i8>, i32)
; Masked vector-vector multiply-accumulate on <vscale x 8 x i8>.
; vp.mul(%a, %b) feeds vp.add with %c (both under an all-ones mask); vp.merge
; then takes that sum under mask %m with %c as the other operand.
; The assertions expect one masked vmacc.vv (v0.t) under an "e8, m1, tu, mu"
; vsetvli whose AVL is a0.
279 define <vscale x 8 x i8> @vmacc_vv_nxv8i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b, <vscale x 8 x i8> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
280 ; CHECK-LABEL: vmacc_vv_nxv8i8:
282 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, mu
283 ; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t
284 ; CHECK-NEXT: vmv1r.v v8, v10
286 %x = call <vscale x 8 x i8> @llvm.vp.mul.nxv8i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
287 %y = call <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
288 %u = call <vscale x 8 x i8> @llvm.vp.merge.nxv8i8(<vscale x 8 x i1> %m, <vscale x 8 x i8> %y, <vscale x 8 x i8> %c, i32 %evl)
289 ret <vscale x 8 x i8> %u
; Unmasked vector-vector multiply-accumulate on <vscale x 8 x i8>.
; Same mul/add chain, but the vp.merge mask is the all-ones splat; the %m
; argument is accepted and never referenced in the body.
; The assertions expect an unmasked vmacc.vv under an "e8, m1, tu, ma" vsetvli.
292 define <vscale x 8 x i8> @vmacc_vv_nxv8i8_unmasked(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b, <vscale x 8 x i8> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
293 ; CHECK-LABEL: vmacc_vv_nxv8i8_unmasked:
295 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, ma
296 ; CHECK-NEXT: vmacc.vv v10, v8, v9
297 ; CHECK-NEXT: vmv1r.v v8, v10
299 %x = call <vscale x 8 x i8> @llvm.vp.mul.nxv8i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
300 %y = call <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
301 %u = call <vscale x 8 x i8> @llvm.vp.merge.nxv8i8(<vscale x 8 x i1> splat (i1 -1), <vscale x 8 x i8> %y, <vscale x 8 x i8> %c, i32 %evl)
302 ret <vscale x 8 x i8> %u
; Masked vector-scalar multiply-accumulate on <vscale x 8 x i8>.
; The i8 scalar %b is splatted, multiplied with %a, added to %c, and merged
; with %c under mask %m.
; The assertions expect a masked vmacc.vx reading the scalar from a0, with the
; "e8, m1, tu, mu" vsetvli taking its AVL from a1.
305 define <vscale x 8 x i8> @vmacc_vx_nxv8i8(<vscale x 8 x i8> %a, i8 %b, <vscale x 8 x i8> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
306 ; CHECK-LABEL: vmacc_vx_nxv8i8:
308 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, tu, mu
309 ; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t
310 ; CHECK-NEXT: vmv1r.v v8, v9
; Broadcast %b to every lane: insert into lane 0, then shuffle with a zero index vector.
312 %elt.head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
313 %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
314 %x = call <vscale x 8 x i8> @llvm.vp.mul.nxv8i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
315 %y = call <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
316 %u = call <vscale x 8 x i8> @llvm.vp.merge.nxv8i8(<vscale x 8 x i1> %m, <vscale x 8 x i8> %y, <vscale x 8 x i8> %c, i32 %evl)
317 ret <vscale x 8 x i8> %u
; Unmasked vector-scalar multiply-accumulate on <vscale x 8 x i8>.
; The splat of %b is multiplied with %a and added to %c; the vp.merge mask is
; the all-ones splat and the %m argument is never referenced in the body.
; The assertions expect an unmasked vmacc.vx under an "e8, m1, tu, ma" vsetvli.
320 define <vscale x 8 x i8> @vmacc_vx_nxv8i8_unmasked(<vscale x 8 x i8> %a, i8 %b, <vscale x 8 x i8> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
321 ; CHECK-LABEL: vmacc_vx_nxv8i8_unmasked:
323 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, tu, ma
324 ; CHECK-NEXT: vmacc.vx v9, a0, v8
325 ; CHECK-NEXT: vmv1r.v v8, v9
; Broadcast %b to every lane: insert into lane 0, then shuffle with a zero index vector.
327 %elt.head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
328 %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
329 %x = call <vscale x 8 x i8> @llvm.vp.mul.nxv8i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
330 %y = call <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
331 %u = call <vscale x 8 x i8> @llvm.vp.merge.nxv8i8(<vscale x 8 x i1> splat (i1 -1), <vscale x 8 x i8> %y, <vscale x 8 x i8> %c, i32 %evl)
332 ret <vscale x 8 x i8> %u
; Tail-agnostic form of the masked vector-vector multiply-accumulate on
; <vscale x 8 x i8>: the final blend uses vp.select instead of vp.merge.
; The assertions expect a masked vmacc.vv under an "e8, m1, ta, mu" vsetvli,
; with the result copied into v8 by vmv.v.v.
335 define <vscale x 8 x i8> @vmacc_vv_nxv8i8_ta(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b, <vscale x 8 x i8> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
336 ; CHECK-LABEL: vmacc_vv_nxv8i8_ta:
338 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu
339 ; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t
340 ; CHECK-NEXT: vmv.v.v v8, v10
342 %x = call <vscale x 8 x i8> @llvm.vp.mul.nxv8i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
343 %y = call <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
344 %u = call <vscale x 8 x i8> @llvm.vp.select.nxv8i8(<vscale x 8 x i1> %m, <vscale x 8 x i8> %y, <vscale x 8 x i8> %c, i32 %evl)
345 ret <vscale x 8 x i8> %u
; Tail-agnostic form of the masked vector-scalar multiply-accumulate on
; <vscale x 8 x i8>: %b is splatted and the final blend uses vp.select.
; The assertions expect a masked vmacc.vx (scalar in a0) under an
; "e8, m1, ta, mu" vsetvli whose AVL is a1, with the result copied into v8 by
; vmv.v.v.
348 define <vscale x 8 x i8> @vmacc_vx_nxv8i8_ta(<vscale x 8 x i8> %a, i8 %b, <vscale x 8 x i8> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
349 ; CHECK-LABEL: vmacc_vx_nxv8i8_ta:
351 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu
352 ; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t
353 ; CHECK-NEXT: vmv.v.v v8, v9
; Broadcast %b to every lane: insert into lane 0, then shuffle with a zero index vector.
355 %elt.head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
356 %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
357 %x = call <vscale x 8 x i8> @llvm.vp.mul.nxv8i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
358 %y = call <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
359 %u = call <vscale x 8 x i8> @llvm.vp.select.nxv8i8(<vscale x 8 x i1> %m, <vscale x 8 x i8> %y, <vscale x 8 x i8> %c, i32 %evl)
360 ret <vscale x 8 x i8> %u
363 declare <vscale x 16 x i8> @llvm.vp.mul.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i32)
364 declare <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i32)
365 declare <vscale x 16 x i8> @llvm.vp.merge.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32)
366 declare <vscale x 16 x i8> @llvm.vp.select.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32)
; Masked vector-vector multiply-accumulate on <vscale x 16 x i8>.
; vp.mul(%a, %b) feeds vp.add with %c (both under an all-ones mask); vp.merge
; then takes that sum under mask %m with %c as the other operand.
; The assertions expect one masked vmacc.vv (v0.t) under an "e8, m2, tu, mu"
; vsetvli whose AVL is a0, followed by a vmv2r.v copy into v8.
368 define <vscale x 16 x i8> @vmacc_vv_nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) {
369 ; CHECK-LABEL: vmacc_vv_nxv16i8:
371 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, tu, mu
372 ; CHECK-NEXT: vmacc.vv v12, v8, v10, v0.t
373 ; CHECK-NEXT: vmv2r.v v8, v12
375 %x = call <vscale x 16 x i8> @llvm.vp.mul.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
376 %y = call <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
377 %u = call <vscale x 16 x i8> @llvm.vp.merge.nxv16i8(<vscale x 16 x i1> %m, <vscale x 16 x i8> %y, <vscale x 16 x i8> %c, i32 %evl)
378 ret <vscale x 16 x i8> %u
; Unmasked vector-vector multiply-accumulate on <vscale x 16 x i8>.
; Same mul/add chain, but the vp.merge mask is the all-ones splat; the %m
; argument is accepted and never referenced in the body.
; The assertions expect an unmasked vmacc.vv under an "e8, m2, tu, ma" vsetvli.
381 define <vscale x 16 x i8> @vmacc_vv_nxv16i8_unmasked(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) {
382 ; CHECK-LABEL: vmacc_vv_nxv16i8_unmasked:
384 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, tu, ma
385 ; CHECK-NEXT: vmacc.vv v12, v8, v10
386 ; CHECK-NEXT: vmv2r.v v8, v12
388 %x = call <vscale x 16 x i8> @llvm.vp.mul.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
389 %y = call <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
390 %u = call <vscale x 16 x i8> @llvm.vp.merge.nxv16i8(<vscale x 16 x i1> splat (i1 -1), <vscale x 16 x i8> %y, <vscale x 16 x i8> %c, i32 %evl)
391 ret <vscale x 16 x i8> %u
; Masked vector-scalar multiply-accumulate on <vscale x 16 x i8>.
; The i8 scalar %b is splatted, multiplied with %a, added to %c, and merged
; with %c under mask %m.
; The assertions expect a masked vmacc.vx reading the scalar from a0, with the
; "e8, m2, tu, mu" vsetvli taking its AVL from a1.
394 define <vscale x 16 x i8> @vmacc_vx_nxv16i8(<vscale x 16 x i8> %a, i8 %b, <vscale x 16 x i8> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) {
395 ; CHECK-LABEL: vmacc_vx_nxv16i8:
397 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, tu, mu
398 ; CHECK-NEXT: vmacc.vx v10, a0, v8, v0.t
399 ; CHECK-NEXT: vmv2r.v v8, v10
; Broadcast %b to every lane: insert into lane 0, then shuffle with a zero index vector.
401 %elt.head = insertelement <vscale x 16 x i8> poison, i8 %b, i32 0
402 %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
403 %x = call <vscale x 16 x i8> @llvm.vp.mul.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %vb, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
404 %y = call <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
405 %u = call <vscale x 16 x i8> @llvm.vp.merge.nxv16i8(<vscale x 16 x i1> %m, <vscale x 16 x i8> %y, <vscale x 16 x i8> %c, i32 %evl)
406 ret <vscale x 16 x i8> %u
; Unmasked vector-scalar multiply-accumulate on <vscale x 16 x i8>.
; The splat of %b is multiplied with %a and added to %c; the vp.merge mask is
; the all-ones splat and the %m argument is never referenced in the body.
; The assertions expect an unmasked vmacc.vx under an "e8, m2, tu, ma" vsetvli.
409 define <vscale x 16 x i8> @vmacc_vx_nxv16i8_unmasked(<vscale x 16 x i8> %a, i8 %b, <vscale x 16 x i8> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) {
410 ; CHECK-LABEL: vmacc_vx_nxv16i8_unmasked:
412 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, tu, ma
413 ; CHECK-NEXT: vmacc.vx v10, a0, v8
414 ; CHECK-NEXT: vmv2r.v v8, v10
; Broadcast %b to every lane: insert into lane 0, then shuffle with a zero index vector.
416 %elt.head = insertelement <vscale x 16 x i8> poison, i8 %b, i32 0
417 %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
418 %x = call <vscale x 16 x i8> @llvm.vp.mul.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %vb, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
419 %y = call <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
420 %u = call <vscale x 16 x i8> @llvm.vp.merge.nxv16i8(<vscale x 16 x i1> splat (i1 -1), <vscale x 16 x i8> %y, <vscale x 16 x i8> %c, i32 %evl)
421 ret <vscale x 16 x i8> %u
; Tail-agnostic form of the masked vector-vector multiply-accumulate on
; <vscale x 16 x i8>: the final blend uses vp.select instead of vp.merge.
; The assertions expect a masked vmacc.vv under an "e8, m2, ta, mu" vsetvli,
; with the result copied into v8 by vmv.v.v.
424 define <vscale x 16 x i8> @vmacc_vv_nxv16i8_ta(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) {
425 ; CHECK-LABEL: vmacc_vv_nxv16i8_ta:
427 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu
428 ; CHECK-NEXT: vmacc.vv v12, v8, v10, v0.t
429 ; CHECK-NEXT: vmv.v.v v8, v12
431 %x = call <vscale x 16 x i8> @llvm.vp.mul.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
432 %y = call <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
433 %u = call <vscale x 16 x i8> @llvm.vp.select.nxv16i8(<vscale x 16 x i1> %m, <vscale x 16 x i8> %y, <vscale x 16 x i8> %c, i32 %evl)
434 ret <vscale x 16 x i8> %u
; Tail-agnostic form of the masked vector-scalar multiply-accumulate on
; <vscale x 16 x i8>: %b is splatted and the final blend uses vp.select.
; The assertions expect a masked vmacc.vx (scalar in a0) under an
; "e8, m2, ta, mu" vsetvli whose AVL is a1, with the result copied into v8 by
; vmv.v.v.
437 define <vscale x 16 x i8> @vmacc_vx_nxv16i8_ta(<vscale x 16 x i8> %a, i8 %b, <vscale x 16 x i8> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) {
438 ; CHECK-LABEL: vmacc_vx_nxv16i8_ta:
440 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu
441 ; CHECK-NEXT: vmacc.vx v10, a0, v8, v0.t
442 ; CHECK-NEXT: vmv.v.v v8, v10
; Broadcast %b to every lane: insert into lane 0, then shuffle with a zero index vector.
444 %elt.head = insertelement <vscale x 16 x i8> poison, i8 %b, i32 0
445 %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
446 %x = call <vscale x 16 x i8> @llvm.vp.mul.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %vb, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
447 %y = call <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
448 %u = call <vscale x 16 x i8> @llvm.vp.select.nxv16i8(<vscale x 16 x i1> %m, <vscale x 16 x i8> %y, <vscale x 16 x i8> %c, i32 %evl)
449 ret <vscale x 16 x i8> %u
452 declare <vscale x 32 x i8> @llvm.vp.mul.nxv32i8(<vscale x 32 x i8>, <vscale x 32 x i8>, <vscale x 32 x i1>, i32)
453 declare <vscale x 32 x i8> @llvm.vp.add.nxv32i8(<vscale x 32 x i8>, <vscale x 32 x i8>, <vscale x 32 x i1>, i32)
454 declare <vscale x 32 x i8> @llvm.vp.merge.nxv32i8(<vscale x 32 x i1>, <vscale x 32 x i8>, <vscale x 32 x i8>, i32)
455 declare <vscale x 32 x i8> @llvm.vp.select.nxv32i8(<vscale x 32 x i1>, <vscale x 32 x i8>, <vscale x 32 x i8>, i32)
; Masked vector-vector multiply-accumulate on <vscale x 32 x i8>.
; vp.mul(%a, %b) feeds vp.add with %c (both under an all-ones mask); vp.merge
; then takes that sum under mask %m with %c as the other operand.
; The assertions expect one masked vmacc.vv (v0.t) under an "e8, m4, tu, mu"
; vsetvli whose AVL is a0, followed by a vmv4r.v copy into v8.
457 define <vscale x 32 x i8> @vmacc_vv_nxv32i8(<vscale x 32 x i8> %a, <vscale x 32 x i8> %b, <vscale x 32 x i8> %c, <vscale x 32 x i1> %m, i32 zeroext %evl) {
458 ; CHECK-LABEL: vmacc_vv_nxv32i8:
460 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, tu, mu
461 ; CHECK-NEXT: vmacc.vv v16, v8, v12, v0.t
462 ; CHECK-NEXT: vmv4r.v v8, v16
464 %x = call <vscale x 32 x i8> @llvm.vp.mul.nxv32i8(<vscale x 32 x i8> %a, <vscale x 32 x i8> %b, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
465 %y = call <vscale x 32 x i8> @llvm.vp.add.nxv32i8(<vscale x 32 x i8> %x, <vscale x 32 x i8> %c, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
466 %u = call <vscale x 32 x i8> @llvm.vp.merge.nxv32i8(<vscale x 32 x i1> %m, <vscale x 32 x i8> %y, <vscale x 32 x i8> %c, i32 %evl)
467 ret <vscale x 32 x i8> %u
; Unmasked vector-vector multiply-accumulate on <vscale x 32 x i8>.
; Same mul/add chain, but the vp.merge mask is the all-ones splat; the %m
; argument is accepted and never referenced in the body.
; The assertions expect an unmasked vmacc.vv under an "e8, m4, tu, ma" vsetvli.
470 define <vscale x 32 x i8> @vmacc_vv_nxv32i8_unmasked(<vscale x 32 x i8> %a, <vscale x 32 x i8> %b, <vscale x 32 x i8> %c, <vscale x 32 x i1> %m, i32 zeroext %evl) {
471 ; CHECK-LABEL: vmacc_vv_nxv32i8_unmasked:
473 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, tu, ma
474 ; CHECK-NEXT: vmacc.vv v16, v8, v12
475 ; CHECK-NEXT: vmv4r.v v8, v16
477 %x = call <vscale x 32 x i8> @llvm.vp.mul.nxv32i8(<vscale x 32 x i8> %a, <vscale x 32 x i8> %b, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
478 %y = call <vscale x 32 x i8> @llvm.vp.add.nxv32i8(<vscale x 32 x i8> %x, <vscale x 32 x i8> %c, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
479 %u = call <vscale x 32 x i8> @llvm.vp.merge.nxv32i8(<vscale x 32 x i1> splat (i1 -1), <vscale x 32 x i8> %y, <vscale x 32 x i8> %c, i32 %evl)
480 ret <vscale x 32 x i8> %u
; Masked vector-scalar multiply-accumulate on <vscale x 32 x i8>.
; The i8 scalar %b is splatted, multiplied with %a, added to %c, and merged
; with %c under mask %m.
; The assertions expect a masked vmacc.vx reading the scalar from a0, with the
; "e8, m4, tu, mu" vsetvli taking its AVL from a1.
483 define <vscale x 32 x i8> @vmacc_vx_nxv32i8(<vscale x 32 x i8> %a, i8 %b, <vscale x 32 x i8> %c, <vscale x 32 x i1> %m, i32 zeroext %evl) {
484 ; CHECK-LABEL: vmacc_vx_nxv32i8:
486 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, tu, mu
487 ; CHECK-NEXT: vmacc.vx v12, a0, v8, v0.t
488 ; CHECK-NEXT: vmv4r.v v8, v12
; Broadcast %b to every lane: insert into lane 0, then shuffle with a zero index vector.
490 %elt.head = insertelement <vscale x 32 x i8> poison, i8 %b, i32 0
491 %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
492 %x = call <vscale x 32 x i8> @llvm.vp.mul.nxv32i8(<vscale x 32 x i8> %a, <vscale x 32 x i8> %vb, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
493 %y = call <vscale x 32 x i8> @llvm.vp.add.nxv32i8(<vscale x 32 x i8> %x, <vscale x 32 x i8> %c, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
494 %u = call <vscale x 32 x i8> @llvm.vp.merge.nxv32i8(<vscale x 32 x i1> %m, <vscale x 32 x i8> %y, <vscale x 32 x i8> %c, i32 %evl)
495 ret <vscale x 32 x i8> %u
; Unmasked vector-scalar multiply-accumulate on <vscale x 32 x i8>.
; The splat of %b is multiplied with %a and added to %c; the vp.merge mask is
; the all-ones splat and the %m argument is never referenced in the body.
; The assertions expect an unmasked vmacc.vx under an "e8, m4, tu, ma" vsetvli.
498 define <vscale x 32 x i8> @vmacc_vx_nxv32i8_unmasked(<vscale x 32 x i8> %a, i8 %b, <vscale x 32 x i8> %c, <vscale x 32 x i1> %m, i32 zeroext %evl) {
499 ; CHECK-LABEL: vmacc_vx_nxv32i8_unmasked:
501 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, tu, ma
502 ; CHECK-NEXT: vmacc.vx v12, a0, v8
503 ; CHECK-NEXT: vmv4r.v v8, v12
; Broadcast %b to every lane: insert into lane 0, then shuffle with a zero index vector.
505 %elt.head = insertelement <vscale x 32 x i8> poison, i8 %b, i32 0
506 %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
507 %x = call <vscale x 32 x i8> @llvm.vp.mul.nxv32i8(<vscale x 32 x i8> %a, <vscale x 32 x i8> %vb, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
508 %y = call <vscale x 32 x i8> @llvm.vp.add.nxv32i8(<vscale x 32 x i8> %x, <vscale x 32 x i8> %c, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
509 %u = call <vscale x 32 x i8> @llvm.vp.merge.nxv32i8(<vscale x 32 x i1> splat (i1 -1), <vscale x 32 x i8> %y, <vscale x 32 x i8> %c, i32 %evl)
510 ret <vscale x 32 x i8> %u
; "_ta" vmacc.vv test for <vscale x 32 x i8>: vp.mul(%a, %b) + %c under an all-ones mask, with
; the final blend done by vp.select (not vp.merge) on %m.
; Checks expect a masked vmacc.vv under an "e8, m4, ta, mu" vsetvli, then vmv.v.v into v8.
513 define <vscale x 32 x i8> @vmacc_vv_nxv32i8_ta(<vscale x 32 x i8> %a, <vscale x 32 x i8> %b, <vscale x 32 x i8> %c, <vscale x 32 x i1> %m, i32 zeroext %evl) {
514 ; CHECK-LABEL: vmacc_vv_nxv32i8_ta:
516 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu
517 ; CHECK-NEXT: vmacc.vv v16, v8, v12, v0.t
518 ; CHECK-NEXT: vmv.v.v v8, v16
520 %x = call <vscale x 32 x i8> @llvm.vp.mul.nxv32i8(<vscale x 32 x i8> %a, <vscale x 32 x i8> %b, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
521 %y = call <vscale x 32 x i8> @llvm.vp.add.nxv32i8(<vscale x 32 x i8> %x, <vscale x 32 x i8> %c, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
522 %u = call <vscale x 32 x i8> @llvm.vp.select.nxv32i8(<vscale x 32 x i1> %m, <vscale x 32 x i8> %y, <vscale x 32 x i8> %c, i32 %evl)
523 ret <vscale x 32 x i8> %u
; "_ta" vmacc.vx test for <vscale x 32 x i8>: vp.mul(%a, splat(%b)) + %c under an all-ones mask,
; with the final blend done by vp.select (not vp.merge) on %m.
; Checks expect a masked vmacc.vx under an "e8, m4, ta, mu" vsetvli, then vmv.v.v into v8.
526 define <vscale x 32 x i8> @vmacc_vx_nxv32i8_ta(<vscale x 32 x i8> %a, i8 %b, <vscale x 32 x i8> %c, <vscale x 32 x i1> %m, i32 zeroext %evl) {
527 ; CHECK-LABEL: vmacc_vx_nxv32i8_ta:
529 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu
530 ; CHECK-NEXT: vmacc.vx v12, a0, v8, v0.t
531 ; CHECK-NEXT: vmv.v.v v8, v12
; Broadcast scalar %b to all lanes (insertelement into lane 0 + zero-index shufflevector).
533 %elt.head = insertelement <vscale x 32 x i8> poison, i8 %b, i32 0
534 %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
535 %x = call <vscale x 32 x i8> @llvm.vp.mul.nxv32i8(<vscale x 32 x i8> %a, <vscale x 32 x i8> %vb, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
536 %y = call <vscale x 32 x i8> @llvm.vp.add.nxv32i8(<vscale x 32 x i8> %x, <vscale x 32 x i8> %c, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
537 %u = call <vscale x 32 x i8> @llvm.vp.select.nxv32i8(<vscale x 32 x i1> %m, <vscale x 32 x i8> %y, <vscale x 32 x i8> %c, i32 %evl)
538 ret <vscale x 32 x i8> %u
; VP intrinsic declarations (mul, add, merge, select) for the <vscale x 64 x i8> tests.
541 declare <vscale x 64 x i8> @llvm.vp.mul.nxv64i8(<vscale x 64 x i8>, <vscale x 64 x i8>, <vscale x 64 x i1>, i32)
542 declare <vscale x 64 x i8> @llvm.vp.add.nxv64i8(<vscale x 64 x i8>, <vscale x 64 x i8>, <vscale x 64 x i1>, i32)
543 declare <vscale x 64 x i8> @llvm.vp.merge.nxv64i8(<vscale x 64 x i1>, <vscale x 64 x i8>, <vscale x 64 x i8>, i32)
544 declare <vscale x 64 x i8> @llvm.vp.select.nxv64i8(<vscale x 64 x i1>, <vscale x 64 x i8>, <vscale x 64 x i8>, i32)
; Masked vmacc.vv test for <vscale x 64 x i8>: vp.mul(%a, %b) + %c under an all-ones mask,
; followed by vp.merge on %m between the sum and %c.
; Checks expect a vl8r.v load from (a0) into v24 first, then a masked vmacc.vv accumulating into
; v24 under "e8, m8, tu, mu" with the EVL in a1.
; NOTE(review): presumably the load is there because the third m8 vector argument is passed
; indirectly -- confirm against the RISC-V vector calling convention.
546 define <vscale x 64 x i8> @vmacc_vv_nxv64i8(<vscale x 64 x i8> %a, <vscale x 64 x i8> %b, <vscale x 64 x i8> %c, <vscale x 64 x i1> %m, i32 zeroext %evl) {
547 ; CHECK-LABEL: vmacc_vv_nxv64i8:
549 ; CHECK-NEXT: vl8r.v v24, (a0)
550 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, mu
551 ; CHECK-NEXT: vmacc.vv v24, v8, v16, v0.t
552 ; CHECK-NEXT: vmv8r.v v8, v24
554 %x = call <vscale x 64 x i8> @llvm.vp.mul.nxv64i8(<vscale x 64 x i8> %a, <vscale x 64 x i8> %b, <vscale x 64 x i1> splat (i1 -1), i32 %evl)
555 %y = call <vscale x 64 x i8> @llvm.vp.add.nxv64i8(<vscale x 64 x i8> %x, <vscale x 64 x i8> %c, <vscale x 64 x i1> splat (i1 -1), i32 %evl)
556 %u = call <vscale x 64 x i8> @llvm.vp.merge.nxv64i8(<vscale x 64 x i1> %m, <vscale x 64 x i8> %y, <vscale x 64 x i8> %c, i32 %evl)
557 ret <vscale x 64 x i8> %u
; Unmasked vmacc.vv test for <vscale x 64 x i8>: the vp.merge condition is an all-ones splat,
; so the %m argument is not referenced by the body.
; Checks expect a vl8r.v load from (a0) into v24, then an unmasked vmacc.vv (no v0.t)
; accumulating into v24 under "e8, m8, tu, ma" with the EVL in a1.
; NOTE(review): presumably the load is there because the third m8 vector argument is passed
; indirectly -- confirm against the RISC-V vector calling convention.
560 define <vscale x 64 x i8> @vmacc_vv_nxv64i8_unmasked(<vscale x 64 x i8> %a, <vscale x 64 x i8> %b, <vscale x 64 x i8> %c, <vscale x 64 x i1> %m, i32 zeroext %evl) {
561 ; CHECK-LABEL: vmacc_vv_nxv64i8_unmasked:
563 ; CHECK-NEXT: vl8r.v v24, (a0)
564 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, ma
565 ; CHECK-NEXT: vmacc.vv v24, v8, v16
566 ; CHECK-NEXT: vmv8r.v v8, v24
568 %x = call <vscale x 64 x i8> @llvm.vp.mul.nxv64i8(<vscale x 64 x i8> %a, <vscale x 64 x i8> %b, <vscale x 64 x i1> splat (i1 -1), i32 %evl)
569 %y = call <vscale x 64 x i8> @llvm.vp.add.nxv64i8(<vscale x 64 x i8> %x, <vscale x 64 x i8> %c, <vscale x 64 x i1> splat (i1 -1), i32 %evl)
570 %u = call <vscale x 64 x i8> @llvm.vp.merge.nxv64i8(<vscale x 64 x i1> splat (i1 -1), <vscale x 64 x i8> %y, <vscale x 64 x i8> %c, i32 %evl)
571 ret <vscale x 64 x i8> %u
; Masked vmacc.vx test for <vscale x 64 x i8>: vp.mul(%a, splat(%b)) + %c, both under an
; all-ones mask, followed by vp.merge on %m between the sum and %c.
; Checks expect a masked vmacc.vx under an "e8, m8, tu, mu" vsetvli, then vmv8r.v into v8.
574 define <vscale x 64 x i8> @vmacc_vx_nxv64i8(<vscale x 64 x i8> %a, i8 %b, <vscale x 64 x i8> %c, <vscale x 64 x i1> %m, i32 zeroext %evl) {
575 ; CHECK-LABEL: vmacc_vx_nxv64i8:
577 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, mu
578 ; CHECK-NEXT: vmacc.vx v16, a0, v8, v0.t
579 ; CHECK-NEXT: vmv8r.v v8, v16
; Broadcast scalar %b to all lanes (insertelement into lane 0 + zero-index shufflevector).
581 %elt.head = insertelement <vscale x 64 x i8> poison, i8 %b, i32 0
582 %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
583 %x = call <vscale x 64 x i8> @llvm.vp.mul.nxv64i8(<vscale x 64 x i8> %a, <vscale x 64 x i8> %vb, <vscale x 64 x i1> splat (i1 -1), i32 %evl)
584 %y = call <vscale x 64 x i8> @llvm.vp.add.nxv64i8(<vscale x 64 x i8> %x, <vscale x 64 x i8> %c, <vscale x 64 x i1> splat (i1 -1), i32 %evl)
585 %u = call <vscale x 64 x i8> @llvm.vp.merge.nxv64i8(<vscale x 64 x i1> %m, <vscale x 64 x i8> %y, <vscale x 64 x i8> %c, i32 %evl)
586 ret <vscale x 64 x i8> %u
; Unmasked vmacc.vx test for <vscale x 64 x i8>: the vp.merge condition is an all-ones splat,
; so the %m argument is not referenced by the body.
; Checks expect an unmasked vmacc.vx (no v0.t) under an "e8, m8, tu, ma" vsetvli.
589 define <vscale x 64 x i8> @vmacc_vx_nxv64i8_unmasked(<vscale x 64 x i8> %a, i8 %b, <vscale x 64 x i8> %c, <vscale x 64 x i1> %m, i32 zeroext %evl) {
590 ; CHECK-LABEL: vmacc_vx_nxv64i8_unmasked:
592 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, ma
593 ; CHECK-NEXT: vmacc.vx v16, a0, v8
594 ; CHECK-NEXT: vmv8r.v v8, v16
; Broadcast scalar %b to all lanes (insertelement into lane 0 + zero-index shufflevector).
596 %elt.head = insertelement <vscale x 64 x i8> poison, i8 %b, i32 0
597 %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
598 %x = call <vscale x 64 x i8> @llvm.vp.mul.nxv64i8(<vscale x 64 x i8> %a, <vscale x 64 x i8> %vb, <vscale x 64 x i1> splat (i1 -1), i32 %evl)
599 %y = call <vscale x 64 x i8> @llvm.vp.add.nxv64i8(<vscale x 64 x i8> %x, <vscale x 64 x i8> %c, <vscale x 64 x i1> splat (i1 -1), i32 %evl)
600 %u = call <vscale x 64 x i8> @llvm.vp.merge.nxv64i8(<vscale x 64 x i1> splat (i1 -1), <vscale x 64 x i8> %y, <vscale x 64 x i8> %c, i32 %evl)
601 ret <vscale x 64 x i8> %u
; "_ta" vmacc.vv test for <vscale x 64 x i8>: vp.mul(%a, %b) + %c under an all-ones mask, with
; the final blend done by vp.select (not vp.merge) on %m.
; Checks expect a vl8r.v load from (a0) into v24, then a masked vmacc.vv accumulating into v24
; under "e8, m8, ta, mu", then vmv.v.v into v8.
; NOTE(review): presumably the load is there because the third m8 vector argument is passed
; indirectly -- confirm against the RISC-V vector calling convention.
604 define <vscale x 64 x i8> @vmacc_vv_nxv64i8_ta(<vscale x 64 x i8> %a, <vscale x 64 x i8> %b, <vscale x 64 x i8> %c, <vscale x 64 x i1> %m, i32 zeroext %evl) {
605 ; CHECK-LABEL: vmacc_vv_nxv64i8_ta:
607 ; CHECK-NEXT: vl8r.v v24, (a0)
608 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu
609 ; CHECK-NEXT: vmacc.vv v24, v8, v16, v0.t
610 ; CHECK-NEXT: vmv.v.v v8, v24
612 %x = call <vscale x 64 x i8> @llvm.vp.mul.nxv64i8(<vscale x 64 x i8> %a, <vscale x 64 x i8> %b, <vscale x 64 x i1> splat (i1 -1), i32 %evl)
613 %y = call <vscale x 64 x i8> @llvm.vp.add.nxv64i8(<vscale x 64 x i8> %x, <vscale x 64 x i8> %c, <vscale x 64 x i1> splat (i1 -1), i32 %evl)
614 %u = call <vscale x 64 x i8> @llvm.vp.select.nxv64i8(<vscale x 64 x i1> %m, <vscale x 64 x i8> %y, <vscale x 64 x i8> %c, i32 %evl)
615 ret <vscale x 64 x i8> %u
; "_ta" vmacc.vx test for <vscale x 64 x i8>: vp.mul(%a, splat(%b)) + %c under an all-ones mask,
; with the final blend done by vp.select (not vp.merge) on %m.
; Checks expect a masked vmacc.vx under an "e8, m8, ta, mu" vsetvli, then vmv.v.v into v8.
618 define <vscale x 64 x i8> @vmacc_vx_nxv64i8_ta(<vscale x 64 x i8> %a, i8 %b, <vscale x 64 x i8> %c, <vscale x 64 x i1> %m, i32 zeroext %evl) {
619 ; CHECK-LABEL: vmacc_vx_nxv64i8_ta:
621 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu
622 ; CHECK-NEXT: vmacc.vx v16, a0, v8, v0.t
623 ; CHECK-NEXT: vmv.v.v v8, v16
; Broadcast scalar %b to all lanes (insertelement into lane 0 + zero-index shufflevector).
625 %elt.head = insertelement <vscale x 64 x i8> poison, i8 %b, i32 0
626 %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
627 %x = call <vscale x 64 x i8> @llvm.vp.mul.nxv64i8(<vscale x 64 x i8> %a, <vscale x 64 x i8> %vb, <vscale x 64 x i1> splat (i1 -1), i32 %evl)
628 %y = call <vscale x 64 x i8> @llvm.vp.add.nxv64i8(<vscale x 64 x i8> %x, <vscale x 64 x i8> %c, <vscale x 64 x i1> splat (i1 -1), i32 %evl)
629 %u = call <vscale x 64 x i8> @llvm.vp.select.nxv64i8(<vscale x 64 x i1> %m, <vscale x 64 x i8> %y, <vscale x 64 x i8> %c, i32 %evl)
630 ret <vscale x 64 x i8> %u
; VP intrinsic declarations (mul, add, merge, select) for the <vscale x 1 x i16> tests.
633 declare <vscale x 1 x i16> @llvm.vp.mul.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i1>, i32)
634 declare <vscale x 1 x i16> @llvm.vp.add.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i1>, i32)
635 declare <vscale x 1 x i16> @llvm.vp.merge.nxv1i16(<vscale x 1 x i1>, <vscale x 1 x i16>, <vscale x 1 x i16>, i32)
636 declare <vscale x 1 x i16> @llvm.vp.select.nxv1i16(<vscale x 1 x i1>, <vscale x 1 x i16>, <vscale x 1 x i16>, i32)
; Masked vmacc.vv test for <vscale x 1 x i16>: vp.mul(%a, %b) + %c under an all-ones mask,
; followed by vp.merge on %m between the sum and %c.
; Checks expect a masked vmacc.vv under an "e16, mf4, tu, mu" vsetvli, then vmv1r.v into v8.
638 define <vscale x 1 x i16> @vmacc_vv_nxv1i16(<vscale x 1 x i16> %a, <vscale x 1 x i16> %b, <vscale x 1 x i16> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
639 ; CHECK-LABEL: vmacc_vv_nxv1i16:
641 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu
642 ; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t
643 ; CHECK-NEXT: vmv1r.v v8, v10
645 %x = call <vscale x 1 x i16> @llvm.vp.mul.nxv1i16(<vscale x 1 x i16> %a, <vscale x 1 x i16> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
646 %y = call <vscale x 1 x i16> @llvm.vp.add.nxv1i16(<vscale x 1 x i16> %x, <vscale x 1 x i16> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
647 %u = call <vscale x 1 x i16> @llvm.vp.merge.nxv1i16(<vscale x 1 x i1> %m, <vscale x 1 x i16> %y, <vscale x 1 x i16> %c, i32 %evl)
648 ret <vscale x 1 x i16> %u
; Unmasked vmacc.vv test for <vscale x 1 x i16>: the vp.merge condition is an all-ones splat,
; so the %m argument is not referenced by the body.
; Checks expect an unmasked vmacc.vv (no v0.t) under an "e16, mf4, tu, ma" vsetvli.
651 define <vscale x 1 x i16> @vmacc_vv_nxv1i16_unmasked(<vscale x 1 x i16> %a, <vscale x 1 x i16> %b, <vscale x 1 x i16> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
652 ; CHECK-LABEL: vmacc_vv_nxv1i16_unmasked:
654 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, ma
655 ; CHECK-NEXT: vmacc.vv v10, v8, v9
656 ; CHECK-NEXT: vmv1r.v v8, v10
658 %x = call <vscale x 1 x i16> @llvm.vp.mul.nxv1i16(<vscale x 1 x i16> %a, <vscale x 1 x i16> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
659 %y = call <vscale x 1 x i16> @llvm.vp.add.nxv1i16(<vscale x 1 x i16> %x, <vscale x 1 x i16> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
660 %u = call <vscale x 1 x i16> @llvm.vp.merge.nxv1i16(<vscale x 1 x i1> splat (i1 -1), <vscale x 1 x i16> %y, <vscale x 1 x i16> %c, i32 %evl)
661 ret <vscale x 1 x i16> %u
; Masked vmacc.vx test for <vscale x 1 x i16>: vp.mul(%a, splat(%b)) + %c, both under an
; all-ones mask, followed by vp.merge on %m between the sum and %c.
; Checks expect a masked vmacc.vx under an "e16, mf4, tu, mu" vsetvli, then vmv1r.v into v8.
664 define <vscale x 1 x i16> @vmacc_vx_nxv1i16(<vscale x 1 x i16> %a, i16 %b, <vscale x 1 x i16> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
665 ; CHECK-LABEL: vmacc_vx_nxv1i16:
667 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu
668 ; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t
669 ; CHECK-NEXT: vmv1r.v v8, v9
; Broadcast scalar %b to all lanes (insertelement into lane 0 + zero-index shufflevector).
671 %elt.head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0
672 %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
673 %x = call <vscale x 1 x i16> @llvm.vp.mul.nxv1i16(<vscale x 1 x i16> %a, <vscale x 1 x i16> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
674 %y = call <vscale x 1 x i16> @llvm.vp.add.nxv1i16(<vscale x 1 x i16> %x, <vscale x 1 x i16> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
675 %u = call <vscale x 1 x i16> @llvm.vp.merge.nxv1i16(<vscale x 1 x i1> %m, <vscale x 1 x i16> %y, <vscale x 1 x i16> %c, i32 %evl)
676 ret <vscale x 1 x i16> %u
; Unmasked vmacc.vx test for <vscale x 1 x i16>: the vp.merge condition is an all-ones splat,
; so the %m argument is not referenced by the body.
; Checks expect an unmasked vmacc.vx (no v0.t) under an "e16, mf4, tu, ma" vsetvli.
679 define <vscale x 1 x i16> @vmacc_vx_nxv1i16_unmasked(<vscale x 1 x i16> %a, i16 %b, <vscale x 1 x i16> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
680 ; CHECK-LABEL: vmacc_vx_nxv1i16_unmasked:
682 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, ma
683 ; CHECK-NEXT: vmacc.vx v9, a0, v8
684 ; CHECK-NEXT: vmv1r.v v8, v9
; Broadcast scalar %b to all lanes (insertelement into lane 0 + zero-index shufflevector).
686 %elt.head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0
687 %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
688 %x = call <vscale x 1 x i16> @llvm.vp.mul.nxv1i16(<vscale x 1 x i16> %a, <vscale x 1 x i16> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
689 %y = call <vscale x 1 x i16> @llvm.vp.add.nxv1i16(<vscale x 1 x i16> %x, <vscale x 1 x i16> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
690 %u = call <vscale x 1 x i16> @llvm.vp.merge.nxv1i16(<vscale x 1 x i1> splat (i1 -1), <vscale x 1 x i16> %y, <vscale x 1 x i16> %c, i32 %evl)
691 ret <vscale x 1 x i16> %u
; "_ta" vmacc.vv test for <vscale x 1 x i16>: vp.mul(%a, %b) + %c under an all-ones mask, with
; the final blend done by vp.select (not vp.merge) on %m.
; Checks expect a masked vmacc.vv under an "e16, mf4, ta, mu" vsetvli, then vmv1r.v into v8.
694 define <vscale x 1 x i16> @vmacc_vv_nxv1i16_ta(<vscale x 1 x i16> %a, <vscale x 1 x i16> %b, <vscale x 1 x i16> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
695 ; CHECK-LABEL: vmacc_vv_nxv1i16_ta:
697 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
698 ; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t
699 ; CHECK-NEXT: vmv1r.v v8, v10
701 %x = call <vscale x 1 x i16> @llvm.vp.mul.nxv1i16(<vscale x 1 x i16> %a, <vscale x 1 x i16> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
702 %y = call <vscale x 1 x i16> @llvm.vp.add.nxv1i16(<vscale x 1 x i16> %x, <vscale x 1 x i16> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
703 %u = call <vscale x 1 x i16> @llvm.vp.select.nxv1i16(<vscale x 1 x i1> %m, <vscale x 1 x i16> %y, <vscale x 1 x i16> %c, i32 %evl)
704 ret <vscale x 1 x i16> %u
; "_ta" vmacc.vx test for <vscale x 1 x i16>: vp.mul(%a, splat(%b)) + %c under an all-ones mask,
; with the final blend done by vp.select (not vp.merge) on %m.
; Checks expect a masked vmacc.vx under an "e16, mf4, ta, mu" vsetvli, then vmv1r.v into v8.
707 define <vscale x 1 x i16> @vmacc_vx_nxv1i16_ta(<vscale x 1 x i16> %a, i16 %b, <vscale x 1 x i16> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
708 ; CHECK-LABEL: vmacc_vx_nxv1i16_ta:
710 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
711 ; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t
712 ; CHECK-NEXT: vmv1r.v v8, v9
; Broadcast scalar %b to all lanes (insertelement into lane 0 + zero-index shufflevector).
714 %elt.head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0
715 %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
716 %x = call <vscale x 1 x i16> @llvm.vp.mul.nxv1i16(<vscale x 1 x i16> %a, <vscale x 1 x i16> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
717 %y = call <vscale x 1 x i16> @llvm.vp.add.nxv1i16(<vscale x 1 x i16> %x, <vscale x 1 x i16> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
718 %u = call <vscale x 1 x i16> @llvm.vp.select.nxv1i16(<vscale x 1 x i1> %m, <vscale x 1 x i16> %y, <vscale x 1 x i16> %c, i32 %evl)
719 ret <vscale x 1 x i16> %u
; VP intrinsic declarations (mul, add, merge, select) for the <vscale x 2 x i16> tests.
722 declare <vscale x 2 x i16> @llvm.vp.mul.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>, <vscale x 2 x i1>, i32)
723 declare <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>, <vscale x 2 x i1>, i32)
724 declare <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1>, <vscale x 2 x i16>, <vscale x 2 x i16>, i32)
725 declare <vscale x 2 x i16> @llvm.vp.select.nxv2i16(<vscale x 2 x i1>, <vscale x 2 x i16>, <vscale x 2 x i16>, i32)
; Masked vmacc.vv test for <vscale x 2 x i16>: vp.mul(%a, %b) + %c under an all-ones mask,
; followed by vp.merge on %m between the sum and %c.
; Checks expect a masked vmacc.vv under an "e16, mf2, tu, mu" vsetvli, then vmv1r.v into v8.
727 define <vscale x 2 x i16> @vmacc_vv_nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, <vscale x 2 x i16> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
728 ; CHECK-LABEL: vmacc_vv_nxv2i16:
730 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu
731 ; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t
732 ; CHECK-NEXT: vmv1r.v v8, v10
734 %x = call <vscale x 2 x i16> @llvm.vp.mul.nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
735 %y = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i16> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
736 %u = call <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1> %m, <vscale x 2 x i16> %y, <vscale x 2 x i16> %c, i32 %evl)
737 ret <vscale x 2 x i16> %u
; Unmasked vmacc.vv test for <vscale x 2 x i16>: the vp.merge condition is an all-ones splat,
; so the %m argument is not referenced by the body.
; Checks expect an unmasked vmacc.vv (no v0.t) under an "e16, mf2, tu, ma" vsetvli.
740 define <vscale x 2 x i16> @vmacc_vv_nxv2i16_unmasked(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, <vscale x 2 x i16> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
741 ; CHECK-LABEL: vmacc_vv_nxv2i16_unmasked:
743 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, ma
744 ; CHECK-NEXT: vmacc.vv v10, v8, v9
745 ; CHECK-NEXT: vmv1r.v v8, v10
747 %x = call <vscale x 2 x i16> @llvm.vp.mul.nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
748 %y = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i16> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
749 %u = call <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1> splat (i1 -1), <vscale x 2 x i16> %y, <vscale x 2 x i16> %c, i32 %evl)
750 ret <vscale x 2 x i16> %u
; Masked vmacc.vx test for <vscale x 2 x i16>: vp.mul(%a, splat(%b)) + %c, both under an
; all-ones mask, followed by vp.merge on %m between the sum and %c.
; Checks expect a masked vmacc.vx under an "e16, mf2, tu, mu" vsetvli, then vmv1r.v into v8.
753 define <vscale x 2 x i16> @vmacc_vx_nxv2i16(<vscale x 2 x i16> %a, i16 %b, <vscale x 2 x i16> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
754 ; CHECK-LABEL: vmacc_vx_nxv2i16:
756 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu
757 ; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t
758 ; CHECK-NEXT: vmv1r.v v8, v9
; Broadcast scalar %b to all lanes (insertelement into lane 0 + zero-index shufflevector).
760 %elt.head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
761 %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
762 %x = call <vscale x 2 x i16> @llvm.vp.mul.nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
763 %y = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i16> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
764 %u = call <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1> %m, <vscale x 2 x i16> %y, <vscale x 2 x i16> %c, i32 %evl)
765 ret <vscale x 2 x i16> %u
; Unmasked vmacc.vx test for <vscale x 2 x i16>: the vp.merge condition is an all-ones splat,
; so the %m argument is not referenced by the body.
; Checks expect an unmasked vmacc.vx (no v0.t) under an "e16, mf2, tu, ma" vsetvli.
768 define <vscale x 2 x i16> @vmacc_vx_nxv2i16_unmasked(<vscale x 2 x i16> %a, i16 %b, <vscale x 2 x i16> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
769 ; CHECK-LABEL: vmacc_vx_nxv2i16_unmasked:
771 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, ma
772 ; CHECK-NEXT: vmacc.vx v9, a0, v8
773 ; CHECK-NEXT: vmv1r.v v8, v9
; Broadcast scalar %b to all lanes (insertelement into lane 0 + zero-index shufflevector).
775 %elt.head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
776 %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
777 %x = call <vscale x 2 x i16> @llvm.vp.mul.nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
778 %y = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i16> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
779 %u = call <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1> splat (i1 -1), <vscale x 2 x i16> %y, <vscale x 2 x i16> %c, i32 %evl)
780 ret <vscale x 2 x i16> %u
; "_ta" vmacc.vv test for <vscale x 2 x i16>: vp.mul(%a, %b) + %c under an all-ones mask, with
; the final blend done by vp.select (not vp.merge) on %m.
; Checks expect a masked vmacc.vv under an "e16, mf2, ta, mu" vsetvli, then vmv1r.v into v8.
783 define <vscale x 2 x i16> @vmacc_vv_nxv2i16_ta(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, <vscale x 2 x i16> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
784 ; CHECK-LABEL: vmacc_vv_nxv2i16_ta:
786 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
787 ; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t
788 ; CHECK-NEXT: vmv1r.v v8, v10
790 %x = call <vscale x 2 x i16> @llvm.vp.mul.nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
791 %y = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i16> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
792 %u = call <vscale x 2 x i16> @llvm.vp.select.nxv2i16(<vscale x 2 x i1> %m, <vscale x 2 x i16> %y, <vscale x 2 x i16> %c, i32 %evl)
793 ret <vscale x 2 x i16> %u
; "_ta" vmacc.vx test for <vscale x 2 x i16>: vp.mul(%a, splat(%b)) + %c under an all-ones mask,
; with the final blend done by vp.select (not vp.merge) on %m.
; Checks expect a masked vmacc.vx under an "e16, mf2, ta, mu" vsetvli, then vmv1r.v into v8.
796 define <vscale x 2 x i16> @vmacc_vx_nxv2i16_ta(<vscale x 2 x i16> %a, i16 %b, <vscale x 2 x i16> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
797 ; CHECK-LABEL: vmacc_vx_nxv2i16_ta:
799 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
800 ; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t
801 ; CHECK-NEXT: vmv1r.v v8, v9
; Broadcast scalar %b to all lanes (insertelement into lane 0 + zero-index shufflevector).
803 %elt.head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
804 %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
805 %x = call <vscale x 2 x i16> @llvm.vp.mul.nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
806 %y = call <vscale x 2 x i16> @llvm.vp.add.nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i16> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
807 %u = call <vscale x 2 x i16> @llvm.vp.select.nxv2i16(<vscale x 2 x i1> %m, <vscale x 2 x i16> %y, <vscale x 2 x i16> %c, i32 %evl)
808 ret <vscale x 2 x i16> %u
; VP intrinsic declarations (mul, add, merge, select) for the <vscale x 4 x i16> tests.
811 declare <vscale x 4 x i16> @llvm.vp.mul.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i1>, i32)
812 declare <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i1>, i32)
813 declare <vscale x 4 x i16> @llvm.vp.merge.nxv4i16(<vscale x 4 x i1>, <vscale x 4 x i16>, <vscale x 4 x i16>, i32)
814 declare <vscale x 4 x i16> @llvm.vp.select.nxv4i16(<vscale x 4 x i1>, <vscale x 4 x i16>, <vscale x 4 x i16>, i32)
; Masked vmacc.vv test for <vscale x 4 x i16>: vp.mul(%a, %b) + %c under an all-ones mask,
; followed by vp.merge on %m between the sum and %c.
; Checks expect a masked vmacc.vv under an "e16, m1, tu, mu" vsetvli, then vmv1r.v into v8.
816 define <vscale x 4 x i16> @vmacc_vv_nxv4i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
817 ; CHECK-LABEL: vmacc_vv_nxv4i16:
819 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu
820 ; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t
821 ; CHECK-NEXT: vmv1r.v v8, v10
823 %x = call <vscale x 4 x i16> @llvm.vp.mul.nxv4i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
824 %y = call <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16> %x, <vscale x 4 x i16> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
825 %u = call <vscale x 4 x i16> @llvm.vp.merge.nxv4i16(<vscale x 4 x i1> %m, <vscale x 4 x i16> %y, <vscale x 4 x i16> %c, i32 %evl)
826 ret <vscale x 4 x i16> %u
; Unmasked vmacc.vv test for <vscale x 4 x i16>: the vp.merge condition is an all-ones splat,
; so the %m argument is not referenced by the body.
; Checks expect an unmasked vmacc.vv (no v0.t) under an "e16, m1, tu, ma" vsetvli.
829 define <vscale x 4 x i16> @vmacc_vv_nxv4i16_unmasked(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
830 ; CHECK-LABEL: vmacc_vv_nxv4i16_unmasked:
832 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
833 ; CHECK-NEXT: vmacc.vv v10, v8, v9
834 ; CHECK-NEXT: vmv1r.v v8, v10
836 %x = call <vscale x 4 x i16> @llvm.vp.mul.nxv4i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
837 %y = call <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16> %x, <vscale x 4 x i16> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
838 %u = call <vscale x 4 x i16> @llvm.vp.merge.nxv4i16(<vscale x 4 x i1> splat (i1 -1), <vscale x 4 x i16> %y, <vscale x 4 x i16> %c, i32 %evl)
839 ret <vscale x 4 x i16> %u
; Masked vmacc.vx test for <vscale x 4 x i16>: vp.mul(%a, splat(%b)) + %c, both under an
; all-ones mask, followed by vp.merge on %m between the sum and %c.
; Checks expect a masked vmacc.vx under an "e16, m1, tu, mu" vsetvli, then vmv1r.v into v8.
842 define <vscale x 4 x i16> @vmacc_vx_nxv4i16(<vscale x 4 x i16> %a, i16 %b, <vscale x 4 x i16> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
843 ; CHECK-LABEL: vmacc_vx_nxv4i16:
845 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu
846 ; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t
847 ; CHECK-NEXT: vmv1r.v v8, v9
; Broadcast scalar %b to all lanes (insertelement into lane 0 + zero-index shufflevector).
849 %elt.head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
850 %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
851 %x = call <vscale x 4 x i16> @llvm.vp.mul.nxv4i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
852 %y = call <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16> %x, <vscale x 4 x i16> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
853 %u = call <vscale x 4 x i16> @llvm.vp.merge.nxv4i16(<vscale x 4 x i1> %m, <vscale x 4 x i16> %y, <vscale x 4 x i16> %c, i32 %evl)
854 ret <vscale x 4 x i16> %u
; Unmasked vmacc.vx test for <vscale x 4 x i16>: the vp.merge condition is an all-ones splat,
; so the %m argument is not referenced by the body.
; Checks expect an unmasked vmacc.vx (no v0.t) under an "e16, m1, tu, ma" vsetvli.
857 define <vscale x 4 x i16> @vmacc_vx_nxv4i16_unmasked(<vscale x 4 x i16> %a, i16 %b, <vscale x 4 x i16> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
858 ; CHECK-LABEL: vmacc_vx_nxv4i16_unmasked:
860 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
861 ; CHECK-NEXT: vmacc.vx v9, a0, v8
862 ; CHECK-NEXT: vmv1r.v v8, v9
; Broadcast scalar %b to all lanes (insertelement into lane 0 + zero-index shufflevector).
864 %elt.head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
865 %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
866 %x = call <vscale x 4 x i16> @llvm.vp.mul.nxv4i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
867 %y = call <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16> %x, <vscale x 4 x i16> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
868 %u = call <vscale x 4 x i16> @llvm.vp.merge.nxv4i16(<vscale x 4 x i1> splat (i1 -1), <vscale x 4 x i16> %y, <vscale x 4 x i16> %c, i32 %evl)
869 ret <vscale x 4 x i16> %u
; "_ta" vmacc.vv test for <vscale x 4 x i16>: vp.mul(%a, %b) + %c under an all-ones mask, with
; the final blend done by vp.select (not vp.merge) on %m.
; Checks expect a masked vmacc.vv under an "e16, m1, ta, mu" vsetvli, then vmv.v.v into v8.
872 define <vscale x 4 x i16> @vmacc_vv_nxv4i16_ta(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
873 ; CHECK-LABEL: vmacc_vv_nxv4i16_ta:
875 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
876 ; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t
877 ; CHECK-NEXT: vmv.v.v v8, v10
879 %x = call <vscale x 4 x i16> @llvm.vp.mul.nxv4i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
880 %y = call <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16> %x, <vscale x 4 x i16> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
881 %u = call <vscale x 4 x i16> @llvm.vp.select.nxv4i16(<vscale x 4 x i1> %m, <vscale x 4 x i16> %y, <vscale x 4 x i16> %c, i32 %evl)
882 ret <vscale x 4 x i16> %u
; "_ta" vmacc.vx test for <vscale x 4 x i16>: vp.mul(%a, splat(%b)) + %c under an all-ones mask,
; with the final blend done by vp.select (not vp.merge) on %m.
; Checks expect a masked vmacc.vx under an "e16, m1, ta, mu" vsetvli, then vmv.v.v into v8.
885 define <vscale x 4 x i16> @vmacc_vx_nxv4i16_ta(<vscale x 4 x i16> %a, i16 %b, <vscale x 4 x i16> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
886 ; CHECK-LABEL: vmacc_vx_nxv4i16_ta:
888 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
889 ; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t
890 ; CHECK-NEXT: vmv.v.v v8, v9
; Broadcast scalar %b to all lanes (insertelement into lane 0 + zero-index shufflevector).
892 %elt.head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
893 %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
894 %x = call <vscale x 4 x i16> @llvm.vp.mul.nxv4i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
895 %y = call <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16> %x, <vscale x 4 x i16> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
896 %u = call <vscale x 4 x i16> @llvm.vp.select.nxv4i16(<vscale x 4 x i1> %m, <vscale x 4 x i16> %y, <vscale x 4 x i16> %c, i32 %evl)
897 ret <vscale x 4 x i16> %u
; VP intrinsic declarations (mul, add, merge, select) for the <vscale x 8 x i16> tests.
900 declare <vscale x 8 x i16> @llvm.vp.mul.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i32)
901 declare <vscale x 8 x i16> @llvm.vp.add.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i32)
902 declare <vscale x 8 x i16> @llvm.vp.merge.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32)
903 declare <vscale x 8 x i16> @llvm.vp.select.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32)
; Masked vmacc.vv test for <vscale x 8 x i16>: vp.mul(%a, %b) + %c under an all-ones mask,
; followed by vp.merge on %m between the sum and %c.
; Checks expect a masked vmacc.vv under an "e16, m2, tu, mu" vsetvli, then vmv2r.v into v8.
905 define <vscale x 8 x i16> @vmacc_vv_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
906 ; CHECK-LABEL: vmacc_vv_nxv8i16:
908 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu
909 ; CHECK-NEXT: vmacc.vv v12, v8, v10, v0.t
910 ; CHECK-NEXT: vmv2r.v v8, v12
912 %x = call <vscale x 8 x i16> @llvm.vp.mul.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
913 %y = call <vscale x 8 x i16> @llvm.vp.add.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
914 %u = call <vscale x 8 x i16> @llvm.vp.merge.nxv8i16(<vscale x 8 x i1> %m, <vscale x 8 x i16> %y, <vscale x 8 x i16> %c, i32 %evl)
915 ret <vscale x 8 x i16> %u
; Unmasked vmacc.vv test for <vscale x 8 x i16>: the vp.merge condition is an all-ones splat,
; so the %m argument is not referenced by the body.
; Checks expect an unmasked vmacc.vv (no v0.t) under an "e16, m2, tu, ma" vsetvli.
918 define <vscale x 8 x i16> @vmacc_vv_nxv8i16_unmasked(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
919 ; CHECK-LABEL: vmacc_vv_nxv8i16_unmasked:
921 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, ma
922 ; CHECK-NEXT: vmacc.vv v12, v8, v10
923 ; CHECK-NEXT: vmv2r.v v8, v12
925 %x = call <vscale x 8 x i16> @llvm.vp.mul.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
926 %y = call <vscale x 8 x i16> @llvm.vp.add.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
927 %u = call <vscale x 8 x i16> @llvm.vp.merge.nxv8i16(<vscale x 8 x i1> splat (i1 -1), <vscale x 8 x i16> %y, <vscale x 8 x i16> %c, i32 %evl)
928 ret <vscale x 8 x i16> %u
; Masked vmacc.vx test for <vscale x 8 x i16>: vp.mul(%a, splat(%b)) + %c, both under an
; all-ones mask, followed by vp.merge on %m between the sum and %c.
; Checks expect a masked vmacc.vx under an "e16, m2, tu, mu" vsetvli, then vmv2r.v into v8.
931 define <vscale x 8 x i16> @vmacc_vx_nxv8i16(<vscale x 8 x i16> %a, i16 %b, <vscale x 8 x i16> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
932 ; CHECK-LABEL: vmacc_vx_nxv8i16:
934 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu
935 ; CHECK-NEXT: vmacc.vx v10, a0, v8, v0.t
936 ; CHECK-NEXT: vmv2r.v v8, v10
; Broadcast scalar %b to all lanes (insertelement into lane 0 + zero-index shufflevector).
938 %elt.head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0
939 %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
940 %x = call <vscale x 8 x i16> @llvm.vp.mul.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
941 %y = call <vscale x 8 x i16> @llvm.vp.add.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
942 %u = call <vscale x 8 x i16> @llvm.vp.merge.nxv8i16(<vscale x 8 x i1> %m, <vscale x 8 x i16> %y, <vscale x 8 x i16> %c, i32 %evl)
943 ret <vscale x 8 x i16> %u
; Unmasked vmacc.vx test for <vscale x 8 x i16>: the vp.merge condition is an all-ones splat,
; so the %m argument is not referenced by the body.
; Checks expect an unmasked vmacc.vx (no v0.t) under an "e16, m2, tu, ma" vsetvli.
946 define <vscale x 8 x i16> @vmacc_vx_nxv8i16_unmasked(<vscale x 8 x i16> %a, i16 %b, <vscale x 8 x i16> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
947 ; CHECK-LABEL: vmacc_vx_nxv8i16_unmasked:
949 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, ma
950 ; CHECK-NEXT: vmacc.vx v10, a0, v8
951 ; CHECK-NEXT: vmv2r.v v8, v10
; Broadcast scalar %b to all lanes (insertelement into lane 0 + zero-index shufflevector).
953 %elt.head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0
954 %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
955 %x = call <vscale x 8 x i16> @llvm.vp.mul.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
956 %y = call <vscale x 8 x i16> @llvm.vp.add.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
957 %u = call <vscale x 8 x i16> @llvm.vp.merge.nxv8i16(<vscale x 8 x i1> splat (i1 -1), <vscale x 8 x i16> %y, <vscale x 8 x i16> %c, i32 %evl)
958 ret <vscale x 8 x i16> %u
; "_ta" vmacc.vv test for <vscale x 8 x i16>: vp.mul(%a, %b) + %c under an all-ones mask, with
; the final blend done by vp.select (not vp.merge) on %m.
; Checks expect a masked vmacc.vv under an "e16, m2, ta, mu" vsetvli, then vmv.v.v into v8.
961 define <vscale x 8 x i16> @vmacc_vv_nxv8i16_ta(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
962 ; CHECK-LABEL: vmacc_vv_nxv8i16_ta:
964 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
965 ; CHECK-NEXT: vmacc.vv v12, v8, v10, v0.t
966 ; CHECK-NEXT: vmv.v.v v8, v12
968 %x = call <vscale x 8 x i16> @llvm.vp.mul.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
969 %y = call <vscale x 8 x i16> @llvm.vp.add.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
970 %u = call <vscale x 8 x i16> @llvm.vp.select.nxv8i16(<vscale x 8 x i1> %m, <vscale x 8 x i16> %y, <vscale x 8 x i16> %c, i32 %evl)
971 ret <vscale x 8 x i16> %u
; "_ta" vmacc.vx test for <vscale x 8 x i16>: vp.mul(%a, splat(%b)) + %c under an all-ones mask,
; with the final blend done by vp.select (not vp.merge) on %m.
; Checks expect a masked vmacc.vx under an "e16, m2, ta, mu" vsetvli, then vmv.v.v into v8.
974 define <vscale x 8 x i16> @vmacc_vx_nxv8i16_ta(<vscale x 8 x i16> %a, i16 %b, <vscale x 8 x i16> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
975 ; CHECK-LABEL: vmacc_vx_nxv8i16_ta:
977 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
978 ; CHECK-NEXT: vmacc.vx v10, a0, v8, v0.t
979 ; CHECK-NEXT: vmv.v.v v8, v10
; Broadcast scalar %b to all lanes (insertelement into lane 0 + zero-index shufflevector).
981 %elt.head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0
982 %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
983 %x = call <vscale x 8 x i16> @llvm.vp.mul.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
984 %y = call <vscale x 8 x i16> @llvm.vp.add.nxv8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
985 %u = call <vscale x 8 x i16> @llvm.vp.select.nxv8i16(<vscale x 8 x i1> %m, <vscale x 8 x i16> %y, <vscale x 8 x i16> %c, i32 %evl)
986 ret <vscale x 8 x i16> %u
; VP intrinsic declarations (mul, add, merge, select) for the <vscale x 16 x i16> tests.
989 declare <vscale x 16 x i16> @llvm.vp.mul.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>, <vscale x 16 x i1>, i32)
990 declare <vscale x 16 x i16> @llvm.vp.add.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>, <vscale x 16 x i1>, i32)
991 declare <vscale x 16 x i16> @llvm.vp.merge.nxv16i16(<vscale x 16 x i1>, <vscale x 16 x i16>, <vscale x 16 x i16>, i32)
992 declare <vscale x 16 x i16> @llvm.vp.select.nxv16i16(<vscale x 16 x i1>, <vscale x 16 x i16>, <vscale x 16 x i16>, i32)
; Masked vmacc.vv test for <vscale x 16 x i16>: vp.mul(%a, %b) + %c under an all-ones mask,
; followed by vp.merge on %m between the sum and %c.
; Checks expect a masked vmacc.vv under an "e16, m4, tu, mu" vsetvli, then vmv4r.v into v8.
994 define <vscale x 16 x i16> @vmacc_vv_nxv16i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %b, <vscale x 16 x i16> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) {
995 ; CHECK-LABEL: vmacc_vv_nxv16i16:
997 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu
998 ; CHECK-NEXT: vmacc.vv v16, v8, v12, v0.t
999 ; CHECK-NEXT: vmv4r.v v8, v16
1001 %x = call <vscale x 16 x i16> @llvm.vp.mul.nxv16i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %b, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
1002 %y = call <vscale x 16 x i16> @llvm.vp.add.nxv16i16(<vscale x 16 x i16> %x, <vscale x 16 x i16> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
1003 %u = call <vscale x 16 x i16> @llvm.vp.merge.nxv16i16(<vscale x 16 x i1> %m, <vscale x 16 x i16> %y, <vscale x 16 x i16> %c, i32 %evl)
1004 ret <vscale x 16 x i16> %u
; Unmasked vmacc.vv test for <vscale x 16 x i16>: the vp.merge condition is an all-ones splat,
; so the %m argument is not referenced by the body.
; Checks expect an unmasked vmacc.vv (no v0.t) under an "e16, m4, tu, ma" vsetvli.
1007 define <vscale x 16 x i16> @vmacc_vv_nxv16i16_unmasked(<vscale x 16 x i16> %a, <vscale x 16 x i16> %b, <vscale x 16 x i16> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) {
1008 ; CHECK-LABEL: vmacc_vv_nxv16i16_unmasked:
1010 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, ma
1011 ; CHECK-NEXT: vmacc.vv v16, v8, v12
1012 ; CHECK-NEXT: vmv4r.v v8, v16
1014 %x = call <vscale x 16 x i16> @llvm.vp.mul.nxv16i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %b, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
1015 %y = call <vscale x 16 x i16> @llvm.vp.add.nxv16i16(<vscale x 16 x i16> %x, <vscale x 16 x i16> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
1016 %u = call <vscale x 16 x i16> @llvm.vp.merge.nxv16i16(<vscale x 16 x i1> splat (i1 -1), <vscale x 16 x i16> %y, <vscale x 16 x i16> %c, i32 %evl)
1017 ret <vscale x 16 x i16> %u
; Scalar %b is splatted (insertelement + shufflevector) and fed to the
; vp.mul / vp.add / vp.merge(%m) chain. The CHECK lines expect a masked
; vmacc.vx taking the scalar from a0 and the EVL from a1, under "tu, mu".
1020 define <vscale x 16 x i16> @vmacc_vx_nxv16i16(<vscale x 16 x i16> %a, i16 %b, <vscale x 16 x i16> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) {
1021 ; CHECK-LABEL: vmacc_vx_nxv16i16:
1023 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu
1024 ; CHECK-NEXT: vmacc.vx v12, a0, v8, v0.t
1025 ; CHECK-NEXT: vmv4r.v v8, v12
1027 %elt.head = insertelement <vscale x 16 x i16> poison, i16 %b, i32 0
1028 %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
1029 %x = call <vscale x 16 x i16> @llvm.vp.mul.nxv16i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %vb, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
1030 %y = call <vscale x 16 x i16> @llvm.vp.add.nxv16i16(<vscale x 16 x i16> %x, <vscale x 16 x i16> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
1031 %u = call <vscale x 16 x i16> @llvm.vp.merge.nxv16i16(<vscale x 16 x i1> %m, <vscale x 16 x i16> %y, <vscale x 16 x i16> %c, i32 %evl)
1032 ret <vscale x 16 x i16> %u
; Splatted scalar %b with an all-true vp.merge mask; %m is declared and left
; unused. The CHECK lines expect an unmasked vmacc.vx under "tu, ma".
1035 define <vscale x 16 x i16> @vmacc_vx_nxv16i16_unmasked(<vscale x 16 x i16> %a, i16 %b, <vscale x 16 x i16> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) {
1036 ; CHECK-LABEL: vmacc_vx_nxv16i16_unmasked:
1038 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, ma
1039 ; CHECK-NEXT: vmacc.vx v12, a0, v8
1040 ; CHECK-NEXT: vmv4r.v v8, v12
1042 %elt.head = insertelement <vscale x 16 x i16> poison, i16 %b, i32 0
1043 %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
1044 %x = call <vscale x 16 x i16> @llvm.vp.mul.nxv16i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %vb, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
1045 %y = call <vscale x 16 x i16> @llvm.vp.add.nxv16i16(<vscale x 16 x i16> %x, <vscale x 16 x i16> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
1046 %u = call <vscale x 16 x i16> @llvm.vp.merge.nxv16i16(<vscale x 16 x i1> splat (i1 -1), <vscale x 16 x i16> %y, <vscale x 16 x i16> %c, i32 %evl)
1047 ret <vscale x 16 x i16> %u
; Final blend uses vp.select rather than vp.merge. The CHECK lines expect a
; masked vmacc.vv under "ta, mu" and a vmv.v.v (not a whole-register move)
; to copy the result into v8.
1050 define <vscale x 16 x i16> @vmacc_vv_nxv16i16_ta(<vscale x 16 x i16> %a, <vscale x 16 x i16> %b, <vscale x 16 x i16> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) {
1051 ; CHECK-LABEL: vmacc_vv_nxv16i16_ta:
1053 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
1054 ; CHECK-NEXT: vmacc.vv v16, v8, v12, v0.t
1055 ; CHECK-NEXT: vmv.v.v v8, v16
1057 %x = call <vscale x 16 x i16> @llvm.vp.mul.nxv16i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %b, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
1058 %y = call <vscale x 16 x i16> @llvm.vp.add.nxv16i16(<vscale x 16 x i16> %x, <vscale x 16 x i16> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
1059 %u = call <vscale x 16 x i16> @llvm.vp.select.nxv16i16(<vscale x 16 x i1> %m, <vscale x 16 x i16> %y, <vscale x 16 x i16> %c, i32 %evl)
1060 ret <vscale x 16 x i16> %u
; Splatted scalar %b with a vp.select (instead of vp.merge) on %m. The CHECK
; lines expect a masked vmacc.vx under "ta, mu" and a vmv.v.v copy into v8.
1063 define <vscale x 16 x i16> @vmacc_vx_nxv16i16_ta(<vscale x 16 x i16> %a, i16 %b, <vscale x 16 x i16> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) {
1064 ; CHECK-LABEL: vmacc_vx_nxv16i16_ta:
1066 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
1067 ; CHECK-NEXT: vmacc.vx v12, a0, v8, v0.t
1068 ; CHECK-NEXT: vmv.v.v v8, v12
1070 %elt.head = insertelement <vscale x 16 x i16> poison, i16 %b, i32 0
1071 %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
1072 %x = call <vscale x 16 x i16> @llvm.vp.mul.nxv16i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %vb, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
1073 %y = call <vscale x 16 x i16> @llvm.vp.add.nxv16i16(<vscale x 16 x i16> %x, <vscale x 16 x i16> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
1074 %u = call <vscale x 16 x i16> @llvm.vp.select.nxv16i16(<vscale x 16 x i1> %m, <vscale x 16 x i16> %y, <vscale x 16 x i16> %c, i32 %evl)
1075 ret <vscale x 16 x i16> %u
1078 declare <vscale x 32 x i16> @llvm.vp.mul.nxv32i16(<vscale x 32 x i16>, <vscale x 32 x i16>, <vscale x 32 x i1>, i32)
1079 declare <vscale x 32 x i16> @llvm.vp.add.nxv32i16(<vscale x 32 x i16>, <vscale x 32 x i16>, <vscale x 32 x i1>, i32)
1080 declare <vscale x 32 x i16> @llvm.vp.merge.nxv32i16(<vscale x 32 x i1>, <vscale x 32 x i16>, <vscale x 32 x i16>, i32)
1081 declare <vscale x 32 x i16> @llvm.vp.select.nxv32i16(<vscale x 32 x i1>, <vscale x 32 x i16>, <vscale x 32 x i16>, i32)
; m8 variant of the masked vmacc.vv test (vp.mul, vp.add, vp.merge on %m).
; The CHECK lines first load v24 from the address in a0 (vl8re16.v) and take
; the EVL from a1, then expect a masked vmacc.vv under "tu, mu".
; NOTE(review): the load is presumably %c being passed indirectly because
; three m8 arguments do not fit in argument registers -- confirm against the ABI.
1083 define <vscale x 32 x i16> @vmacc_vv_nxv32i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b, <vscale x 32 x i16> %c, <vscale x 32 x i1> %m, i32 zeroext %evl) {
1084 ; CHECK-LABEL: vmacc_vv_nxv32i16:
1086 ; CHECK-NEXT: vl8re16.v v24, (a0)
1087 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, mu
1088 ; CHECK-NEXT: vmacc.vv v24, v8, v16, v0.t
1089 ; CHECK-NEXT: vmv8r.v v8, v24
1091 %x = call <vscale x 32 x i16> @llvm.vp.mul.nxv32i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
1092 %y = call <vscale x 32 x i16> @llvm.vp.add.nxv32i16(<vscale x 32 x i16> %x, <vscale x 32 x i16> %c, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
1093 %u = call <vscale x 32 x i16> @llvm.vp.merge.nxv32i16(<vscale x 32 x i1> %m, <vscale x 32 x i16> %y, <vscale x 32 x i16> %c, i32 %evl)
1094 ret <vscale x 32 x i16> %u
; m8 variant with an all-true vp.merge mask; %m is declared and left unused.
; The CHECK lines load v24 from (a0) with vl8re16.v, then expect an unmasked
; vmacc.vv under "tu, ma" with the EVL in a1.
; NOTE(review): the load is presumably %c passed indirectly -- confirm against the ABI.
1097 define <vscale x 32 x i16> @vmacc_vv_nxv32i16_unmasked(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b, <vscale x 32 x i16> %c, <vscale x 32 x i1> %m, i32 zeroext %evl) {
1098 ; CHECK-LABEL: vmacc_vv_nxv32i16_unmasked:
1100 ; CHECK-NEXT: vl8re16.v v24, (a0)
1101 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma
1102 ; CHECK-NEXT: vmacc.vv v24, v8, v16
1103 ; CHECK-NEXT: vmv8r.v v8, v24
1105 %x = call <vscale x 32 x i16> @llvm.vp.mul.nxv32i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
1106 %y = call <vscale x 32 x i16> @llvm.vp.add.nxv32i16(<vscale x 32 x i16> %x, <vscale x 32 x i16> %c, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
1107 %u = call <vscale x 32 x i16> @llvm.vp.merge.nxv32i16(<vscale x 32 x i1> splat (i1 -1), <vscale x 32 x i16> %y, <vscale x 32 x i16> %c, i32 %evl)
1108 ret <vscale x 32 x i16> %u
; m8 scalar-operand variant: %b is splatted, then vp.mul / vp.add /
; vp.merge(%m). The CHECK lines expect a masked vmacc.vx under "tu, mu" with
; no preceding vector load (scalar in a0, EVL in a1).
1111 define <vscale x 32 x i16> @vmacc_vx_nxv32i16(<vscale x 32 x i16> %a, i16 %b, <vscale x 32 x i16> %c, <vscale x 32 x i1> %m, i32 zeroext %evl) {
1112 ; CHECK-LABEL: vmacc_vx_nxv32i16:
1114 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, mu
1115 ; CHECK-NEXT: vmacc.vx v16, a0, v8, v0.t
1116 ; CHECK-NEXT: vmv8r.v v8, v16
1118 %elt.head = insertelement <vscale x 32 x i16> poison, i16 %b, i32 0
1119 %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
1120 %x = call <vscale x 32 x i16> @llvm.vp.mul.nxv32i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %vb, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
1121 %y = call <vscale x 32 x i16> @llvm.vp.add.nxv32i16(<vscale x 32 x i16> %x, <vscale x 32 x i16> %c, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
1122 %u = call <vscale x 32 x i16> @llvm.vp.merge.nxv32i16(<vscale x 32 x i1> %m, <vscale x 32 x i16> %y, <vscale x 32 x i16> %c, i32 %evl)
1123 ret <vscale x 32 x i16> %u
; m8 scalar-operand variant with an all-true vp.merge mask; %m is declared
; and left unused. The CHECK lines expect an unmasked vmacc.vx under "tu, ma".
1126 define <vscale x 32 x i16> @vmacc_vx_nxv32i16_unmasked(<vscale x 32 x i16> %a, i16 %b, <vscale x 32 x i16> %c, <vscale x 32 x i1> %m, i32 zeroext %evl) {
1127 ; CHECK-LABEL: vmacc_vx_nxv32i16_unmasked:
1129 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma
1130 ; CHECK-NEXT: vmacc.vx v16, a0, v8
1131 ; CHECK-NEXT: vmv8r.v v8, v16
1133 %elt.head = insertelement <vscale x 32 x i16> poison, i16 %b, i32 0
1134 %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
1135 %x = call <vscale x 32 x i16> @llvm.vp.mul.nxv32i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %vb, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
1136 %y = call <vscale x 32 x i16> @llvm.vp.add.nxv32i16(<vscale x 32 x i16> %x, <vscale x 32 x i16> %c, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
1137 %u = call <vscale x 32 x i16> @llvm.vp.merge.nxv32i16(<vscale x 32 x i1> splat (i1 -1), <vscale x 32 x i16> %y, <vscale x 32 x i16> %c, i32 %evl)
1138 ret <vscale x 32 x i16> %u
; m8 variant whose final blend is vp.select rather than vp.merge. The CHECK
; lines load v24 from (a0), then expect a masked vmacc.vv under "ta, mu" and
; a vmv.v.v copy into v8.
; NOTE(review): the load is presumably %c passed indirectly -- confirm against the ABI.
1141 define <vscale x 32 x i16> @vmacc_vv_nxv32i16_ta(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b, <vscale x 32 x i16> %c, <vscale x 32 x i1> %m, i32 zeroext %evl) {
1142 ; CHECK-LABEL: vmacc_vv_nxv32i16_ta:
1144 ; CHECK-NEXT: vl8re16.v v24, (a0)
1145 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu
1146 ; CHECK-NEXT: vmacc.vv v24, v8, v16, v0.t
1147 ; CHECK-NEXT: vmv.v.v v8, v24
1149 %x = call <vscale x 32 x i16> @llvm.vp.mul.nxv32i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
1150 %y = call <vscale x 32 x i16> @llvm.vp.add.nxv32i16(<vscale x 32 x i16> %x, <vscale x 32 x i16> %c, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
1151 %u = call <vscale x 32 x i16> @llvm.vp.select.nxv32i16(<vscale x 32 x i1> %m, <vscale x 32 x i16> %y, <vscale x 32 x i16> %c, i32 %evl)
1152 ret <vscale x 32 x i16> %u
; m8 scalar-operand variant using vp.select on %m. The CHECK lines expect a
; masked vmacc.vx under "ta, mu" and a vmv.v.v copy into v8.
1155 define <vscale x 32 x i16> @vmacc_vx_nxv32i16_ta(<vscale x 32 x i16> %a, i16 %b, <vscale x 32 x i16> %c, <vscale x 32 x i1> %m, i32 zeroext %evl) {
1156 ; CHECK-LABEL: vmacc_vx_nxv32i16_ta:
1158 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu
1159 ; CHECK-NEXT: vmacc.vx v16, a0, v8, v0.t
1160 ; CHECK-NEXT: vmv.v.v v8, v16
1162 %elt.head = insertelement <vscale x 32 x i16> poison, i16 %b, i32 0
1163 %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
1164 %x = call <vscale x 32 x i16> @llvm.vp.mul.nxv32i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %vb, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
1165 %y = call <vscale x 32 x i16> @llvm.vp.add.nxv32i16(<vscale x 32 x i16> %x, <vscale x 32 x i16> %c, <vscale x 32 x i1> splat (i1 -1), i32 %evl)
1166 %u = call <vscale x 32 x i16> @llvm.vp.select.nxv32i16(<vscale x 32 x i1> %m, <vscale x 32 x i16> %y, <vscale x 32 x i16> %c, i32 %evl)
1167 ret <vscale x 32 x i16> %u
1170 declare <vscale x 1 x i32> @llvm.vp.mul.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i32>, <vscale x 1 x i1>, i32)
1171 declare <vscale x 1 x i32> @llvm.vp.add.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i32>, <vscale x 1 x i1>, i32)
1172 declare <vscale x 1 x i32> @llvm.vp.merge.nxv1i32(<vscale x 1 x i1>, <vscale x 1 x i32>, <vscale x 1 x i32>, i32)
1173 declare <vscale x 1 x i32> @llvm.vp.select.nxv1i32(<vscale x 1 x i1>, <vscale x 1 x i32>, <vscale x 1 x i32>, i32)
; e32/mf2 masked case: vp.mul(%a, %b), vp.add with %c, vp.merge with %c on
; %m. The CHECK lines expect a masked vmacc.vv under "tu, mu" and a vmv1r.v
; of the accumulator into v8.
1175 define <vscale x 1 x i32> @vmacc_vv_nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i32> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
1176 ; CHECK-LABEL: vmacc_vv_nxv1i32:
1178 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu
1179 ; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t
1180 ; CHECK-NEXT: vmv1r.v v8, v10
1182 %x = call <vscale x 1 x i32> @llvm.vp.mul.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
1183 %y = call <vscale x 1 x i32> @llvm.vp.add.nxv1i32(<vscale x 1 x i32> %x, <vscale x 1 x i32> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
1184 %u = call <vscale x 1 x i32> @llvm.vp.merge.nxv1i32(<vscale x 1 x i1> %m, <vscale x 1 x i32> %y, <vscale x 1 x i32> %c, i32 %evl)
1185 ret <vscale x 1 x i32> %u
; e32/mf2 case with an all-true vp.merge mask; %m is declared and left
; unused. The CHECK lines expect an unmasked vmacc.vv under "tu, ma".
1188 define <vscale x 1 x i32> @vmacc_vv_nxv1i32_unmasked(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i32> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
1189 ; CHECK-LABEL: vmacc_vv_nxv1i32_unmasked:
1191 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, ma
1192 ; CHECK-NEXT: vmacc.vv v10, v8, v9
1193 ; CHECK-NEXT: vmv1r.v v8, v10
1195 %x = call <vscale x 1 x i32> @llvm.vp.mul.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
1196 %y = call <vscale x 1 x i32> @llvm.vp.add.nxv1i32(<vscale x 1 x i32> %x, <vscale x 1 x i32> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
1197 %u = call <vscale x 1 x i32> @llvm.vp.merge.nxv1i32(<vscale x 1 x i1> splat (i1 -1), <vscale x 1 x i32> %y, <vscale x 1 x i32> %c, i32 %evl)
1198 ret <vscale x 1 x i32> %u
; e32/mf2 scalar-operand case: i32 %b is splatted, then vp.mul / vp.add /
; vp.merge(%m). The CHECK lines expect a masked vmacc.vx (scalar in a0, EVL
; in a1) under "tu, mu".
1201 define <vscale x 1 x i32> @vmacc_vx_nxv1i32(<vscale x 1 x i32> %a, i32 %b, <vscale x 1 x i32> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
1202 ; CHECK-LABEL: vmacc_vx_nxv1i32:
1204 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu
1205 ; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t
1206 ; CHECK-NEXT: vmv1r.v v8, v9
1208 %elt.head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0
1209 %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
1210 %x = call <vscale x 1 x i32> @llvm.vp.mul.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
1211 %y = call <vscale x 1 x i32> @llvm.vp.add.nxv1i32(<vscale x 1 x i32> %x, <vscale x 1 x i32> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
1212 %u = call <vscale x 1 x i32> @llvm.vp.merge.nxv1i32(<vscale x 1 x i1> %m, <vscale x 1 x i32> %y, <vscale x 1 x i32> %c, i32 %evl)
1213 ret <vscale x 1 x i32> %u
; e32/mf2 scalar-operand case with an all-true vp.merge mask; %m is declared
; and left unused. The CHECK lines expect an unmasked vmacc.vx under "tu, ma".
1216 define <vscale x 1 x i32> @vmacc_vx_nxv1i32_unmasked(<vscale x 1 x i32> %a, i32 %b, <vscale x 1 x i32> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
1217 ; CHECK-LABEL: vmacc_vx_nxv1i32_unmasked:
1219 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, ma
1220 ; CHECK-NEXT: vmacc.vx v9, a0, v8
1221 ; CHECK-NEXT: vmv1r.v v8, v9
1223 %elt.head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0
1224 %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
1225 %x = call <vscale x 1 x i32> @llvm.vp.mul.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
1226 %y = call <vscale x 1 x i32> @llvm.vp.add.nxv1i32(<vscale x 1 x i32> %x, <vscale x 1 x i32> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
1227 %u = call <vscale x 1 x i32> @llvm.vp.merge.nxv1i32(<vscale x 1 x i1> splat (i1 -1), <vscale x 1 x i32> %y, <vscale x 1 x i32> %c, i32 %evl)
1228 ret <vscale x 1 x i32> %u
; e32/mf2 case whose final blend is vp.select rather than vp.merge. The CHECK
; lines expect a masked vmacc.vv under "ta, mu"; at this fractional LMUL the
; expected copy into v8 is vmv1r.v rather than vmv.v.v.
1231 define <vscale x 1 x i32> @vmacc_vv_nxv1i32_ta(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i32> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
1232 ; CHECK-LABEL: vmacc_vv_nxv1i32_ta:
1234 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
1235 ; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t
1236 ; CHECK-NEXT: vmv1r.v v8, v10
1238 %x = call <vscale x 1 x i32> @llvm.vp.mul.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
1239 %y = call <vscale x 1 x i32> @llvm.vp.add.nxv1i32(<vscale x 1 x i32> %x, <vscale x 1 x i32> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
1240 %u = call <vscale x 1 x i32> @llvm.vp.select.nxv1i32(<vscale x 1 x i1> %m, <vscale x 1 x i32> %y, <vscale x 1 x i32> %c, i32 %evl)
1241 ret <vscale x 1 x i32> %u
; e32/mf2 scalar-operand case using vp.select on %m. The CHECK lines expect a
; masked vmacc.vx under "ta, mu" and a vmv1r.v copy into v8.
1244 define <vscale x 1 x i32> @vmacc_vx_nxv1i32_ta(<vscale x 1 x i32> %a, i32 %b, <vscale x 1 x i32> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
1245 ; CHECK-LABEL: vmacc_vx_nxv1i32_ta:
1247 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
1248 ; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t
1249 ; CHECK-NEXT: vmv1r.v v8, v9
1251 %elt.head = insertelement <vscale x 1 x i32> poison, i32 %b, i32 0
1252 %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
1253 %x = call <vscale x 1 x i32> @llvm.vp.mul.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
1254 %y = call <vscale x 1 x i32> @llvm.vp.add.nxv1i32(<vscale x 1 x i32> %x, <vscale x 1 x i32> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
1255 %u = call <vscale x 1 x i32> @llvm.vp.select.nxv1i32(<vscale x 1 x i1> %m, <vscale x 1 x i32> %y, <vscale x 1 x i32> %c, i32 %evl)
1256 ret <vscale x 1 x i32> %u
1259 declare <vscale x 2 x i32> @llvm.vp.mul.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)
1260 declare <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)
1261 declare <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1>, <vscale x 2 x i32>, <vscale x 2 x i32>, i32)
1262 declare <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1>, <vscale x 2 x i32>, <vscale x 2 x i32>, i32)
; e32/m1 masked case: vp.mul(%a, %b), vp.add with %c, vp.merge with %c on %m.
; The CHECK lines expect a masked vmacc.vv under "tu, mu" and a vmv1r.v of
; the accumulator into v8.
1264 define <vscale x 2 x i32> @vmacc_vv_nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
1265 ; CHECK-LABEL: vmacc_vv_nxv2i32:
1267 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu
1268 ; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t
1269 ; CHECK-NEXT: vmv1r.v v8, v10
1271 %x = call <vscale x 2 x i32> @llvm.vp.mul.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
1272 %y = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
1273 %u = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %y, <vscale x 2 x i32> %c, i32 %evl)
1274 ret <vscale x 2 x i32> %u
; e32/m1 case with an all-true vp.merge mask; %m is declared and left unused.
; The CHECK lines expect an unmasked vmacc.vv under "tu, ma".
1277 define <vscale x 2 x i32> @vmacc_vv_nxv2i32_unmasked(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
1278 ; CHECK-LABEL: vmacc_vv_nxv2i32_unmasked:
1280 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma
1281 ; CHECK-NEXT: vmacc.vv v10, v8, v9
1282 ; CHECK-NEXT: vmv1r.v v8, v10
1284 %x = call <vscale x 2 x i32> @llvm.vp.mul.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
1285 %y = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
1286 %u = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> splat (i1 -1), <vscale x 2 x i32> %y, <vscale x 2 x i32> %c, i32 %evl)
1287 ret <vscale x 2 x i32> %u
; e32/m1 scalar-operand case: i32 %b is splatted, then vp.mul / vp.add /
; vp.merge(%m). The CHECK lines expect a masked vmacc.vx (scalar in a0, EVL
; in a1) under "tu, mu".
1290 define <vscale x 2 x i32> @vmacc_vx_nxv2i32(<vscale x 2 x i32> %a, i32 %b, <vscale x 2 x i32> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
1291 ; CHECK-LABEL: vmacc_vx_nxv2i32:
1293 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu
1294 ; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t
1295 ; CHECK-NEXT: vmv1r.v v8, v9
1297 %elt.head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
1298 %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
1299 %x = call <vscale x 2 x i32> @llvm.vp.mul.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
1300 %y = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
1301 %u = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %y, <vscale x 2 x i32> %c, i32 %evl)
1302 ret <vscale x 2 x i32> %u
; e32/m1 scalar-operand case with an all-true vp.merge mask; %m is declared
; and left unused. The CHECK lines expect an unmasked vmacc.vx under "tu, ma".
1305 define <vscale x 2 x i32> @vmacc_vx_nxv2i32_unmasked(<vscale x 2 x i32> %a, i32 %b, <vscale x 2 x i32> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
1306 ; CHECK-LABEL: vmacc_vx_nxv2i32_unmasked:
1308 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma
1309 ; CHECK-NEXT: vmacc.vx v9, a0, v8
1310 ; CHECK-NEXT: vmv1r.v v8, v9
1312 %elt.head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
1313 %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
1314 %x = call <vscale x 2 x i32> @llvm.vp.mul.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
1315 %y = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
1316 %u = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> splat (i1 -1), <vscale x 2 x i32> %y, <vscale x 2 x i32> %c, i32 %evl)
1317 ret <vscale x 2 x i32> %u
; e32/m1 case whose final blend is vp.select rather than vp.merge. The CHECK
; lines expect a masked vmacc.vv under "ta, mu" and a vmv.v.v copy into v8.
1320 define <vscale x 2 x i32> @vmacc_vv_nxv2i32_ta(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
1321 ; CHECK-LABEL: vmacc_vv_nxv2i32_ta:
1323 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
1324 ; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t
1325 ; CHECK-NEXT: vmv.v.v v8, v10
1327 %x = call <vscale x 2 x i32> @llvm.vp.mul.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
1328 %y = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
1329 %u = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %y, <vscale x 2 x i32> %c, i32 %evl)
1330 ret <vscale x 2 x i32> %u
; e32/m1 scalar-operand case using vp.select on %m. The CHECK lines expect a
; masked vmacc.vx under "ta, mu" and a vmv.v.v copy into v8.
1333 define <vscale x 2 x i32> @vmacc_vx_nxv2i32_ta(<vscale x 2 x i32> %a, i32 %b, <vscale x 2 x i32> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
1334 ; CHECK-LABEL: vmacc_vx_nxv2i32_ta:
1336 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
1337 ; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t
1338 ; CHECK-NEXT: vmv.v.v v8, v9
1340 %elt.head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
1341 %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
1342 %x = call <vscale x 2 x i32> @llvm.vp.mul.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
1343 %y = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
1344 %u = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %y, <vscale x 2 x i32> %c, i32 %evl)
1345 ret <vscale x 2 x i32> %u
1348 declare <vscale x 4 x i32> @llvm.vp.mul.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
1349 declare <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)
1350 declare <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>, i32)
1351 declare <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>, i32)
; e32/m2 masked case: vp.mul(%a, %b), vp.add with %c, vp.merge with %c on %m.
; The CHECK lines expect a masked vmacc.vv under "tu, mu" and a vmv2r.v of
; the accumulator into v8.
1353 define <vscale x 4 x i32> @vmacc_vv_nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
1354 ; CHECK-LABEL: vmacc_vv_nxv4i32:
1356 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu
1357 ; CHECK-NEXT: vmacc.vv v12, v8, v10, v0.t
1358 ; CHECK-NEXT: vmv2r.v v8, v12
1360 %x = call <vscale x 4 x i32> @llvm.vp.mul.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
1361 %y = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
1362 %u = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> %m, <vscale x 4 x i32> %y, <vscale x 4 x i32> %c, i32 %evl)
1363 ret <vscale x 4 x i32> %u
; e32/m2 case with an all-true vp.merge mask; %m is declared and left unused.
; The CHECK lines expect an unmasked vmacc.vv under "tu, ma".
1366 define <vscale x 4 x i32> @vmacc_vv_nxv4i32_unmasked(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
1367 ; CHECK-LABEL: vmacc_vv_nxv4i32_unmasked:
1369 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma
1370 ; CHECK-NEXT: vmacc.vv v12, v8, v10
1371 ; CHECK-NEXT: vmv2r.v v8, v12
1373 %x = call <vscale x 4 x i32> @llvm.vp.mul.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
1374 %y = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
1375 %u = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 -1), <vscale x 4 x i32> %y, <vscale x 4 x i32> %c, i32 %evl)
1376 ret <vscale x 4 x i32> %u
; e32/m2 scalar-operand case: i32 %b is splatted, then vp.mul / vp.add /
; vp.merge(%m). The CHECK lines expect a masked vmacc.vx (scalar in a0, EVL
; in a1) under "tu, mu".
1379 define <vscale x 4 x i32> @vmacc_vx_nxv4i32(<vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i32> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
1380 ; CHECK-LABEL: vmacc_vx_nxv4i32:
1382 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu
1383 ; CHECK-NEXT: vmacc.vx v10, a0, v8, v0.t
1384 ; CHECK-NEXT: vmv2r.v v8, v10
1386 %elt.head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
1387 %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
1388 %x = call <vscale x 4 x i32> @llvm.vp.mul.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
1389 %y = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
1390 %u = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> %m, <vscale x 4 x i32> %y, <vscale x 4 x i32> %c, i32 %evl)
1391 ret <vscale x 4 x i32> %u
; e32/m2 scalar-operand case with an all-true vp.merge mask; %m is declared
; and left unused. The CHECK lines expect an unmasked vmacc.vx under "tu, ma".
1394 define <vscale x 4 x i32> @vmacc_vx_nxv4i32_unmasked(<vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i32> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
1395 ; CHECK-LABEL: vmacc_vx_nxv4i32_unmasked:
1397 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, ma
1398 ; CHECK-NEXT: vmacc.vx v10, a0, v8
1399 ; CHECK-NEXT: vmv2r.v v8, v10
1401 %elt.head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
1402 %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
1403 %x = call <vscale x 4 x i32> @llvm.vp.mul.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
1404 %y = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
1405 %u = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 -1), <vscale x 4 x i32> %y, <vscale x 4 x i32> %c, i32 %evl)
1406 ret <vscale x 4 x i32> %u
; e32/m2 case whose final blend is vp.select rather than vp.merge. The CHECK
; lines expect a masked vmacc.vv under "ta, mu" and a vmv.v.v copy into v8.
1409 define <vscale x 4 x i32> @vmacc_vv_nxv4i32_ta(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
1410 ; CHECK-LABEL: vmacc_vv_nxv4i32_ta:
1412 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
1413 ; CHECK-NEXT: vmacc.vv v12, v8, v10, v0.t
1414 ; CHECK-NEXT: vmv.v.v v8, v12
1416 %x = call <vscale x 4 x i32> @llvm.vp.mul.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
1417 %y = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
1418 %u = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> %m, <vscale x 4 x i32> %y, <vscale x 4 x i32> %c, i32 %evl)
1419 ret <vscale x 4 x i32> %u
; e32/m2 scalar-operand case using vp.select on %m. The CHECK lines expect a
; masked vmacc.vx under "ta, mu" and a vmv.v.v copy into v8.
1422 define <vscale x 4 x i32> @vmacc_vx_nxv4i32_ta(<vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i32> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
1423 ; CHECK-LABEL: vmacc_vx_nxv4i32_ta:
1425 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
1426 ; CHECK-NEXT: vmacc.vx v10, a0, v8, v0.t
1427 ; CHECK-NEXT: vmv.v.v v8, v10
1429 %elt.head = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
1430 %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
1431 %x = call <vscale x 4 x i32> @llvm.vp.mul.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
1432 %y = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
1433 %u = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> %m, <vscale x 4 x i32> %y, <vscale x 4 x i32> %c, i32 %evl)
1434 ret <vscale x 4 x i32> %u
1437 declare <vscale x 8 x i32> @llvm.vp.mul.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i1>, i32)
1438 declare <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i1>, i32)
1439 declare <vscale x 8 x i32> @llvm.vp.merge.nxv8i32(<vscale x 8 x i1>, <vscale x 8 x i32>, <vscale x 8 x i32>, i32)
1440 declare <vscale x 8 x i32> @llvm.vp.select.nxv8i32(<vscale x 8 x i1>, <vscale x 8 x i32>, <vscale x 8 x i32>, i32)
; e32/m4 masked case: vp.mul(%a, %b), vp.add with %c, vp.merge with %c on %m.
; The CHECK lines expect a masked vmacc.vv under "tu, mu" and a vmv4r.v of
; the accumulator into v8.
1442 define <vscale x 8 x i32> @vmacc_vv_nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, <vscale x 8 x i32> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
1443 ; CHECK-LABEL: vmacc_vv_nxv8i32:
1445 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu
1446 ; CHECK-NEXT: vmacc.vv v16, v8, v12, v0.t
1447 ; CHECK-NEXT: vmv4r.v v8, v16
1449 %x = call <vscale x 8 x i32> @llvm.vp.mul.nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
1450 %y = call <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
1451 %u = call <vscale x 8 x i32> @llvm.vp.merge.nxv8i32(<vscale x 8 x i1> %m, <vscale x 8 x i32> %y, <vscale x 8 x i32> %c, i32 %evl)
1452 ret <vscale x 8 x i32> %u
; e32/m4 case with an all-true vp.merge mask; %m is declared and left unused.
; The CHECK lines expect an unmasked vmacc.vv under "tu, ma".
1455 define <vscale x 8 x i32> @vmacc_vv_nxv8i32_unmasked(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, <vscale x 8 x i32> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
1456 ; CHECK-LABEL: vmacc_vv_nxv8i32_unmasked:
1458 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, ma
1459 ; CHECK-NEXT: vmacc.vv v16, v8, v12
1460 ; CHECK-NEXT: vmv4r.v v8, v16
1462 %x = call <vscale x 8 x i32> @llvm.vp.mul.nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
1463 %y = call <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
1464 %u = call <vscale x 8 x i32> @llvm.vp.merge.nxv8i32(<vscale x 8 x i1> splat (i1 -1), <vscale x 8 x i32> %y, <vscale x 8 x i32> %c, i32 %evl)
1465 ret <vscale x 8 x i32> %u
; e32/m4 scalar-operand case: i32 %b is splatted, then vp.mul / vp.add /
; vp.merge(%m). The CHECK lines expect a masked vmacc.vx (scalar in a0, EVL
; in a1) under "tu, mu".
1468 define <vscale x 8 x i32> @vmacc_vx_nxv8i32(<vscale x 8 x i32> %a, i32 %b, <vscale x 8 x i32> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
1469 ; CHECK-LABEL: vmacc_vx_nxv8i32:
1471 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu
1472 ; CHECK-NEXT: vmacc.vx v12, a0, v8, v0.t
1473 ; CHECK-NEXT: vmv4r.v v8, v12
1475 %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
1476 %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
1477 %x = call <vscale x 8 x i32> @llvm.vp.mul.nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
1478 %y = call <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
1479 %u = call <vscale x 8 x i32> @llvm.vp.merge.nxv8i32(<vscale x 8 x i1> %m, <vscale x 8 x i32> %y, <vscale x 8 x i32> %c, i32 %evl)
1480 ret <vscale x 8 x i32> %u
; e32/m4 scalar-operand case with an all-true vp.merge mask; %m is declared
; and left unused. The CHECK lines expect an unmasked vmacc.vx under "tu, ma".
1483 define <vscale x 8 x i32> @vmacc_vx_nxv8i32_unmasked(<vscale x 8 x i32> %a, i32 %b, <vscale x 8 x i32> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
1484 ; CHECK-LABEL: vmacc_vx_nxv8i32_unmasked:
1486 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, ma
1487 ; CHECK-NEXT: vmacc.vx v12, a0, v8
1488 ; CHECK-NEXT: vmv4r.v v8, v12
1490 %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
1491 %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
1492 %x = call <vscale x 8 x i32> @llvm.vp.mul.nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
1493 %y = call <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
1494 %u = call <vscale x 8 x i32> @llvm.vp.merge.nxv8i32(<vscale x 8 x i1> splat (i1 -1), <vscale x 8 x i32> %y, <vscale x 8 x i32> %c, i32 %evl)
1495 ret <vscale x 8 x i32> %u
; e32/m4 case whose final blend is vp.select rather than vp.merge. The CHECK
; lines expect a masked vmacc.vv under "ta, mu" and a vmv.v.v copy into v8.
1498 define <vscale x 8 x i32> @vmacc_vv_nxv8i32_ta(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, <vscale x 8 x i32> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
1499 ; CHECK-LABEL: vmacc_vv_nxv8i32_ta:
1501 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
1502 ; CHECK-NEXT: vmacc.vv v16, v8, v12, v0.t
1503 ; CHECK-NEXT: vmv.v.v v8, v16
1505 %x = call <vscale x 8 x i32> @llvm.vp.mul.nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
1506 %y = call <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
1507 %u = call <vscale x 8 x i32> @llvm.vp.select.nxv8i32(<vscale x 8 x i1> %m, <vscale x 8 x i32> %y, <vscale x 8 x i32> %c, i32 %evl)
1508 ret <vscale x 8 x i32> %u
; Scalar-multiplier form that ends in vp.select instead of vp.merge: i32 %b is
; splatted, multiplied with %a by vp.mul and added to %c by vp.add under an
; all-ones mask, then vp.select combines that sum with %c under mask %m.
; Expected output: a masked vmacc.vx under a `ta, mu` vsetvli, followed by a
; vmv.v.v copy into v8.
1511 define <vscale x 8 x i32> @vmacc_vx_nxv8i32_ta(<vscale x 8 x i32> %a, i32 %b, <vscale x 8 x i32> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
1512 ; CHECK-LABEL: vmacc_vx_nxv8i32_ta:
1514 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
1515 ; CHECK-NEXT: vmacc.vx v12, a0, v8, v0.t
1516 ; CHECK-NEXT: vmv.v.v v8, v12
1518 %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0
1519 %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
1520 %x = call <vscale x 8 x i32> @llvm.vp.mul.nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
1521 %y = call <vscale x 8 x i32> @llvm.vp.add.nxv8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
1522 %u = call <vscale x 8 x i32> @llvm.vp.select.nxv8i32(<vscale x 8 x i1> %m, <vscale x 8 x i32> %y, <vscale x 8 x i32> %c, i32 %evl)
1523 ret <vscale x 8 x i32> %u
1526 declare <vscale x 16 x i32> @llvm.vp.mul.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>, <vscale x 16 x i1>, i32)
1527 declare <vscale x 16 x i32> @llvm.vp.add.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>, <vscale x 16 x i1>, i32)
1528 declare <vscale x 16 x i32> @llvm.vp.merge.nxv16i32(<vscale x 16 x i1>, <vscale x 16 x i32>, <vscale x 16 x i32>, i32)
1529 declare <vscale x 16 x i32> @llvm.vp.select.nxv16i32(<vscale x 16 x i1>, <vscale x 16 x i32>, <vscale x 16 x i32>, i32)
; Masked vector-vector multiply-accumulate at m8: %a * %b + %c is computed with
; vp.mul and vp.add under an all-ones mask, and vp.merge combines that sum with
; %c under mask %m, all with the same %evl.
; Expected output: the accumulator group v24 is first loaded from the address
; in a0 with vl8re32.v (presumably %c arriving indirectly; confirm against the
; calling convention), then a masked vmacc.vv runs under a `tu, mu` vsetvli.
1531 define <vscale x 16 x i32> @vmacc_vv_nxv16i32(<vscale x 16 x i32> %a, <vscale x 16 x i32> %b, <vscale x 16 x i32> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) {
1532 ; CHECK-LABEL: vmacc_vv_nxv16i32:
1534 ; CHECK-NEXT: vl8re32.v v24, (a0)
1535 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, mu
1536 ; CHECK-NEXT: vmacc.vv v24, v8, v16, v0.t
1537 ; CHECK-NEXT: vmv8r.v v8, v24
1539 %x = call <vscale x 16 x i32> @llvm.vp.mul.nxv16i32(<vscale x 16 x i32> %a, <vscale x 16 x i32> %b, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
1540 %y = call <vscale x 16 x i32> @llvm.vp.add.nxv16i32(<vscale x 16 x i32> %x, <vscale x 16 x i32> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
1541 %u = call <vscale x 16 x i32> @llvm.vp.merge.nxv16i32(<vscale x 16 x i1> %m, <vscale x 16 x i32> %y, <vscale x 16 x i32> %c, i32 %evl)
1542 ret <vscale x 16 x i32> %u
; Unmasked vector-vector multiply-accumulate at m8: %a * %b + %c via vp.mul and
; vp.add, with vp.merge given an all-ones mask rather than %m (%m is unused in
; the body).
; Expected output: v24 is loaded from the address in a0 with vl8re32.v
; (presumably %c arriving indirectly; confirm), then an unmasked vmacc.vv runs
; under a `tu, ma` vsetvli.
1545 define <vscale x 16 x i32> @vmacc_vv_nxv16i32_unmasked(<vscale x 16 x i32> %a, <vscale x 16 x i32> %b, <vscale x 16 x i32> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) {
1546 ; CHECK-LABEL: vmacc_vv_nxv16i32_unmasked:
1548 ; CHECK-NEXT: vl8re32.v v24, (a0)
1549 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma
1550 ; CHECK-NEXT: vmacc.vv v24, v8, v16
1551 ; CHECK-NEXT: vmv8r.v v8, v24
1553 %x = call <vscale x 16 x i32> @llvm.vp.mul.nxv16i32(<vscale x 16 x i32> %a, <vscale x 16 x i32> %b, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
1554 %y = call <vscale x 16 x i32> @llvm.vp.add.nxv16i32(<vscale x 16 x i32> %x, <vscale x 16 x i32> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
1555 %u = call <vscale x 16 x i32> @llvm.vp.merge.nxv16i32(<vscale x 16 x i1> splat (i1 -1), <vscale x 16 x i32> %y, <vscale x 16 x i32> %c, i32 %evl)
1556 ret <vscale x 16 x i32> %u
; Masked multiply-accumulate with a scalar multiplier at m8: i32 %b is splatted
; via insertelement/shufflevector, multiplied with %a by vp.mul, added to %c by
; vp.add (both under an all-ones mask), and vp.merge combines that sum with %c
; under mask %m, all with the same %evl.
; Expected output: a single masked vmacc.vx under a `tu, mu` vsetvli.
1559 define <vscale x 16 x i32> @vmacc_vx_nxv16i32(<vscale x 16 x i32> %a, i32 %b, <vscale x 16 x i32> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) {
1560 ; CHECK-LABEL: vmacc_vx_nxv16i32:
1562 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, mu
1563 ; CHECK-NEXT: vmacc.vx v16, a0, v8, v0.t
1564 ; CHECK-NEXT: vmv8r.v v8, v16
1566 %elt.head = insertelement <vscale x 16 x i32> poison, i32 %b, i32 0
1567 %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
1568 %x = call <vscale x 16 x i32> @llvm.vp.mul.nxv16i32(<vscale x 16 x i32> %a, <vscale x 16 x i32> %vb, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
1569 %y = call <vscale x 16 x i32> @llvm.vp.add.nxv16i32(<vscale x 16 x i32> %x, <vscale x 16 x i32> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
1570 %u = call <vscale x 16 x i32> @llvm.vp.merge.nxv16i32(<vscale x 16 x i1> %m, <vscale x 16 x i32> %y, <vscale x 16 x i32> %c, i32 %evl)
1571 ret <vscale x 16 x i32> %u
; Unmasked scalar-multiplier form at m8: splat(%b) * %a + %c is computed with
; vp.mul and vp.add, and vp.merge is given an all-ones mask rather than %m (%m
; is unused in the body).
; Expected output: an unmasked vmacc.vx under a `tu, ma` vsetvli.
1574 define <vscale x 16 x i32> @vmacc_vx_nxv16i32_unmasked(<vscale x 16 x i32> %a, i32 %b, <vscale x 16 x i32> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) {
1575 ; CHECK-LABEL: vmacc_vx_nxv16i32_unmasked:
1577 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma
1578 ; CHECK-NEXT: vmacc.vx v16, a0, v8
1579 ; CHECK-NEXT: vmv8r.v v8, v16
1581 %elt.head = insertelement <vscale x 16 x i32> poison, i32 %b, i32 0
1582 %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
1583 %x = call <vscale x 16 x i32> @llvm.vp.mul.nxv16i32(<vscale x 16 x i32> %a, <vscale x 16 x i32> %vb, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
1584 %y = call <vscale x 16 x i32> @llvm.vp.add.nxv16i32(<vscale x 16 x i32> %x, <vscale x 16 x i32> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
1585 %u = call <vscale x 16 x i32> @llvm.vp.merge.nxv16i32(<vscale x 16 x i1> splat (i1 -1), <vscale x 16 x i32> %y, <vscale x 16 x i32> %c, i32 %evl)
1586 ret <vscale x 16 x i32> %u
; Vector-vector form at m8 that ends in vp.select instead of vp.merge:
; %a * %b + %c via vp.mul and vp.add under an all-ones mask, then vp.select
; combines that sum with %c under mask %m.
; Expected output: v24 is loaded from the address in a0 with vl8re32.v
; (presumably %c arriving indirectly; confirm), then a masked vmacc.vv runs
; under a `ta, mu` vsetvli, followed by a vmv.v.v copy into v8.
1589 define <vscale x 16 x i32> @vmacc_vv_nxv16i32_ta(<vscale x 16 x i32> %a, <vscale x 16 x i32> %b, <vscale x 16 x i32> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) {
1590 ; CHECK-LABEL: vmacc_vv_nxv16i32_ta:
1592 ; CHECK-NEXT: vl8re32.v v24, (a0)
1593 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu
1594 ; CHECK-NEXT: vmacc.vv v24, v8, v16, v0.t
1595 ; CHECK-NEXT: vmv.v.v v8, v24
1597 %x = call <vscale x 16 x i32> @llvm.vp.mul.nxv16i32(<vscale x 16 x i32> %a, <vscale x 16 x i32> %b, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
1598 %y = call <vscale x 16 x i32> @llvm.vp.add.nxv16i32(<vscale x 16 x i32> %x, <vscale x 16 x i32> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
1599 %u = call <vscale x 16 x i32> @llvm.vp.select.nxv16i32(<vscale x 16 x i1> %m, <vscale x 16 x i32> %y, <vscale x 16 x i32> %c, i32 %evl)
1600 ret <vscale x 16 x i32> %u
; Scalar-multiplier form at m8 that ends in vp.select instead of vp.merge:
; i32 %b is splatted, multiplied with %a by vp.mul and added to %c by vp.add
; under an all-ones mask, then vp.select combines that sum with %c under %m.
; Expected output: a masked vmacc.vx under a `ta, mu` vsetvli, followed by a
; vmv.v.v copy into v8.
1603 define <vscale x 16 x i32> @vmacc_vx_nxv16i32_ta(<vscale x 16 x i32> %a, i32 %b, <vscale x 16 x i32> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) {
1604 ; CHECK-LABEL: vmacc_vx_nxv16i32_ta:
1606 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu
1607 ; CHECK-NEXT: vmacc.vx v16, a0, v8, v0.t
1608 ; CHECK-NEXT: vmv.v.v v8, v16
1610 %elt.head = insertelement <vscale x 16 x i32> poison, i32 %b, i32 0
1611 %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
1612 %x = call <vscale x 16 x i32> @llvm.vp.mul.nxv16i32(<vscale x 16 x i32> %a, <vscale x 16 x i32> %vb, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
1613 %y = call <vscale x 16 x i32> @llvm.vp.add.nxv16i32(<vscale x 16 x i32> %x, <vscale x 16 x i32> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
1614 %u = call <vscale x 16 x i32> @llvm.vp.select.nxv16i32(<vscale x 16 x i1> %m, <vscale x 16 x i32> %y, <vscale x 16 x i32> %c, i32 %evl)
1615 ret <vscale x 16 x i32> %u
1618 declare <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
1619 declare <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
1620 declare <vscale x 1 x i64> @llvm.vp.merge.nxv1i64(<vscale x 1 x i1>, <vscale x 1 x i64>, <vscale x 1 x i64>, i32)
1621 declare <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1>, <vscale x 1 x i64>, <vscale x 1 x i64>, i32)
; Masked vector-vector multiply-accumulate on i64 elements at m1: %a * %b + %c
; is computed with vp.mul and vp.add under an all-ones mask, and vp.merge
; combines that sum with %c under mask %m, all with the same %evl.
; Expected output (shared by both runs): a single masked vmacc.vv under a
; `tu, mu` vsetvli.
1623 define <vscale x 1 x i64> @vmacc_vv_nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %b, <vscale x 1 x i64> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
1624 ; CHECK-LABEL: vmacc_vv_nxv1i64:
1626 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu
1627 ; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t
1628 ; CHECK-NEXT: vmv1r.v v8, v10
1630 %x = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
1631 %y = call <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
1632 %u = call <vscale x 1 x i64> @llvm.vp.merge.nxv1i64(<vscale x 1 x i1> %m, <vscale x 1 x i64> %y, <vscale x 1 x i64> %c, i32 %evl)
1633 ret <vscale x 1 x i64> %u
; Unmasked vector-vector multiply-accumulate on i64 elements at m1:
; %a * %b + %c via vp.mul and vp.add, with vp.merge given an all-ones mask
; rather than %m (%m is unused in the body).
; Expected output: an unmasked vmacc.vv under a `tu, ma` vsetvli.
1636 define <vscale x 1 x i64> @vmacc_vv_nxv1i64_unmasked(<vscale x 1 x i64> %a, <vscale x 1 x i64> %b, <vscale x 1 x i64> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
1637 ; CHECK-LABEL: vmacc_vv_nxv1i64_unmasked:
1639 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, ma
1640 ; CHECK-NEXT: vmacc.vv v10, v8, v9
1641 ; CHECK-NEXT: vmv1r.v v8, v10
1643 %x = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
1644 %y = call <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
1645 %u = call <vscale x 1 x i64> @llvm.vp.merge.nxv1i64(<vscale x 1 x i1> splat (i1 -1), <vscale x 1 x i64> %y, <vscale x 1 x i64> %c, i32 %evl)
1646 ret <vscale x 1 x i64> %u
; Masked multiply-accumulate with an i64 scalar multiplier at m1: %b is
; splatted, multiplied with %a by vp.mul, added to %c by vp.add (both under an
; all-ones mask), and vp.merge combines that sum with %c under mask %m.
; The two runs expect different code. On riscv32 the expected output stores
; a0/a1 to the stack (presumably the two halves of %b; confirm), broadcasts
; them with a zero-stride vlse64.v, and uses a masked vmacc.vv. On riscv64 a
; masked vmacc.vx is expected directly. Both use a `tu, mu` vsetvli for the
; multiply-accumulate.
1649 define <vscale x 1 x i64> @vmacc_vx_nxv1i64(<vscale x 1 x i64> %a, i64 %b, <vscale x 1 x i64> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
1650 ; RV32-LABEL: vmacc_vx_nxv1i64:
1652 ; RV32-NEXT: addi sp, sp, -16
1653 ; RV32-NEXT: .cfi_def_cfa_offset 16
1654 ; RV32-NEXT: sw a1, 12(sp)
1655 ; RV32-NEXT: sw a0, 8(sp)
1656 ; RV32-NEXT: addi a0, sp, 8
1657 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
1658 ; RV32-NEXT: vlse64.v v10, (a0), zero
1659 ; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, mu
1660 ; RV32-NEXT: vmacc.vv v9, v8, v10, v0.t
1661 ; RV32-NEXT: vmv1r.v v8, v9
1662 ; RV32-NEXT: addi sp, sp, 16
1665 ; RV64-LABEL: vmacc_vx_nxv1i64:
1667 ; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, mu
1668 ; RV64-NEXT: vmacc.vx v9, a0, v8, v0.t
1669 ; RV64-NEXT: vmv1r.v v8, v9
1671 %elt.head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0
1672 %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
1673 %x = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
1674 %y = call <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
1675 %u = call <vscale x 1 x i64> @llvm.vp.merge.nxv1i64(<vscale x 1 x i1> %m, <vscale x 1 x i64> %y, <vscale x 1 x i64> %c, i32 %evl)
1676 ret <vscale x 1 x i64> %u
; Unmasked i64 scalar-multiplier form at m1: splat(%b) * %a + %c via vp.mul and
; vp.add, with vp.merge given an all-ones mask rather than %m (%m is unused in
; the body).
; On riscv32 the expected output stores a0/a1 to the stack (presumably the two
; halves of %b; confirm), broadcasts them with a zero-stride vlse64.v, and uses
; an unmasked vmacc.vv. On riscv64 an unmasked vmacc.vx is expected. Both use a
; `tu, ma` vsetvli for the multiply-accumulate.
1679 define <vscale x 1 x i64> @vmacc_vx_nxv1i64_unmasked(<vscale x 1 x i64> %a, i64 %b, <vscale x 1 x i64> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
1680 ; RV32-LABEL: vmacc_vx_nxv1i64_unmasked:
1682 ; RV32-NEXT: addi sp, sp, -16
1683 ; RV32-NEXT: .cfi_def_cfa_offset 16
1684 ; RV32-NEXT: sw a1, 12(sp)
1685 ; RV32-NEXT: sw a0, 8(sp)
1686 ; RV32-NEXT: addi a0, sp, 8
1687 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
1688 ; RV32-NEXT: vlse64.v v10, (a0), zero
1689 ; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, ma
1690 ; RV32-NEXT: vmacc.vv v9, v8, v10
1691 ; RV32-NEXT: vmv1r.v v8, v9
1692 ; RV32-NEXT: addi sp, sp, 16
1695 ; RV64-LABEL: vmacc_vx_nxv1i64_unmasked:
1697 ; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, ma
1698 ; RV64-NEXT: vmacc.vx v9, a0, v8
1699 ; RV64-NEXT: vmv1r.v v8, v9
1701 %elt.head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0
1702 %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
1703 %x = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
1704 %y = call <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
1705 %u = call <vscale x 1 x i64> @llvm.vp.merge.nxv1i64(<vscale x 1 x i1> splat (i1 -1), <vscale x 1 x i64> %y, <vscale x 1 x i64> %c, i32 %evl)
1706 ret <vscale x 1 x i64> %u
; Vector-vector i64 form at m1 that ends in vp.select instead of vp.merge:
; %a * %b + %c via vp.mul and vp.add under an all-ones mask, then vp.select
; combines that sum with %c under mask %m, all with the same %evl.
; Expected output: a masked vmacc.vv under a `ta, mu` vsetvli, followed by a
; vmv.v.v copy into v8.
1709 define <vscale x 1 x i64> @vmacc_vv_nxv1i64_ta(<vscale x 1 x i64> %a, <vscale x 1 x i64> %b, <vscale x 1 x i64> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
1710 ; CHECK-LABEL: vmacc_vv_nxv1i64_ta:
1712 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
1713 ; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t
1714 ; CHECK-NEXT: vmv.v.v v8, v10
1716 %x = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
1717 %y = call <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
1718 %u = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> %m, <vscale x 1 x i64> %y, <vscale x 1 x i64> %c, i32 %evl)
1719 ret <vscale x 1 x i64> %u
; i64 scalar-multiplier form at m1 that ends in vp.select instead of vp.merge:
; %b is splatted, multiplied with %a by vp.mul and added to %c by vp.add under
; an all-ones mask, then vp.select combines that sum with %c under mask %m.
; On riscv32 the expected output stores a0/a1 to the stack (presumably the two
; halves of %b; confirm), broadcasts them with a zero-stride vlse64.v, and uses
; a masked vmacc.vv. On riscv64 a masked vmacc.vx is expected. Both use a
; `ta, mu` vsetvli for the multiply-accumulate and finish with vmv.v.v.
1722 define <vscale x 1 x i64> @vmacc_vx_nxv1i64_ta(<vscale x 1 x i64> %a, i64 %b, <vscale x 1 x i64> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
1723 ; RV32-LABEL: vmacc_vx_nxv1i64_ta:
1725 ; RV32-NEXT: addi sp, sp, -16
1726 ; RV32-NEXT: .cfi_def_cfa_offset 16
1727 ; RV32-NEXT: sw a1, 12(sp)
1728 ; RV32-NEXT: sw a0, 8(sp)
1729 ; RV32-NEXT: addi a0, sp, 8
1730 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
1731 ; RV32-NEXT: vlse64.v v10, (a0), zero
1732 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu
1733 ; RV32-NEXT: vmacc.vv v9, v8, v10, v0.t
1734 ; RV32-NEXT: vmv.v.v v8, v9
1735 ; RV32-NEXT: addi sp, sp, 16
1738 ; RV64-LABEL: vmacc_vx_nxv1i64_ta:
1740 ; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu
1741 ; RV64-NEXT: vmacc.vx v9, a0, v8, v0.t
1742 ; RV64-NEXT: vmv.v.v v8, v9
1744 %elt.head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0
1745 %vb = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
1746 %x = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
1747 %y = call <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
1748 %u = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> %m, <vscale x 1 x i64> %y, <vscale x 1 x i64> %c, i32 %evl)
1749 ret <vscale x 1 x i64> %u
1752 declare <vscale x 2 x i64> @llvm.vp.mul.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i32)
1753 declare <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, i32)
1754 declare <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>, i32)
1755 declare <vscale x 2 x i64> @llvm.vp.select.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>, i32)
; Masked vector-vector multiply-accumulate on i64 elements at m2: %a * %b + %c
; is computed with vp.mul and vp.add under an all-ones mask, and vp.merge
; combines that sum with %c under mask %m, all with the same %evl.
; Expected output: a single masked vmacc.vv under a `tu, mu` vsetvli.
1757 define <vscale x 2 x i64> @vmacc_vv_nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
1758 ; CHECK-LABEL: vmacc_vv_nxv2i64:
1760 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu
1761 ; CHECK-NEXT: vmacc.vv v12, v8, v10, v0.t
1762 ; CHECK-NEXT: vmv2r.v v8, v12
1764 %x = call <vscale x 2 x i64> @llvm.vp.mul.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
1765 %y = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
1766 %u = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> %m, <vscale x 2 x i64> %y, <vscale x 2 x i64> %c, i32 %evl)
1767 ret <vscale x 2 x i64> %u
; Unmasked vector-vector multiply-accumulate on i64 elements at m2:
; %a * %b + %c via vp.mul and vp.add, with vp.merge given an all-ones mask
; rather than %m (%m is unused in the body).
; Expected output: an unmasked vmacc.vv under a `tu, ma` vsetvli.
1770 define <vscale x 2 x i64> @vmacc_vv_nxv2i64_unmasked(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
1771 ; CHECK-LABEL: vmacc_vv_nxv2i64_unmasked:
1773 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, ma
1774 ; CHECK-NEXT: vmacc.vv v12, v8, v10
1775 ; CHECK-NEXT: vmv2r.v v8, v12
1777 %x = call <vscale x 2 x i64> @llvm.vp.mul.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
1778 %y = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
1779 %u = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> splat (i1 -1), <vscale x 2 x i64> %y, <vscale x 2 x i64> %c, i32 %evl)
1780 ret <vscale x 2 x i64> %u
; Masked multiply-accumulate with an i64 scalar multiplier at m2: %b is
; splatted, multiplied with %a by vp.mul, added to %c by vp.add (both under an
; all-ones mask), and vp.merge combines that sum with %c under mask %m.
; On riscv32 the expected output stores a0/a1 to the stack (presumably the two
; halves of %b; confirm), broadcasts them with a zero-stride vlse64.v, and uses
; a masked vmacc.vv. On riscv64 a masked vmacc.vx is expected. Both use a
; `tu, mu` vsetvli for the multiply-accumulate.
1783 define <vscale x 2 x i64> @vmacc_vx_nxv2i64(<vscale x 2 x i64> %a, i64 %b, <vscale x 2 x i64> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
1784 ; RV32-LABEL: vmacc_vx_nxv2i64:
1786 ; RV32-NEXT: addi sp, sp, -16
1787 ; RV32-NEXT: .cfi_def_cfa_offset 16
1788 ; RV32-NEXT: sw a1, 12(sp)
1789 ; RV32-NEXT: sw a0, 8(sp)
1790 ; RV32-NEXT: addi a0, sp, 8
1791 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
1792 ; RV32-NEXT: vlse64.v v12, (a0), zero
1793 ; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, mu
1794 ; RV32-NEXT: vmacc.vv v10, v8, v12, v0.t
1795 ; RV32-NEXT: vmv2r.v v8, v10
1796 ; RV32-NEXT: addi sp, sp, 16
1799 ; RV64-LABEL: vmacc_vx_nxv2i64:
1801 ; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, mu
1802 ; RV64-NEXT: vmacc.vx v10, a0, v8, v0.t
1803 ; RV64-NEXT: vmv2r.v v8, v10
1805 %elt.head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0
1806 %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
1807 %x = call <vscale x 2 x i64> @llvm.vp.mul.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
1808 %y = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
1809 %u = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> %m, <vscale x 2 x i64> %y, <vscale x 2 x i64> %c, i32 %evl)
1810 ret <vscale x 2 x i64> %u
; Unmasked i64 scalar-multiplier form at m2: splat(%b) * %a + %c via vp.mul and
; vp.add, with vp.merge given an all-ones mask rather than %m (%m is unused in
; the body).
; On riscv32 the expected output stores a0/a1 to the stack (presumably the two
; halves of %b; confirm), broadcasts them with a zero-stride vlse64.v, and uses
; an unmasked vmacc.vv. On riscv64 an unmasked vmacc.vx is expected. Both use a
; `tu, ma` vsetvli for the multiply-accumulate.
1813 define <vscale x 2 x i64> @vmacc_vx_nxv2i64_unmasked(<vscale x 2 x i64> %a, i64 %b, <vscale x 2 x i64> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
1814 ; RV32-LABEL: vmacc_vx_nxv2i64_unmasked:
1816 ; RV32-NEXT: addi sp, sp, -16
1817 ; RV32-NEXT: .cfi_def_cfa_offset 16
1818 ; RV32-NEXT: sw a1, 12(sp)
1819 ; RV32-NEXT: sw a0, 8(sp)
1820 ; RV32-NEXT: addi a0, sp, 8
1821 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
1822 ; RV32-NEXT: vlse64.v v12, (a0), zero
1823 ; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, ma
1824 ; RV32-NEXT: vmacc.vv v10, v8, v12
1825 ; RV32-NEXT: vmv2r.v v8, v10
1826 ; RV32-NEXT: addi sp, sp, 16
1829 ; RV64-LABEL: vmacc_vx_nxv2i64_unmasked:
1831 ; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, ma
1832 ; RV64-NEXT: vmacc.vx v10, a0, v8
1833 ; RV64-NEXT: vmv2r.v v8, v10
1835 %elt.head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0
1836 %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
1837 %x = call <vscale x 2 x i64> @llvm.vp.mul.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
1838 %y = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
1839 %u = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> splat (i1 -1), <vscale x 2 x i64> %y, <vscale x 2 x i64> %c, i32 %evl)
1840 ret <vscale x 2 x i64> %u
; Vector-vector i64 form at m2 that ends in vp.select instead of vp.merge:
; %a * %b + %c via vp.mul and vp.add under an all-ones mask, then vp.select
; combines that sum with %c under mask %m, all with the same %evl.
; Expected output: a masked vmacc.vv under a `ta, mu` vsetvli, followed by a
; vmv.v.v copy into v8.
1843 define <vscale x 2 x i64> @vmacc_vv_nxv2i64_ta(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
1844 ; CHECK-LABEL: vmacc_vv_nxv2i64_ta:
1846 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
1847 ; CHECK-NEXT: vmacc.vv v12, v8, v10, v0.t
1848 ; CHECK-NEXT: vmv.v.v v8, v12
1850 %x = call <vscale x 2 x i64> @llvm.vp.mul.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
1851 %y = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
1852 %u = call <vscale x 2 x i64> @llvm.vp.select.nxv2i64(<vscale x 2 x i1> %m, <vscale x 2 x i64> %y, <vscale x 2 x i64> %c, i32 %evl)
1853 ret <vscale x 2 x i64> %u
; i64 scalar-multiplier form at m2 that ends in vp.select instead of vp.merge:
; %b is splatted, multiplied with %a by vp.mul and added to %c by vp.add under
; an all-ones mask, then vp.select combines that sum with %c under mask %m.
; On riscv32 the expected output stores a0/a1 to the stack (presumably the two
; halves of %b; confirm), broadcasts them with a zero-stride vlse64.v, and uses
; a masked vmacc.vv. On riscv64 a masked vmacc.vx is expected. Both use a
; `ta, mu` vsetvli for the multiply-accumulate and finish with vmv.v.v.
1856 define <vscale x 2 x i64> @vmacc_vx_nxv2i64_ta(<vscale x 2 x i64> %a, i64 %b, <vscale x 2 x i64> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
1857 ; RV32-LABEL: vmacc_vx_nxv2i64_ta:
1859 ; RV32-NEXT: addi sp, sp, -16
1860 ; RV32-NEXT: .cfi_def_cfa_offset 16
1861 ; RV32-NEXT: sw a1, 12(sp)
1862 ; RV32-NEXT: sw a0, 8(sp)
1863 ; RV32-NEXT: addi a0, sp, 8
1864 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
1865 ; RV32-NEXT: vlse64.v v12, (a0), zero
1866 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu
1867 ; RV32-NEXT: vmacc.vv v10, v8, v12, v0.t
1868 ; RV32-NEXT: vmv.v.v v8, v10
1869 ; RV32-NEXT: addi sp, sp, 16
1872 ; RV64-LABEL: vmacc_vx_nxv2i64_ta:
1874 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu
1875 ; RV64-NEXT: vmacc.vx v10, a0, v8, v0.t
1876 ; RV64-NEXT: vmv.v.v v8, v10
1878 %elt.head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0
1879 %vb = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
1880 %x = call <vscale x 2 x i64> @llvm.vp.mul.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
1881 %y = call <vscale x 2 x i64> @llvm.vp.add.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i64> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
1882 %u = call <vscale x 2 x i64> @llvm.vp.select.nxv2i64(<vscale x 2 x i1> %m, <vscale x 2 x i64> %y, <vscale x 2 x i64> %c, i32 %evl)
1883 ret <vscale x 2 x i64> %u
1886 declare <vscale x 4 x i64> @llvm.vp.mul.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>, <vscale x 4 x i1>, i32)
1887 declare <vscale x 4 x i64> @llvm.vp.add.nxv4i64(<vscale x 4 x i64>, <vscale x 4 x i64>, <vscale x 4 x i1>, i32)
1888 declare <vscale x 4 x i64> @llvm.vp.merge.nxv4i64(<vscale x 4 x i1>, <vscale x 4 x i64>, <vscale x 4 x i64>, i32)
1889 declare <vscale x 4 x i64> @llvm.vp.select.nxv4i64(<vscale x 4 x i1>, <vscale x 4 x i64>, <vscale x 4 x i64>, i32)
; Masked vector-vector multiply-accumulate on i64 elements at m4: %a * %b + %c
; is computed with vp.mul and vp.add under an all-ones mask, and vp.merge
; combines that sum with %c under mask %m, all with the same %evl.
; Expected output: a single masked vmacc.vv under a `tu, mu` vsetvli.
1891 define <vscale x 4 x i64> @vmacc_vv_nxv4i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b, <vscale x 4 x i64> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
1892 ; CHECK-LABEL: vmacc_vv_nxv4i64:
1894 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu
1895 ; CHECK-NEXT: vmacc.vv v16, v8, v12, v0.t
1896 ; CHECK-NEXT: vmv4r.v v8, v16
1898 %x = call <vscale x 4 x i64> @llvm.vp.mul.nxv4i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
1899 %y = call <vscale x 4 x i64> @llvm.vp.add.nxv4i64(<vscale x 4 x i64> %x, <vscale x 4 x i64> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
1900 %u = call <vscale x 4 x i64> @llvm.vp.merge.nxv4i64(<vscale x 4 x i1> %m, <vscale x 4 x i64> %y, <vscale x 4 x i64> %c, i32 %evl)
1901 ret <vscale x 4 x i64> %u
; Unmasked vector-vector multiply-accumulate on i64 elements at m4:
; %a * %b + %c via vp.mul and vp.add, with vp.merge given an all-ones mask
; rather than %m (%m is unused in the body).
; Expected output: an unmasked vmacc.vv under a `tu, ma` vsetvli.
1904 define <vscale x 4 x i64> @vmacc_vv_nxv4i64_unmasked(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b, <vscale x 4 x i64> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
1905 ; CHECK-LABEL: vmacc_vv_nxv4i64_unmasked:
1907 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, ma
1908 ; CHECK-NEXT: vmacc.vv v16, v8, v12
1909 ; CHECK-NEXT: vmv4r.v v8, v16
1911 %x = call <vscale x 4 x i64> @llvm.vp.mul.nxv4i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
1912 %y = call <vscale x 4 x i64> @llvm.vp.add.nxv4i64(<vscale x 4 x i64> %x, <vscale x 4 x i64> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
1913 %u = call <vscale x 4 x i64> @llvm.vp.merge.nxv4i64(<vscale x 4 x i1> splat (i1 -1), <vscale x 4 x i64> %y, <vscale x 4 x i64> %c, i32 %evl)
1914 ret <vscale x 4 x i64> %u
; Masked multiply-accumulate with an i64 scalar multiplier at m4: %b is
; splatted, multiplied with %a by vp.mul, added to %c by vp.add (both under an
; all-ones mask), and vp.merge combines that sum with %c under mask %m.
; On riscv32 the expected output stores a0/a1 to the stack (presumably the two
; halves of %b; confirm), broadcasts them with a zero-stride vlse64.v, and uses
; a masked vmacc.vv. On riscv64 a masked vmacc.vx is expected. Both use a
; `tu, mu` vsetvli for the multiply-accumulate.
1917 define <vscale x 4 x i64> @vmacc_vx_nxv4i64(<vscale x 4 x i64> %a, i64 %b, <vscale x 4 x i64> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
1918 ; RV32-LABEL: vmacc_vx_nxv4i64:
1920 ; RV32-NEXT: addi sp, sp, -16
1921 ; RV32-NEXT: .cfi_def_cfa_offset 16
1922 ; RV32-NEXT: sw a1, 12(sp)
1923 ; RV32-NEXT: sw a0, 8(sp)
1924 ; RV32-NEXT: addi a0, sp, 8
1925 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
1926 ; RV32-NEXT: vlse64.v v16, (a0), zero
1927 ; RV32-NEXT: vsetvli zero, a2, e64, m4, tu, mu
1928 ; RV32-NEXT: vmacc.vv v12, v8, v16, v0.t
1929 ; RV32-NEXT: vmv4r.v v8, v12
1930 ; RV32-NEXT: addi sp, sp, 16
1933 ; RV64-LABEL: vmacc_vx_nxv4i64:
1935 ; RV64-NEXT: vsetvli zero, a1, e64, m4, tu, mu
1936 ; RV64-NEXT: vmacc.vx v12, a0, v8, v0.t
1937 ; RV64-NEXT: vmv4r.v v8, v12
1939 %elt.head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0
1940 %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
1941 %x = call <vscale x 4 x i64> @llvm.vp.mul.nxv4i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
1942 %y = call <vscale x 4 x i64> @llvm.vp.add.nxv4i64(<vscale x 4 x i64> %x, <vscale x 4 x i64> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
1943 %u = call <vscale x 4 x i64> @llvm.vp.merge.nxv4i64(<vscale x 4 x i1> %m, <vscale x 4 x i64> %y, <vscale x 4 x i64> %c, i32 %evl)
1944 ret <vscale x 4 x i64> %u
; Unmasked i64 scalar-multiplier form at m4: splat(%b) * %a + %c via vp.mul and
; vp.add, with vp.merge given an all-ones mask rather than %m (%m is unused in
; the body).
; On riscv32 the expected output stores a0/a1 to the stack (presumably the two
; halves of %b; confirm), broadcasts them with a zero-stride vlse64.v, and uses
; an unmasked vmacc.vv. On riscv64 an unmasked vmacc.vx is expected. Both use a
; `tu, ma` vsetvli for the multiply-accumulate.
1947 define <vscale x 4 x i64> @vmacc_vx_nxv4i64_unmasked(<vscale x 4 x i64> %a, i64 %b, <vscale x 4 x i64> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
1948 ; RV32-LABEL: vmacc_vx_nxv4i64_unmasked:
1950 ; RV32-NEXT: addi sp, sp, -16
1951 ; RV32-NEXT: .cfi_def_cfa_offset 16
1952 ; RV32-NEXT: sw a1, 12(sp)
1953 ; RV32-NEXT: sw a0, 8(sp)
1954 ; RV32-NEXT: addi a0, sp, 8
1955 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
1956 ; RV32-NEXT: vlse64.v v16, (a0), zero
1957 ; RV32-NEXT: vsetvli zero, a2, e64, m4, tu, ma
1958 ; RV32-NEXT: vmacc.vv v12, v8, v16
1959 ; RV32-NEXT: vmv4r.v v8, v12
1960 ; RV32-NEXT: addi sp, sp, 16
1963 ; RV64-LABEL: vmacc_vx_nxv4i64_unmasked:
1965 ; RV64-NEXT: vsetvli zero, a1, e64, m4, tu, ma
1966 ; RV64-NEXT: vmacc.vx v12, a0, v8
1967 ; RV64-NEXT: vmv4r.v v8, v12
1969 %elt.head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0
1970 %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
1971 %x = call <vscale x 4 x i64> @llvm.vp.mul.nxv4i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
1972 %y = call <vscale x 4 x i64> @llvm.vp.add.nxv4i64(<vscale x 4 x i64> %x, <vscale x 4 x i64> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
1973 %u = call <vscale x 4 x i64> @llvm.vp.merge.nxv4i64(<vscale x 4 x i1> splat (i1 -1), <vscale x 4 x i64> %y, <vscale x 4 x i64> %c, i32 %evl)
1974 ret <vscale x 4 x i64> %u
; Vector-vector i64 form at m4 that ends in vp.select instead of vp.merge:
; %a * %b + %c via vp.mul and vp.add under an all-ones mask, then vp.select
; combines that sum with %c under mask %m, all with the same %evl.
; Expected output: a masked vmacc.vv under a `ta, mu` vsetvli, followed by a
; vmv.v.v copy into v8.
1977 define <vscale x 4 x i64> @vmacc_vv_nxv4i64_ta(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b, <vscale x 4 x i64> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
1978 ; CHECK-LABEL: vmacc_vv_nxv4i64_ta:
1980 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
1981 ; CHECK-NEXT: vmacc.vv v16, v8, v12, v0.t
1982 ; CHECK-NEXT: vmv.v.v v8, v16
1984 %x = call <vscale x 4 x i64> @llvm.vp.mul.nxv4i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
1985 %y = call <vscale x 4 x i64> @llvm.vp.add.nxv4i64(<vscale x 4 x i64> %x, <vscale x 4 x i64> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
1986 %u = call <vscale x 4 x i64> @llvm.vp.select.nxv4i64(<vscale x 4 x i1> %m, <vscale x 4 x i64> %y, <vscale x 4 x i64> %c, i32 %evl)
1987 ret <vscale x 4 x i64> %u
; i64 scalar-multiplier form at m4 that ends in vp.select instead of vp.merge:
; %b is splatted, multiplied with %a by vp.mul and added to %c by vp.add under
; an all-ones mask, then vp.select combines that sum with %c under mask %m.
; On riscv32 the expected output stores a0/a1 to the stack (presumably the two
; halves of %b; confirm), broadcasts them with a zero-stride vlse64.v, and uses
; a masked vmacc.vv. On riscv64 a masked vmacc.vx is expected. Both use a
; `ta, mu` vsetvli for the multiply-accumulate and finish with vmv.v.v.
1990 define <vscale x 4 x i64> @vmacc_vx_nxv4i64_ta(<vscale x 4 x i64> %a, i64 %b, <vscale x 4 x i64> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
1991 ; RV32-LABEL: vmacc_vx_nxv4i64_ta:
1993 ; RV32-NEXT: addi sp, sp, -16
1994 ; RV32-NEXT: .cfi_def_cfa_offset 16
1995 ; RV32-NEXT: sw a1, 12(sp)
1996 ; RV32-NEXT: sw a0, 8(sp)
1997 ; RV32-NEXT: addi a0, sp, 8
1998 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
1999 ; RV32-NEXT: vlse64.v v16, (a0), zero
2000 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
2001 ; RV32-NEXT: vmacc.vv v12, v8, v16, v0.t
2002 ; RV32-NEXT: vmv.v.v v8, v12
2003 ; RV32-NEXT: addi sp, sp, 16
2006 ; RV64-LABEL: vmacc_vx_nxv4i64_ta:
2008 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu
2009 ; RV64-NEXT: vmacc.vx v12, a0, v8, v0.t
2010 ; RV64-NEXT: vmv.v.v v8, v12
2012 %elt.head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0
2013 %vb = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
2014 %x = call <vscale x 4 x i64> @llvm.vp.mul.nxv4i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
2015 %y = call <vscale x 4 x i64> @llvm.vp.add.nxv4i64(<vscale x 4 x i64> %x, <vscale x 4 x i64> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
2016 %u = call <vscale x 4 x i64> @llvm.vp.select.nxv4i64(<vscale x 4 x i1> %m, <vscale x 4 x i64> %y, <vscale x 4 x i64> %c, i32 %evl)
2017 ret <vscale x 4 x i64> %u
2020 declare <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>, <vscale x 8 x i1>, i32)
2021 declare <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64>, <vscale x 8 x i64>, <vscale x 8 x i1>, i32)
2022 declare <vscale x 8 x i64> @llvm.vp.merge.nxv8i64(<vscale x 8 x i1>, <vscale x 8 x i64>, <vscale x 8 x i64>, i32)
2023 declare <vscale x 8 x i64> @llvm.vp.select.nxv8i64(<vscale x 8 x i1>, <vscale x 8 x i64>, <vscale x 8 x i64>, i32)
; Masked vector-vector multiply-accumulate on i64 elements at m8: %a * %b + %c
; is computed with vp.mul and vp.add under an all-ones mask, and vp.merge
; combines that sum with %c under mask %m, all with the same %evl.
; Expected output: the accumulator group v24 is first loaded from the address
; in a0 with vl8re64.v (presumably %c arriving indirectly; confirm against the
; calling convention), then a masked vmacc.vv runs under a `tu, mu` vsetvli.
2025 define <vscale x 8 x i64> @vmacc_vv_nxv8i64(<vscale x 8 x i64> %a, <vscale x 8 x i64> %b, <vscale x 8 x i64> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
2026 ; CHECK-LABEL: vmacc_vv_nxv8i64:
2028 ; CHECK-NEXT: vl8re64.v v24, (a0)
2029 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, mu
2030 ; CHECK-NEXT: vmacc.vv v24, v8, v16, v0.t
2031 ; CHECK-NEXT: vmv8r.v v8, v24
2033 %x = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %a, <vscale x 8 x i64> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
2034 %y = call <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
2035 %u = call <vscale x 8 x i64> @llvm.vp.merge.nxv8i64(<vscale x 8 x i1> %m, <vscale x 8 x i64> %y, <vscale x 8 x i64> %c, i32 %evl)
2036 ret <vscale x 8 x i64> %u
; Unmasked vector-vector multiply-accumulate on i64 elements at m8:
; %a * %b + %c via vp.mul and vp.add, with vp.merge given an all-ones mask
; rather than %m (%m is unused in the body).
; Expected output: v24 is loaded from the address in a0 with vl8re64.v
; (presumably %c arriving indirectly; confirm), then an unmasked vmacc.vv runs
; under a `tu, ma` vsetvli.
2039 define <vscale x 8 x i64> @vmacc_vv_nxv8i64_unmasked(<vscale x 8 x i64> %a, <vscale x 8 x i64> %b, <vscale x 8 x i64> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
2040 ; CHECK-LABEL: vmacc_vv_nxv8i64_unmasked:
2042 ; CHECK-NEXT: vl8re64.v v24, (a0)
2043 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, ma
2044 ; CHECK-NEXT: vmacc.vv v24, v8, v16
2045 ; CHECK-NEXT: vmv8r.v v8, v24
2047 %x = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %a, <vscale x 8 x i64> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
2048 %y = call <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
2049 %u = call <vscale x 8 x i64> @llvm.vp.merge.nxv8i64(<vscale x 8 x i1> splat (i1 -1), <vscale x 8 x i64> %y, <vscale x 8 x i64> %c, i32 %evl)
2050 ret <vscale x 8 x i64> %u
; Masked multiply-accumulate with an i64 scalar multiplier at m8: %b is
; splatted, multiplied with %a by vp.mul, added to %c by vp.add (both under an
; all-ones mask), and vp.merge combines that sum with %c under mask %m.
; On riscv32 the expected output stores a0/a1 to the stack (presumably the two
; halves of %b; confirm), broadcasts them with a zero-stride vlse64.v, and uses
; a masked vmacc.vv. On riscv64 a masked vmacc.vx is expected. Both use a
; `tu, mu` vsetvli for the multiply-accumulate.
2053 define <vscale x 8 x i64> @vmacc_vx_nxv8i64(<vscale x 8 x i64> %a, i64 %b, <vscale x 8 x i64> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
2054 ; RV32-LABEL: vmacc_vx_nxv8i64:
2056 ; RV32-NEXT: addi sp, sp, -16
2057 ; RV32-NEXT: .cfi_def_cfa_offset 16
2058 ; RV32-NEXT: sw a1, 12(sp)
2059 ; RV32-NEXT: sw a0, 8(sp)
2060 ; RV32-NEXT: addi a0, sp, 8
2061 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
2062 ; RV32-NEXT: vlse64.v v24, (a0), zero
2063 ; RV32-NEXT: vsetvli zero, a2, e64, m8, tu, mu
2064 ; RV32-NEXT: vmacc.vv v16, v8, v24, v0.t
2065 ; RV32-NEXT: vmv8r.v v8, v16
2066 ; RV32-NEXT: addi sp, sp, 16
2069 ; RV64-LABEL: vmacc_vx_nxv8i64:
2071 ; RV64-NEXT: vsetvli zero, a1, e64, m8, tu, mu
2072 ; RV64-NEXT: vmacc.vx v16, a0, v8, v0.t
2073 ; RV64-NEXT: vmv8r.v v8, v16
2075 %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
2076 %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
2077 %x = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %a, <vscale x 8 x i64> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
2078 %y = call <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
2079 %u = call <vscale x 8 x i64> @llvm.vp.merge.nxv8i64(<vscale x 8 x i1> %m, <vscale x 8 x i64> %y, <vscale x 8 x i64> %c, i32 %evl)
2080 ret <vscale x 8 x i64> %u
; Unmasked vector-scalar multiply-accumulate at nxv8i64. Same shape as the
; masked vx test (splat of %b, vp.mul, vp.add with %c), but the final vp.merge
; uses an all-ones mask. %m is part of the signature but is never referenced.
; riscv32 check lines expect a0 and a1 to be stored to the stack, broadcast
; with a zero-stride vlse64.v, and then consumed by an unmasked vmacc.vv.
; riscv64 check lines expect a single unmasked vmacc.vx reading the scalar
; from a0.
; Both targets expect a "tu, ma" vsetvli before the vmacc.
2083 define <vscale x 8 x i64> @vmacc_vx_nxv8i64_unmasked(<vscale x 8 x i64> %a, i64 %b, <vscale x 8 x i64> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
2084 ; RV32-LABEL: vmacc_vx_nxv8i64_unmasked:
2086 ; RV32-NEXT: addi sp, sp, -16
2087 ; RV32-NEXT: .cfi_def_cfa_offset 16
2088 ; RV32-NEXT: sw a1, 12(sp)
2089 ; RV32-NEXT: sw a0, 8(sp)
2090 ; RV32-NEXT: addi a0, sp, 8
2091 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
2092 ; RV32-NEXT: vlse64.v v24, (a0), zero
2093 ; RV32-NEXT: vsetvli zero, a2, e64, m8, tu, ma
2094 ; RV32-NEXT: vmacc.vv v16, v8, v24
2095 ; RV32-NEXT: vmv8r.v v8, v16
2096 ; RV32-NEXT: addi sp, sp, 16
2099 ; RV64-LABEL: vmacc_vx_nxv8i64_unmasked:
2101 ; RV64-NEXT: vsetvli zero, a1, e64, m8, tu, ma
2102 ; RV64-NEXT: vmacc.vx v16, a0, v8
2103 ; RV64-NEXT: vmv8r.v v8, v16
; Build the splat of %b that stands in for the scalar operand.
2105 %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
2106 %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
2107 %x = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %a, <vscale x 8 x i64> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
2108 %y = call <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
; The all-ones merge mask is what makes this the unmasked variant.
2109 %u = call <vscale x 8 x i64> @llvm.vp.merge.nxv8i64(<vscale x 8 x i1> splat (i1 -1), <vscale x 8 x i64> %y, <vscale x 8 x i64> %c, i32 %evl)
2110 ret <vscale x 8 x i64> %u
; Vector-vector multiply-accumulate at nxv8i64 where the final blend is a
; vp.select under %m instead of a vp.merge.
; The check lines expect v24 to be loaded from the address in a0, then a masked
; vmacc.vv under a "ta, mu" vsetvli (tail agnostic rather than the "tu" seen in
; the vp.merge tests), with the result copied to v8 by vmv.v.v.
2113 define <vscale x 8 x i64> @vmacc_vv_nxv8i64_ta(<vscale x 8 x i64> %a, <vscale x 8 x i64> %b, <vscale x 8 x i64> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
2114 ; CHECK-LABEL: vmacc_vv_nxv8i64_ta:
2116 ; CHECK-NEXT: vl8re64.v v24, (a0)
2117 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu
2118 ; CHECK-NEXT: vmacc.vv v24, v8, v16, v0.t
2119 ; CHECK-NEXT: vmv.v.v v8, v24
; Multiply and add are themselves unmasked; only the select below uses %m.
2121 %x = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %a, <vscale x 8 x i64> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
2122 %y = call <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
; vp.select (not vp.merge) is the only IR difference from the masked vv test.
2123 %u = call <vscale x 8 x i64> @llvm.vp.select.nxv8i64(<vscale x 8 x i1> %m, <vscale x 8 x i64> %y, <vscale x 8 x i64> %c, i32 %evl)
2124 ret <vscale x 8 x i64> %u
; Vector-scalar multiply-accumulate at nxv8i64 where the final blend is a
; vp.select under %m instead of a vp.merge. %b is splatted before the vp.mul.
; riscv32 check lines expect a0 and a1 to be stored to the stack, broadcast
; with a zero-stride vlse64.v, and then consumed by a masked vmacc.vv.
; riscv64 check lines expect a single masked vmacc.vx reading the scalar from a0.
; Both targets expect a "ta, mu" vsetvli before the vmacc and a vmv.v.v copy
; of the result into v8.
2127 define <vscale x 8 x i64> @vmacc_vx_nxv8i64_ta(<vscale x 8 x i64> %a, i64 %b, <vscale x 8 x i64> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
2128 ; RV32-LABEL: vmacc_vx_nxv8i64_ta:
2130 ; RV32-NEXT: addi sp, sp, -16
2131 ; RV32-NEXT: .cfi_def_cfa_offset 16
2132 ; RV32-NEXT: sw a1, 12(sp)
2133 ; RV32-NEXT: sw a0, 8(sp)
2134 ; RV32-NEXT: addi a0, sp, 8
2135 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
2136 ; RV32-NEXT: vlse64.v v24, (a0), zero
2137 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
2138 ; RV32-NEXT: vmacc.vv v16, v8, v24, v0.t
2139 ; RV32-NEXT: vmv.v.v v8, v16
2140 ; RV32-NEXT: addi sp, sp, 16
2143 ; RV64-LABEL: vmacc_vx_nxv8i64_ta:
2145 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu
2146 ; RV64-NEXT: vmacc.vx v16, a0, v8, v0.t
2147 ; RV64-NEXT: vmv.v.v v8, v16
; Build the splat of %b that stands in for the scalar operand.
2149 %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
2150 %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
2151 %x = call <vscale x 8 x i64> @llvm.vp.mul.nxv8i64(<vscale x 8 x i64> %a, <vscale x 8 x i64> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
2152 %y = call <vscale x 8 x i64> @llvm.vp.add.nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
; vp.select (not vp.merge) is the only IR difference from the masked vx test.
2153 %u = call <vscale x 8 x i64> @llvm.vp.select.nxv8i64(<vscale x 8 x i1> %m, <vscale x 8 x i64> %y, <vscale x 8 x i64> %c, i32 %evl)
2154 ret <vscale x 8 x i64> %u