; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32V,RV32V-ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64V,RV64V-ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32V,RV32V-ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64V,RV64V-ZVFHMIN
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+zfbfmin,+zvfbfmin,+zve32f,+zvl128b -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVE32F,RV32ZVE32F-ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+zfbfmin,+zvfbfmin,+zve32f,+zvl128b -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVE32F,RV64ZVE32F-ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+zve32f,+zvl128b -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVE32F,RV32ZVE32F-ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin,+zve32f,+zvl128b -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVE32F,RV64ZVE32F-ZVFHMIN
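
; Masked scatters (llvm.masked.scatter) of fixed-length vectors. With full V,
; a scatter becomes a single indexed store (vsoxei32/vsoxei64) whose index
; operand is the vector of pointers. The RV64 zve32f runs cannot hold 64-bit
; pointers in vector registers, so those scatters are scalarized: each mask
; bit is tested with a branch and the matching element is stored through a
; scalar pointer.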

declare void @llvm.masked.scatter.v1i8.v1p0(<1 x i8>, <1 x ptr>, i32, <1 x i1>)

define void @mscatter_v1i8(<1 x i8> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
; RV32V-LABEL: mscatter_v1i8:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_v1i8:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v1i8:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v1i8:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vfirst.m a1, v0
; RV64ZVE32F-NEXT: bnez a1, .LBB0_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vse8.v v8, (a0)
; RV64ZVE32F-NEXT: .LBB0_2: # %else
; RV64ZVE32F-NEXT: ret
  call void @llvm.masked.scatter.v1i8.v1p0(<1 x i8> %val, <1 x ptr> %ptrs, i32 1, <1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.v2i8.v2p0(<2 x i8>, <2 x ptr>, i32, <2 x i1>)

define void @mscatter_v2i8(<2 x i8> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV32V-LABEL: mscatter_v2i8:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_v2i8:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v2i8:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v2i8:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB1_3
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB1_4
; RV64ZVE32F-NEXT: .LBB1_2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB1_3: # %cond.store
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vse8.v v8, (a0)
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB1_2
; RV64ZVE32F-NEXT: .LBB1_4: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vse8.v v8, (a1)
; RV64ZVE32F-NEXT: ret
  call void @llvm.masked.scatter.v2i8.v2p0(<2 x i8> %val, <2 x ptr> %ptrs, i32 1, <2 x i1> %m)
  ret void
}
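
; Truncating scatters narrow the value vector first: vnsrl.wi with a shift of
; zero drops each element to the next narrower element width, one vnsrl per
; halving step.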

define void @mscatter_v2i16_truncstore_v2i8(<2 x i16> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV32V-LABEL: mscatter_v2i16_truncstore_v2i8:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; RV32V-NEXT: vnsrl.wi v8, v8, 0
; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_v2i16_truncstore_v2i8:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; RV64V-NEXT: vnsrl.wi v8, v8, 0
; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v2i16_truncstore_v2i8:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV32ZVE32F-NEXT: vnsrl.wi v8, v8, 0
; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v2i16_truncstore_v2i8:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: vnsrl.wi v8, v8, 0
; RV64ZVE32F-NEXT: bnez a3, .LBB2_3
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB2_4
; RV64ZVE32F-NEXT: .LBB2_2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB2_3: # %cond.store
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vse8.v v8, (a0)
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB2_2
; RV64ZVE32F-NEXT: .LBB2_4: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vse8.v v8, (a1)
; RV64ZVE32F-NEXT: ret
  %tval = trunc <2 x i16> %val to <2 x i8>
  call void @llvm.masked.scatter.v2i8.v2p0(<2 x i8> %tval, <2 x ptr> %ptrs, i32 1, <2 x i1> %m)
  ret void
}

define void @mscatter_v2i32_truncstore_v2i8(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV32V-LABEL: mscatter_v2i32_truncstore_v2i8:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; RV32V-NEXT: vnsrl.wi v8, v8, 0
; RV32V-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
; RV32V-NEXT: vnsrl.wi v8, v8, 0
; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_v2i32_truncstore_v2i8:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; RV64V-NEXT: vnsrl.wi v8, v8, 0
; RV64V-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
; RV64V-NEXT: vnsrl.wi v8, v8, 0
; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v2i32_truncstore_v2i8:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV32ZVE32F-NEXT: vnsrl.wi v8, v8, 0
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; RV32ZVE32F-NEXT: vnsrl.wi v8, v8, 0
; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v2i32_truncstore_v2i8:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vnsrl.wi v8, v8, 0
; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: vnsrl.wi v8, v8, 0
; RV64ZVE32F-NEXT: bnez a3, .LBB3_3
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB3_4
; RV64ZVE32F-NEXT: .LBB3_2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB3_3: # %cond.store
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vse8.v v8, (a0)
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB3_2
; RV64ZVE32F-NEXT: .LBB3_4: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vse8.v v8, (a1)
; RV64ZVE32F-NEXT: ret
  %tval = trunc <2 x i32> %val to <2 x i8>
  call void @llvm.masked.scatter.v2i8.v2p0(<2 x i8> %tval, <2 x ptr> %ptrs, i32 1, <2 x i1> %m)
  ret void
}
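
; With an i64 source the ZVE32F configurations have no 64-bit vector elements
; to narrow from, so the truncated values are picked up in scalar registers
; and rebuilt into a vector with vmv.s.x/vslideup.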

define void @mscatter_v2i64_truncstore_v2i8(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV32V-LABEL: mscatter_v2i64_truncstore_v2i8:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32V-NEXT: vnsrl.wi v8, v8, 0
; RV32V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; RV32V-NEXT: vnsrl.wi v8, v8, 0
; RV32V-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
; RV32V-NEXT: vnsrl.wi v8, v8, 0
; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_v2i64_truncstore_v2i8:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV64V-NEXT: vnsrl.wi v8, v8, 0
; RV64V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; RV64V-NEXT: vnsrl.wi v8, v8, 0
; RV64V-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
; RV64V-NEXT: vnsrl.wi v8, v8, 0
; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i8:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: lw a1, 8(a0)
; RV32ZVE32F-NEXT: lw a0, 0(a0)
; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV32ZVE32F-NEXT: vmv.s.x v9, a1
; RV32ZVE32F-NEXT: vmv.s.x v10, a0
; RV32ZVE32F-NEXT: vslideup.vi v10, v9, 1
; RV32ZVE32F-NEXT: vsoxei32.v v10, (zero), v8, v0.t
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i8:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a1
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vmv.x.s a0, v0
; RV64ZVE32F-NEXT: andi a1, a0, 1
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: bnez a1, .LBB4_3
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a0, a0, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB4_4
; RV64ZVE32F-NEXT: .LBB4_2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB4_3: # %cond.store
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vse8.v v8, (a2)
; RV64ZVE32F-NEXT: andi a0, a0, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB4_2
; RV64ZVE32F-NEXT: .LBB4_4: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vse8.v v8, (a3)
; RV64ZVE32F-NEXT: ret
  %tval = trunc <2 x i64> %val to <2 x i8>
  call void @llvm.masked.scatter.v2i8.v2p0(<2 x i8> %tval, <2 x ptr> %ptrs, i32 1, <2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.v4i8.v4p0(<4 x i8>, <4 x ptr>, i32, <4 x i1>)

define void @mscatter_v4i8(<4 x i8> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV32-LABEL: mscatter_v4i8:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_v4i8:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (zero), v10, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v4i8:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a4, 8(a0)
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: ld a1, 24(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v0
; RV64ZVE32F-NEXT: andi a5, a3, 1
; RV64ZVE32F-NEXT: bnez a5, .LBB5_5
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a0, a3, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB5_6
; RV64ZVE32F-NEXT: .LBB5_2: # %else2
; RV64ZVE32F-NEXT: andi a0, a3, 4
; RV64ZVE32F-NEXT: bnez a0, .LBB5_7
; RV64ZVE32F-NEXT: .LBB5_3: # %else4
; RV64ZVE32F-NEXT: andi a3, a3, 8
; RV64ZVE32F-NEXT: bnez a3, .LBB5_8
; RV64ZVE32F-NEXT: .LBB5_4: # %else6
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB5_5: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vse8.v v8, (a0)
; RV64ZVE32F-NEXT: andi a0, a3, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB5_2
; RV64ZVE32F-NEXT: .LBB5_6: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vse8.v v9, (a4)
; RV64ZVE32F-NEXT: andi a0, a3, 4
; RV64ZVE32F-NEXT: beqz a0, .LBB5_3
; RV64ZVE32F-NEXT: .LBB5_7: # %cond.store3
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-NEXT: vse8.v v9, (a2)
; RV64ZVE32F-NEXT: andi a3, a3, 8
; RV64ZVE32F-NEXT: beqz a3, .LBB5_4
; RV64ZVE32F-NEXT: .LBB5_8: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
; RV64ZVE32F-NEXT: vse8.v v8, (a1)
; RV64ZVE32F-NEXT: ret
  call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> %m)
  ret void
}
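
; A scatter with a known all-ones mask folds to an unmasked indexed store (or
; plain unconditional scalar stores), and a known all-zeros mask folds away
; entirely.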

define void @mscatter_truemask_v4i8(<4 x i8> %val, <4 x ptr> %ptrs) {
; RV32-LABEL: mscatter_truemask_v4i8:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_truemask_v4i8:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (zero), v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_truemask_v4i8:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a1, 0(a0)
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: ld a3, 16(a0)
; RV64ZVE32F-NEXT: ld a0, 24(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vse8.v v8, (a1)
; RV64ZVE32F-NEXT: vse8.v v9, (a2)
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
; RV64ZVE32F-NEXT: vse8.v v9, (a3)
; RV64ZVE32F-NEXT: vse8.v v8, (a0)
; RV64ZVE32F-NEXT: ret
  call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 1))
  ret void
}

define void @mscatter_falsemask_v4i8(<4 x i8> %val, <4 x ptr> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_v4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: ret
  call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.v8i8.v8p0(<8 x i8>, <8 x ptr>, i32, <8 x i1>)

define void @mscatter_v8i8(<8 x i8> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV32-LABEL: mscatter_v8i8:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_v8i8:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (zero), v12, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v8i8:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a3, 40(a0)
; RV64ZVE32F-NEXT: ld a2, 48(a0)
; RV64ZVE32F-NEXT: ld a1, 56(a0)
; RV64ZVE32F-NEXT: ld t0, 8(a0)
; RV64ZVE32F-NEXT: ld a7, 16(a0)
; RV64ZVE32F-NEXT: ld a6, 24(a0)
; RV64ZVE32F-NEXT: ld a5, 32(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a4, v0
; RV64ZVE32F-NEXT: andi t1, a4, 1
; RV64ZVE32F-NEXT: bnez t1, .LBB8_9
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a0, a4, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB8_10
; RV64ZVE32F-NEXT: .LBB8_2: # %else2
; RV64ZVE32F-NEXT: andi a0, a4, 4
; RV64ZVE32F-NEXT: bnez a0, .LBB8_11
; RV64ZVE32F-NEXT: .LBB8_3: # %else4
; RV64ZVE32F-NEXT: andi a0, a4, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB8_12
; RV64ZVE32F-NEXT: .LBB8_4: # %else6
; RV64ZVE32F-NEXT: andi a0, a4, 16
; RV64ZVE32F-NEXT: bnez a0, .LBB8_13
; RV64ZVE32F-NEXT: .LBB8_5: # %else8
; RV64ZVE32F-NEXT: andi a0, a4, 32
; RV64ZVE32F-NEXT: bnez a0, .LBB8_14
; RV64ZVE32F-NEXT: .LBB8_6: # %else10
; RV64ZVE32F-NEXT: andi a0, a4, 64
; RV64ZVE32F-NEXT: bnez a0, .LBB8_15
; RV64ZVE32F-NEXT: .LBB8_7: # %else12
; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB8_16
; RV64ZVE32F-NEXT: .LBB8_8: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB8_9: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vse8.v v8, (a0)
; RV64ZVE32F-NEXT: andi a0, a4, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB8_2
; RV64ZVE32F-NEXT: .LBB8_10: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vse8.v v9, (t0)
; RV64ZVE32F-NEXT: andi a0, a4, 4
; RV64ZVE32F-NEXT: beqz a0, .LBB8_3
; RV64ZVE32F-NEXT: .LBB8_11: # %cond.store3
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-NEXT: vse8.v v9, (a7)
; RV64ZVE32F-NEXT: andi a0, a4, 8
; RV64ZVE32F-NEXT: beqz a0, .LBB8_4
; RV64ZVE32F-NEXT: .LBB8_12: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-NEXT: vse8.v v9, (a6)
; RV64ZVE32F-NEXT: andi a0, a4, 16
; RV64ZVE32F-NEXT: beqz a0, .LBB8_5
; RV64ZVE32F-NEXT: .LBB8_13: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vse8.v v9, (a5)
; RV64ZVE32F-NEXT: andi a0, a4, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB8_6
; RV64ZVE32F-NEXT: .LBB8_14: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
; RV64ZVE32F-NEXT: vse8.v v9, (a3)
; RV64ZVE32F-NEXT: andi a0, a4, 64
; RV64ZVE32F-NEXT: beqz a0, .LBB8_7
; RV64ZVE32F-NEXT: .LBB8_15: # %cond.store11
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 6
; RV64ZVE32F-NEXT: vse8.v v9, (a2)
; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB8_8
; RV64ZVE32F-NEXT: .LBB8_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-NEXT: vse8.v v8, (a1)
; RV64ZVE32F-NEXT: ret
  call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> %val, <8 x ptr> %ptrs, i32 1, <8 x i1> %m)
  ret void
}
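
; The baseidx tests scatter to %base + %idxs via a vector GEP. The index
; vector must be extended to pointer width for vsoxei (vsext.vf4 on RV32,
; vsext.vf8 on RV64), while the scalarized fallback pulls each index out with
; vmv.x.s and adds it to the base.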

define void @mscatter_baseidx_v8i8(<8 x i8> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_v8i8:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf4 v10, v9
; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_v8i8:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf8 v12, v9
; RV64V-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB9_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vse8.v v8, (a2)
; RV64ZVE32F-NEXT: .LBB9_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB9_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vse8.v v10, (a2)
; RV64ZVE32F-NEXT: .LBB9_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB9_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB9_13
; RV64ZVE32F-NEXT: .LBB9_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB9_14
; RV64ZVE32F-NEXT: .LBB9_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB9_9
; RV64ZVE32F-NEXT: .LBB9_8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
; RV64ZVE32F-NEXT: vse8.v v9, (a2)
; RV64ZVE32F-NEXT: .LBB9_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB9_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB9_16
; RV64ZVE32F-NEXT: .LBB9_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB9_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-NEXT: vse8.v v11, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB9_6
; RV64ZVE32F-NEXT: .LBB9_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-NEXT: vse8.v v9, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB9_7
; RV64ZVE32F-NEXT: .LBB9_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vse8.v v9, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB9_8
; RV64ZVE32F-NEXT: j .LBB9_9
; RV64ZVE32F-NEXT: .LBB9_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-NEXT: vse8.v v10, (a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB9_11
; RV64ZVE32F-NEXT: .LBB9_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v9
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-NEXT: vse8.v v8, (a0)
; RV64ZVE32F-NEXT: ret
  %ptrs = getelementptr inbounds i8, ptr %base, <8 x i8> %idxs
  call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> %val, <8 x ptr> %ptrs, i32 1, <8 x i1> %m)
  ret void
}
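
; The i16 variants below repeat the i8 patterns with 16-bit elements
; (e16/vse16.v); their baseidx versions scale each index by 2 (slli by 1)
; before adding the base.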

declare void @llvm.masked.scatter.v1i16.v1p0(<1 x i16>, <1 x ptr>, i32, <1 x i1>)

define void @mscatter_v1i16(<1 x i16> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
; RV32V-LABEL: mscatter_v1i16:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_v1i16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v1i16:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v1i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vfirst.m a1, v0
; RV64ZVE32F-NEXT: bnez a1, .LBB10_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vse16.v v8, (a0)
; RV64ZVE32F-NEXT: .LBB10_2: # %else
; RV64ZVE32F-NEXT: ret
  call void @llvm.masked.scatter.v1i16.v1p0(<1 x i16> %val, <1 x ptr> %ptrs, i32 2, <1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.v2i16.v2p0(<2 x i16>, <2 x ptr>, i32, <2 x i1>)

define void @mscatter_v2i16(<2 x i16> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV32V-LABEL: mscatter_v2i16:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_v2i16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v2i16:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v2i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB11_3
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB11_4
; RV64ZVE32F-NEXT: .LBB11_2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB11_3: # %cond.store
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vse16.v v8, (a0)
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB11_2
; RV64ZVE32F-NEXT: .LBB11_4: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vse16.v v8, (a1)
; RV64ZVE32F-NEXT: ret
  call void @llvm.masked.scatter.v2i16.v2p0(<2 x i16> %val, <2 x ptr> %ptrs, i32 2, <2 x i1> %m)
  ret void
}

define void @mscatter_v2i32_truncstore_v2i16(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV32V-LABEL: mscatter_v2i32_truncstore_v2i16:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; RV32V-NEXT: vnsrl.wi v8, v8, 0
; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_v2i32_truncstore_v2i16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; RV64V-NEXT: vnsrl.wi v8, v8, 0
; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v2i32_truncstore_v2i16:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV32ZVE32F-NEXT: vnsrl.wi v8, v8, 0
; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v2i32_truncstore_v2i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vnsrl.wi v8, v8, 0
; RV64ZVE32F-NEXT: bnez a3, .LBB12_3
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB12_4
; RV64ZVE32F-NEXT: .LBB12_2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB12_3: # %cond.store
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vse16.v v8, (a0)
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB12_2
; RV64ZVE32F-NEXT: .LBB12_4: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vse16.v v8, (a1)
; RV64ZVE32F-NEXT: ret
  %tval = trunc <2 x i32> %val to <2 x i16>
  call void @llvm.masked.scatter.v2i16.v2p0(<2 x i16> %tval, <2 x ptr> %ptrs, i32 2, <2 x i1> %m)
  ret void
}

define void @mscatter_v2i64_truncstore_v2i16(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV32V-LABEL: mscatter_v2i64_truncstore_v2i16:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32V-NEXT: vnsrl.wi v8, v8, 0
; RV32V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; RV32V-NEXT: vnsrl.wi v8, v8, 0
; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_v2i64_truncstore_v2i16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV64V-NEXT: vnsrl.wi v8, v8, 0
; RV64V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; RV64V-NEXT: vnsrl.wi v8, v8, 0
; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i16:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: lw a1, 8(a0)
; RV32ZVE32F-NEXT: lw a0, 0(a0)
; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV32ZVE32F-NEXT: vmv.s.x v9, a1
; RV32ZVE32F-NEXT: vmv.s.x v10, a0
; RV32ZVE32F-NEXT: vslideup.vi v10, v9, 1
; RV32ZVE32F-NEXT: vsoxei32.v v10, (zero), v8, v0.t
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a1
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a0, v0
; RV64ZVE32F-NEXT: andi a1, a0, 1
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: bnez a1, .LBB13_3
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a0, a0, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB13_4
; RV64ZVE32F-NEXT: .LBB13_2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB13_3: # %cond.store
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vse16.v v8, (a2)
; RV64ZVE32F-NEXT: andi a0, a0, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB13_2
; RV64ZVE32F-NEXT: .LBB13_4: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vse16.v v8, (a3)
; RV64ZVE32F-NEXT: ret
  %tval = trunc <2 x i64> %val to <2 x i16>
  call void @llvm.masked.scatter.v2i16.v2p0(<2 x i16> %tval, <2 x ptr> %ptrs, i32 2, <2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.v4i16.v4p0(<4 x i16>, <4 x ptr>, i32, <4 x i1>)

define void @mscatter_v4i16(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV32-LABEL: mscatter_v4i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_v4i16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (zero), v10, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v4i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a4, 8(a0)
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: ld a1, 24(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v0
; RV64ZVE32F-NEXT: andi a5, a3, 1
; RV64ZVE32F-NEXT: bnez a5, .LBB14_5
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a0, a3, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB14_6
; RV64ZVE32F-NEXT: .LBB14_2: # %else2
; RV64ZVE32F-NEXT: andi a0, a3, 4
; RV64ZVE32F-NEXT: bnez a0, .LBB14_7
; RV64ZVE32F-NEXT: .LBB14_3: # %else4
; RV64ZVE32F-NEXT: andi a3, a3, 8
; RV64ZVE32F-NEXT: bnez a3, .LBB14_8
; RV64ZVE32F-NEXT: .LBB14_4: # %else6
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB14_5: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vse16.v v8, (a0)
; RV64ZVE32F-NEXT: andi a0, a3, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB14_2
; RV64ZVE32F-NEXT: .LBB14_6: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vse16.v v9, (a4)
; RV64ZVE32F-NEXT: andi a0, a3, 4
; RV64ZVE32F-NEXT: beqz a0, .LBB14_3
; RV64ZVE32F-NEXT: .LBB14_7: # %cond.store3
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-NEXT: vse16.v v9, (a2)
; RV64ZVE32F-NEXT: andi a3, a3, 8
; RV64ZVE32F-NEXT: beqz a3, .LBB14_4
; RV64ZVE32F-NEXT: .LBB14_8: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
; RV64ZVE32F-NEXT: vse16.v v8, (a1)
; RV64ZVE32F-NEXT: ret
  call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> %m)
  ret void
}

define void @mscatter_truemask_v4i16(<4 x i16> %val, <4 x ptr> %ptrs) {
; RV32-LABEL: mscatter_truemask_v4i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_truemask_v4i16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (zero), v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_truemask_v4i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a1, 0(a0)
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: ld a3, 16(a0)
; RV64ZVE32F-NEXT: ld a0, 24(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vse16.v v8, (a1)
; RV64ZVE32F-NEXT: vse16.v v9, (a2)
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
; RV64ZVE32F-NEXT: vse16.v v9, (a3)
; RV64ZVE32F-NEXT: vse16.v v8, (a0)
; RV64ZVE32F-NEXT: ret
  call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1))
  ret void
}

define void @mscatter_falsemask_v4i16(<4 x i16> %val, <4 x ptr> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_v4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: ret
  call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.v8i16.v8p0(<8 x i16>, <8 x ptr>, i32, <8 x i1>)

define void @mscatter_v8i16(<8 x i16> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV32-LABEL: mscatter_v8i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_v8i16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (zero), v12, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v8i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a3, 40(a0)
; RV64ZVE32F-NEXT: ld a2, 48(a0)
; RV64ZVE32F-NEXT: ld a1, 56(a0)
; RV64ZVE32F-NEXT: ld t0, 8(a0)
; RV64ZVE32F-NEXT: ld a7, 16(a0)
; RV64ZVE32F-NEXT: ld a6, 24(a0)
; RV64ZVE32F-NEXT: ld a5, 32(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a4, v0
; RV64ZVE32F-NEXT: andi t1, a4, 1
; RV64ZVE32F-NEXT: bnez t1, .LBB17_9
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a0, a4, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB17_10
; RV64ZVE32F-NEXT: .LBB17_2: # %else2
; RV64ZVE32F-NEXT: andi a0, a4, 4
; RV64ZVE32F-NEXT: bnez a0, .LBB17_11
; RV64ZVE32F-NEXT: .LBB17_3: # %else4
; RV64ZVE32F-NEXT: andi a0, a4, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB17_12
; RV64ZVE32F-NEXT: .LBB17_4: # %else6
; RV64ZVE32F-NEXT: andi a0, a4, 16
; RV64ZVE32F-NEXT: bnez a0, .LBB17_13
; RV64ZVE32F-NEXT: .LBB17_5: # %else8
; RV64ZVE32F-NEXT: andi a0, a4, 32
; RV64ZVE32F-NEXT: bnez a0, .LBB17_14
; RV64ZVE32F-NEXT: .LBB17_6: # %else10
; RV64ZVE32F-NEXT: andi a0, a4, 64
; RV64ZVE32F-NEXT: bnez a0, .LBB17_15
; RV64ZVE32F-NEXT: .LBB17_7: # %else12
; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB17_16
; RV64ZVE32F-NEXT: .LBB17_8: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB17_9: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vse16.v v8, (a0)
; RV64ZVE32F-NEXT: andi a0, a4, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB17_2
; RV64ZVE32F-NEXT: .LBB17_10: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vse16.v v9, (t0)
; RV64ZVE32F-NEXT: andi a0, a4, 4
; RV64ZVE32F-NEXT: beqz a0, .LBB17_3
; RV64ZVE32F-NEXT: .LBB17_11: # %cond.store3
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-NEXT: vse16.v v9, (a7)
; RV64ZVE32F-NEXT: andi a0, a4, 8
; RV64ZVE32F-NEXT: beqz a0, .LBB17_4
; RV64ZVE32F-NEXT: .LBB17_12: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-NEXT: vse16.v v9, (a6)
; RV64ZVE32F-NEXT: andi a0, a4, 16
; RV64ZVE32F-NEXT: beqz a0, .LBB17_5
; RV64ZVE32F-NEXT: .LBB17_13: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vse16.v v9, (a5)
; RV64ZVE32F-NEXT: andi a0, a4, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB17_6
; RV64ZVE32F-NEXT: .LBB17_14: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
; RV64ZVE32F-NEXT: vse16.v v9, (a3)
; RV64ZVE32F-NEXT: andi a0, a4, 64
; RV64ZVE32F-NEXT: beqz a0, .LBB17_7
; RV64ZVE32F-NEXT: .LBB17_15: # %cond.store11
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 6
; RV64ZVE32F-NEXT: vse16.v v9, (a2)
; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB17_8
; RV64ZVE32F-NEXT: .LBB17_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-NEXT: vse16.v v8, (a1)
; RV64ZVE32F-NEXT: ret
  call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_v8i8_v8i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf4 v10, v9
; RV32-NEXT: vadd.vv v10, v10, v10
; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_v8i8_v8i16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf8 v12, v9
; RV64V-NEXT: vadd.vv v12, v12, v12
; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8_v8i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB18_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vse16.v v8, (a2)
; RV64ZVE32F-NEXT: .LBB18_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB18_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vse16.v v10, (a2)
; RV64ZVE32F-NEXT: .LBB18_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB18_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB18_13
; RV64ZVE32F-NEXT: .LBB18_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB18_14
; RV64ZVE32F-NEXT: .LBB18_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB18_9
; RV64ZVE32F-NEXT: .LBB18_8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
; RV64ZVE32F-NEXT: vse16.v v9, (a2)
; RV64ZVE32F-NEXT: .LBB18_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB18_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB18_16
; RV64ZVE32F-NEXT: .LBB18_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB18_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-NEXT: vse16.v v11, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB18_6
; RV64ZVE32F-NEXT: .LBB18_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-NEXT: vse16.v v9, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB18_7
; RV64ZVE32F-NEXT: .LBB18_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vse16.v v9, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB18_8
; RV64ZVE32F-NEXT: j .LBB18_9
; RV64ZVE32F-NEXT: .LBB18_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-NEXT: vse16.v v10, (a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB18_11
; RV64ZVE32F-NEXT: .LBB18_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v9
; RV64ZVE32F-NEXT: slli a1, a1, 1
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-NEXT: vse16.v v8, (a0)
; RV64ZVE32F-NEXT: ret
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i8> %idxs
  call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
  ret void
}
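
; Explicitly sign-extending the i8 indices matches the plain v8i8-index
; lowering above: vsoxei needs pointer-width indices either way.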

define void @mscatter_baseidx_sext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_v8i8_v8i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf4 v10, v9
; RV32-NEXT: vadd.vv v10, v10, v10
; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_sext_v8i8_v8i16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf8 v12, v9
; RV64V-NEXT: vadd.vv v12, v12, v12
; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB19_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vse16.v v8, (a2)
; RV64ZVE32F-NEXT: .LBB19_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB19_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vse16.v v10, (a2)
; RV64ZVE32F-NEXT: .LBB19_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB19_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB19_13
; RV64ZVE32F-NEXT: .LBB19_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB19_14
; RV64ZVE32F-NEXT: .LBB19_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB19_9
; RV64ZVE32F-NEXT: .LBB19_8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
; RV64ZVE32F-NEXT: vse16.v v9, (a2)
; RV64ZVE32F-NEXT: .LBB19_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB19_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB19_16
; RV64ZVE32F-NEXT: .LBB19_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB19_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-NEXT: vse16.v v11, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB19_6
; RV64ZVE32F-NEXT: .LBB19_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-NEXT: vse16.v v9, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB19_7
; RV64ZVE32F-NEXT: .LBB19_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vse16.v v9, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB19_8
; RV64ZVE32F-NEXT: j .LBB19_9
; RV64ZVE32F-NEXT: .LBB19_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-NEXT: vse16.v v10, (a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB19_11
; RV64ZVE32F-NEXT: .LBB19_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v9
; RV64ZVE32F-NEXT: slli a1, a1, 1
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-NEXT: vse16.v v8, (a0)
; RV64ZVE32F-NEXT: ret
  %eidxs = sext <8 x i8> %idxs to <8 x i16>
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %eidxs
  call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
  ret void
}
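
; With zero-extended i8 indices every scaled offset fits in 16 bits
; (255 * 2 = 510), so a 16-bit indexed store suffices: vwaddu.vv doubles the
; indices while widening them to e16, and vsoxei16 performs the scatter. The
; scalar fallback masks each index with 255 before scaling.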

define void @mscatter_baseidx_zext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_v8i8_v8i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT: vwaddu.vv v10, v9, v9
; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_zext_v8i8_v8i16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64V-NEXT: vwaddu.vv v10, v9, v9
; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV64V-NEXT: vsoxei16.v v8, (a0), v10, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB20_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vse16.v v8, (a2)
; RV64ZVE32F-NEXT: .LBB20_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB20_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vse16.v v10, (a2)
; RV64ZVE32F-NEXT: .LBB20_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB20_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB20_13
; RV64ZVE32F-NEXT: .LBB20_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB20_14
; RV64ZVE32F-NEXT: .LBB20_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB20_9
; RV64ZVE32F-NEXT: .LBB20_8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
; RV64ZVE32F-NEXT: vse16.v v9, (a2)
; RV64ZVE32F-NEXT: .LBB20_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB20_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB20_16
; RV64ZVE32F-NEXT: .LBB20_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB20_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-NEXT: vse16.v v11, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB20_6
; RV64ZVE32F-NEXT: .LBB20_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-NEXT: vse16.v v9, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB20_7
; RV64ZVE32F-NEXT: .LBB20_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vse16.v v9, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB20_8
; RV64ZVE32F-NEXT: j .LBB20_9
; RV64ZVE32F-NEXT: .LBB20_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-NEXT: vse16.v v10, (a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB20_11
; RV64ZVE32F-NEXT: .LBB20_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v9
; RV64ZVE32F-NEXT: andi a1, a1, 255
; RV64ZVE32F-NEXT: slli a1, a1, 1
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-NEXT: vse16.v v8, (a0)
; RV64ZVE32F-NEXT: ret
  %eidxs = zext <8 x i8> %idxs to <8 x i16>
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %eidxs
  call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
  ret void
}
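
; Native i16 indices: RV32 widens and doubles in one step with vwadd.vv
; (e16 to e32), while RV64V still needs a full vsext.vf4 to e64 before
; doubling.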
define void @mscatter_baseidx_v8i16(<8 x i16> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_v8i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vwadd.vv v10, v9, v9
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_v8i16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf4 v12, v9
; RV64V-NEXT: vadd.vv v12, v12, v12
; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v8i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB21_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vse16.v v8, (a2)
; RV64ZVE32F-NEXT: .LBB21_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB21_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vse16.v v10, (a2)
; RV64ZVE32F-NEXT: .LBB21_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB21_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB21_13
; RV64ZVE32F-NEXT: .LBB21_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB21_14
; RV64ZVE32F-NEXT: .LBB21_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB21_9
; RV64ZVE32F-NEXT: .LBB21_8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
; RV64ZVE32F-NEXT: vse16.v v9, (a2)
; RV64ZVE32F-NEXT: .LBB21_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB21_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB21_16
; RV64ZVE32F-NEXT: .LBB21_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB21_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-NEXT: vse16.v v11, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB21_6
; RV64ZVE32F-NEXT: .LBB21_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-NEXT: vse16.v v9, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB21_7
; RV64ZVE32F-NEXT: .LBB21_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vse16.v v9, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB21_8
; RV64ZVE32F-NEXT: j .LBB21_9
; RV64ZVE32F-NEXT: .LBB21_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-NEXT: vse16.v v10, (a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB21_11
; RV64ZVE32F-NEXT: .LBB21_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v9
; RV64ZVE32F-NEXT: slli a1, a1, 1
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-NEXT: vse16.v v8, (a0)
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %idxs
call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
ret void
}
declare void @llvm.masked.scatter.v1i32.v1p0(<1 x i32>, <1 x ptr>, i32, <1 x i1>)

define void @mscatter_v1i32(<1 x i32> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
; RV32V-LABEL: mscatter_v1i32:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_v1i32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v1i32:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v1i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vfirst.m a1, v0
; RV64ZVE32F-NEXT: bnez a1, .LBB22_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a0)
; RV64ZVE32F-NEXT: .LBB22_2: # %else
; RV64ZVE32F-NEXT: ret
call void @llvm.masked.scatter.v1i32.v1p0(<1 x i32> %val, <1 x ptr> %ptrs, i32 4, <1 x i1> %m)
ret void
}
declare void @llvm.masked.scatter.v2i32.v2p0(<2 x i32>, <2 x ptr>, i32, <2 x i1>)

define void @mscatter_v2i32(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV32V-LABEL: mscatter_v2i32:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_v2i32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v2i32:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v2i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB23_3
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB23_4
; RV64ZVE32F-NEXT: .LBB23_2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB23_3: # %cond.store
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a0)
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB23_2
; RV64ZVE32F-NEXT: .LBB23_4: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vse32.v v8, (a1)
; RV64ZVE32F-NEXT: ret
call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> %val, <2 x ptr> %ptrs, i32 4, <2 x i1> %m)
ret void
}
define void @mscatter_v2i64_truncstore_v2i32(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV32V-LABEL: mscatter_v2i64_truncstore_v2i32:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32V-NEXT: vnsrl.wi v8, v8, 0
; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_v2i64_truncstore_v2i32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV64V-NEXT: vnsrl.wi v8, v8, 0
; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i32:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: lw a1, 0(a0)
; RV32ZVE32F-NEXT: lw a0, 8(a0)
; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.v.x v9, a1
; RV32ZVE32F-NEXT: vslide1down.vx v9, v9, a0
; RV32ZVE32F-NEXT: vsoxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.v.x v8, a0
; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a0, v0
; RV64ZVE32F-NEXT: andi a4, a0, 1
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1
; RV64ZVE32F-NEXT: bnez a4, .LBB24_3
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a0, a0, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB24_4
; RV64ZVE32F-NEXT: .LBB24_2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB24_3: # %cond.store
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a2)
; RV64ZVE32F-NEXT: andi a0, a0, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB24_2
; RV64ZVE32F-NEXT: .LBB24_4: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vse32.v v8, (a3)
; RV64ZVE32F-NEXT: ret
%tval = trunc <2 x i64> %val to <2 x i32>
call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> %tval, <2 x ptr> %ptrs, i32 4, <2 x i1> %m)
ret void
}
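; The ZVE32F configurations cannot hold <2 x i64> in vector registers, so
; %val arrives through memory or scalar registers and the trunc happens on
; the scalar side: on a little-endian target the low word of each lane is
; exactly the truncated value. A sketch of the idea, offsets as in the
; block above:
;   lw a1, 0(a0)   # low 32 bits of lane 0 == trunc of lane 0
;   lw a0, 8(a0)   # low 32 bits of lane 1
;   vmv.v.x v9, a1 # rebuild the <2 x i32> before the indexed store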
declare void @llvm.masked.scatter.v4i32.v4p0(<4 x i32>, <4 x ptr>, i32, <4 x i1>)

define void @mscatter_v4i32(<4 x i32> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV32-LABEL: mscatter_v4i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_v4i32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (zero), v10, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v4i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a4, 8(a0)
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: ld a1, 24(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v0
; RV64ZVE32F-NEXT: andi a5, a3, 1
; RV64ZVE32F-NEXT: bnez a5, .LBB25_5
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a0, a3, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB25_6
; RV64ZVE32F-NEXT: .LBB25_2: # %else2
; RV64ZVE32F-NEXT: andi a0, a3, 4
; RV64ZVE32F-NEXT: bnez a0, .LBB25_7
; RV64ZVE32F-NEXT: .LBB25_3: # %else4
; RV64ZVE32F-NEXT: andi a3, a3, 8
; RV64ZVE32F-NEXT: bnez a3, .LBB25_8
; RV64ZVE32F-NEXT: .LBB25_4: # %else6
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB25_5: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a0)
; RV64ZVE32F-NEXT: andi a0, a3, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB25_2
; RV64ZVE32F-NEXT: .LBB25_6: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vse32.v v9, (a4)
; RV64ZVE32F-NEXT: andi a0, a3, 4
; RV64ZVE32F-NEXT: beqz a0, .LBB25_3
; RV64ZVE32F-NEXT: .LBB25_7: # %cond.store3
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-NEXT: vse32.v v9, (a2)
; RV64ZVE32F-NEXT: andi a3, a3, 8
; RV64ZVE32F-NEXT: beqz a3, .LBB25_4
; RV64ZVE32F-NEXT: .LBB25_8: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
; RV64ZVE32F-NEXT: vse32.v v8, (a1)
; RV64ZVE32F-NEXT: ret
call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 4, <4 x i1> %m)
ret void
}
define void @mscatter_truemask_v4i32(<4 x i32> %val, <4 x ptr> %ptrs) {
; RV32-LABEL: mscatter_truemask_v4i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_truemask_v4i32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (zero), v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_truemask_v4i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a1, 0(a0)
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: ld a3, 16(a0)
; RV64ZVE32F-NEXT: ld a0, 24(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vse32.v v8, (a1)
; RV64ZVE32F-NEXT: vse32.v v9, (a2)
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
; RV64ZVE32F-NEXT: vse32.v v9, (a3)
; RV64ZVE32F-NEXT: vse32.v v8, (a0)
; RV64ZVE32F-NEXT: ret
call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 1))
ret void
}
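; With a known all-ones mask the lowering needs no branches at all: the
; scalarized path above just loads the four pointers and issues four
; single-element stores, while the V configurations drop the ", v0.t" mask
; operand from vsoxei. The splat mask is the IR-level way of saying the
; scatter is unconditional, roughly:
;   store i32 %lane0, ptr %p0  ; ... and likewise for lanes 1-3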
define void @mscatter_falsemask_v4i32(<4 x i32> %val, <4 x ptr> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: ret
call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 4, <4 x i1> zeroinitializer)
ret void
}
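; A zeroinitializer mask means no lane may store, so every configuration
; folds the whole scatter away to a bare ret, as the shared CHECK block
; above verifies.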
declare void @llvm.masked.scatter.v8i32.v8p0(<8 x i32>, <8 x ptr>, i32, <8 x i1>)

define void @mscatter_v8i32(<8 x i32> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV32-LABEL: mscatter_v8i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_v8i32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (zero), v12, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v8i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a3, 40(a0)
; RV64ZVE32F-NEXT: ld a2, 48(a0)
; RV64ZVE32F-NEXT: ld a1, 56(a0)
; RV64ZVE32F-NEXT: ld t0, 8(a0)
; RV64ZVE32F-NEXT: ld a7, 16(a0)
; RV64ZVE32F-NEXT: ld a6, 24(a0)
; RV64ZVE32F-NEXT: ld a5, 32(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a4, v0
; RV64ZVE32F-NEXT: andi t1, a4, 1
; RV64ZVE32F-NEXT: bnez t1, .LBB28_9
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a0, a4, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB28_10
; RV64ZVE32F-NEXT: .LBB28_2: # %else2
; RV64ZVE32F-NEXT: andi a0, a4, 4
; RV64ZVE32F-NEXT: bnez a0, .LBB28_11
; RV64ZVE32F-NEXT: .LBB28_3: # %else4
; RV64ZVE32F-NEXT: andi a0, a4, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB28_12
; RV64ZVE32F-NEXT: .LBB28_4: # %else6
; RV64ZVE32F-NEXT: andi a0, a4, 16
; RV64ZVE32F-NEXT: bnez a0, .LBB28_13
; RV64ZVE32F-NEXT: .LBB28_5: # %else8
; RV64ZVE32F-NEXT: andi a0, a4, 32
; RV64ZVE32F-NEXT: bnez a0, .LBB28_14
; RV64ZVE32F-NEXT: .LBB28_6: # %else10
; RV64ZVE32F-NEXT: andi a0, a4, 64
; RV64ZVE32F-NEXT: bnez a0, .LBB28_15
; RV64ZVE32F-NEXT: .LBB28_7: # %else12
; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB28_16
; RV64ZVE32F-NEXT: .LBB28_8: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB28_9: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a0)
; RV64ZVE32F-NEXT: andi a0, a4, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB28_2
; RV64ZVE32F-NEXT: .LBB28_10: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vse32.v v10, (t0)
; RV64ZVE32F-NEXT: andi a0, a4, 4
; RV64ZVE32F-NEXT: beqz a0, .LBB28_3
; RV64ZVE32F-NEXT: .LBB28_11: # %cond.store3
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV64ZVE32F-NEXT: vse32.v v10, (a7)
; RV64ZVE32F-NEXT: andi a0, a4, 8
; RV64ZVE32F-NEXT: beqz a0, .LBB28_4
; RV64ZVE32F-NEXT: .LBB28_12: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV64ZVE32F-NEXT: vse32.v v10, (a6)
; RV64ZVE32F-NEXT: andi a0, a4, 16
; RV64ZVE32F-NEXT: beqz a0, .LBB28_5
; RV64ZVE32F-NEXT: .LBB28_13: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v10, (a5)
; RV64ZVE32F-NEXT: andi a0, a4, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB28_6
; RV64ZVE32F-NEXT: .LBB28_14: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v10, (a3)
; RV64ZVE32F-NEXT: andi a0, a4, 64
; RV64ZVE32F-NEXT: beqz a0, .LBB28_7
; RV64ZVE32F-NEXT: .LBB28_15: # %cond.store11
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB28_8
; RV64ZVE32F-NEXT: .LBB28_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a1)
; RV64ZVE32F-NEXT: ret
call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
ret void
}
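; <8 x i32> occupies LMUL=2 under zvl128b, which shows in the block above:
; extracting elements 4-7 must slide down across the m2 register group
; before a one-element store can be issued at m1. Sketch for one upper
; lane, taken from the pattern above:
;   vsetivli zero, 1, e32, m2, ta, ma  # slide spans the register group
;   vslidedown.vi v10, v8, 5
;   vsetivli zero, 1, e32, m1, ta, ma  # a single-element store needs m1
;   vse32.v v10, (a3)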
define void @mscatter_baseidx_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_v8i8_v8i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf4 v12, v10
; RV32-NEXT: vsll.vi v10, v12, 2
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_v8i8_v8i32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf8 v12, v10
; RV64V-NEXT: vsll.vi v12, v12, 2
; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8_v8i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB29_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a2)
; RV64ZVE32F-NEXT: .LBB29_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB29_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
; RV64ZVE32F-NEXT: vse32.v v11, (a2)
; RV64ZVE32F-NEXT: .LBB29_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB29_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB29_13
; RV64ZVE32F-NEXT: .LBB29_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB29_14
; RV64ZVE32F-NEXT: .LBB29_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB29_9
; RV64ZVE32F-NEXT: .LBB29_8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB29_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB29_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB29_16
; RV64ZVE32F-NEXT: .LBB29_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB29_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB29_6
; RV64ZVE32F-NEXT: .LBB29_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB29_7
; RV64ZVE32F-NEXT: .LBB29_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB29_8
; RV64ZVE32F-NEXT: j .LBB29_9
; RV64ZVE32F-NEXT: .LBB29_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB29_11
; RV64ZVE32F-NEXT: .LBB29_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v10
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a0)
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i32, ptr %base, <8 x i8> %idxs
call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
ret void
}
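; For i32 elements the index scaling becomes slli 2 (log2 of the element
; size). The RV32 configurations instead widen the i8 indices in vector
; registers and pre-scale them so one masked indexed store covers the whole
; scatter, as in the RV32 block above:
;   vsext.vf4 v12, v10  # i8 -> i32 indices
;   vsll.vi v10, v12, 2 # indices -> byte offsets
;   vsoxei32.v v8, (a0), v10, v0.t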
define void @mscatter_baseidx_sext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_v8i8_v8i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf4 v12, v10
; RV32-NEXT: vsll.vi v10, v12, 2
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_sext_v8i8_v8i32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf8 v12, v10
; RV64V-NEXT: vsll.vi v12, v12, 2
; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB30_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a2)
; RV64ZVE32F-NEXT: .LBB30_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB30_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
; RV64ZVE32F-NEXT: vse32.v v11, (a2)
; RV64ZVE32F-NEXT: .LBB30_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB30_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB30_13
; RV64ZVE32F-NEXT: .LBB30_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB30_14
; RV64ZVE32F-NEXT: .LBB30_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB30_9
; RV64ZVE32F-NEXT: .LBB30_8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB30_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB30_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB30_16
; RV64ZVE32F-NEXT: .LBB30_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB30_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB30_6
; RV64ZVE32F-NEXT: .LBB30_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB30_7
; RV64ZVE32F-NEXT: .LBB30_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB30_8
; RV64ZVE32F-NEXT: j .LBB30_9
; RV64ZVE32F-NEXT: .LBB30_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB30_11
; RV64ZVE32F-NEXT: .LBB30_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v10
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a0)
; RV64ZVE32F-NEXT: ret
%eidxs = sext <8 x i8> %idxs to <8 x i32>
%ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
ret void
}
define void @mscatter_baseidx_zext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_v8i8_v8i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vzext.vf2 v11, v10
; RV32-NEXT: vsll.vi v10, v11, 2
; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_zext_v8i8_v8i32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64V-NEXT: vzext.vf2 v11, v10
; RV64V-NEXT: vsll.vi v10, v11, 2
; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64V-NEXT: vsoxei16.v v8, (a0), v10, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB31_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a2)
; RV64ZVE32F-NEXT: .LBB31_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB31_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
; RV64ZVE32F-NEXT: vse32.v v11, (a2)
; RV64ZVE32F-NEXT: .LBB31_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB31_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB31_13
; RV64ZVE32F-NEXT: .LBB31_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB31_14
; RV64ZVE32F-NEXT: .LBB31_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB31_9
; RV64ZVE32F-NEXT: .LBB31_8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB31_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB31_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB31_16
; RV64ZVE32F-NEXT: .LBB31_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB31_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB31_6
; RV64ZVE32F-NEXT: .LBB31_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB31_7
; RV64ZVE32F-NEXT: .LBB31_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB31_8
; RV64ZVE32F-NEXT: j .LBB31_9
; RV64ZVE32F-NEXT: .LBB31_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB31_11
; RV64ZVE32F-NEXT: .LBB31_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v10
; RV64ZVE32F-NEXT: andi a1, a1, 255
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a0)
; RV64ZVE32F-NEXT: ret
%eidxs = zext <8 x i8> %idxs to <8 x i32>
%ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
ret void
}
define void @mscatter_baseidx_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_v8i16_v8i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf2 v12, v10
; RV32-NEXT: vsll.vi v10, v12, 2
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_v8i16_v8i32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf4 v12, v10
; RV64V-NEXT: vsll.vi v12, v12, 2
; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v8i16_v8i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB32_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a2)
; RV64ZVE32F-NEXT: .LBB32_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB32_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
; RV64ZVE32F-NEXT: vse32.v v11, (a2)
; RV64ZVE32F-NEXT: .LBB32_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB32_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB32_13
; RV64ZVE32F-NEXT: .LBB32_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB32_14
; RV64ZVE32F-NEXT: .LBB32_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB32_9
; RV64ZVE32F-NEXT: .LBB32_8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB32_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB32_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB32_16
; RV64ZVE32F-NEXT: .LBB32_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB32_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB32_6
; RV64ZVE32F-NEXT: .LBB32_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB32_7
; RV64ZVE32F-NEXT: .LBB32_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB32_8
; RV64ZVE32F-NEXT: j .LBB32_9
; RV64ZVE32F-NEXT: .LBB32_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB32_11
; RV64ZVE32F-NEXT: .LBB32_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v10
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a0)
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i32, ptr %base, <8 x i16> %idxs
call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
ret void
}
define void @mscatter_baseidx_sext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_v8i16_v8i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf2 v12, v10
; RV32-NEXT: vsll.vi v10, v12, 2
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_sext_v8i16_v8i32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf4 v12, v10
; RV64V-NEXT: vsll.vi v12, v12, 2
; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i16_v8i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB33_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a2)
; RV64ZVE32F-NEXT: .LBB33_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB33_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
; RV64ZVE32F-NEXT: vse32.v v11, (a2)
; RV64ZVE32F-NEXT: .LBB33_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB33_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB33_13
; RV64ZVE32F-NEXT: .LBB33_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB33_14
; RV64ZVE32F-NEXT: .LBB33_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB33_9
; RV64ZVE32F-NEXT: .LBB33_8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB33_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB33_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB33_16
; RV64ZVE32F-NEXT: .LBB33_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB33_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB33_6
; RV64ZVE32F-NEXT: .LBB33_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB33_7
; RV64ZVE32F-NEXT: .LBB33_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB33_8
; RV64ZVE32F-NEXT: j .LBB33_9
; RV64ZVE32F-NEXT: .LBB33_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB33_11
; RV64ZVE32F-NEXT: .LBB33_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v10
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a0)
; RV64ZVE32F-NEXT: ret
%eidxs = sext <8 x i16> %idxs to <8 x i32>
%ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
ret void
}
define void @mscatter_baseidx_zext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_v8i16_v8i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vzext.vf2 v12, v10
; RV32-NEXT: vsll.vi v10, v12, 2
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_zext_v8i16_v8i32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64V-NEXT: vzext.vf2 v12, v10
; RV64V-NEXT: vsll.vi v10, v12, 2
; RV64V-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB34_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 48
; RV64ZVE32F-NEXT: srli a2, a2, 46
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a2)
; RV64ZVE32F-NEXT: .LBB34_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB34_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 48
; RV64ZVE32F-NEXT: srli a2, a2, 46
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
; RV64ZVE32F-NEXT: vse32.v v11, (a2)
; RV64ZVE32F-NEXT: .LBB34_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB34_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB34_13
; RV64ZVE32F-NEXT: .LBB34_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB34_14
; RV64ZVE32F-NEXT: .LBB34_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB34_9
; RV64ZVE32F-NEXT: .LBB34_8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 48
; RV64ZVE32F-NEXT: srli a2, a2, 46
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB34_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB34_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB34_16
; RV64ZVE32F-NEXT: .LBB34_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB34_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 48
; RV64ZVE32F-NEXT: srli a2, a2, 46
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB34_6
; RV64ZVE32F-NEXT: .LBB34_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 48
; RV64ZVE32F-NEXT: srli a2, a2, 46
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB34_7
; RV64ZVE32F-NEXT: .LBB34_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 48
; RV64ZVE32F-NEXT: srli a2, a2, 46
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB34_8
; RV64ZVE32F-NEXT: j .LBB34_9
; RV64ZVE32F-NEXT: .LBB34_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 48
; RV64ZVE32F-NEXT: srli a2, a2, 46
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB34_11
; RV64ZVE32F-NEXT: .LBB34_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v10
; RV64ZVE32F-NEXT: slli a1, a1, 48
; RV64ZVE32F-NEXT: srli a1, a1, 46
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a0)
; RV64ZVE32F-NEXT: ret
%eidxs = zext <8 x i16> %idxs to <8 x i32>
%ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
ret void
}
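; The slli/srli pair in the scalarized blocks above fuses the zero-extension
; with the address scaling: shifting a 16-bit index left by 48 clears
; everything above bit 15, and shifting back right by only 46 leaves the
; value multiplied by 4, i.e.
;   slli a2, a2, 48  # keep only idx[15:0] in the top bits
;   srli a2, a2, 46  # == (zext i16 idx) << 2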
2718 define void @mscatter_baseidx_v8i32(<8 x i32> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m) {
2719 ; RV32-LABEL: mscatter_baseidx_v8i32:
2721 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
2722 ; RV32-NEXT: vsll.vi v10, v10, 2
2723 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
2726 ; RV64V-LABEL: mscatter_baseidx_v8i32:
2728 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
2729 ; RV64V-NEXT: vsext.vf2 v12, v10
2730 ; RV64V-NEXT: vsll.vi v12, v12, 2
2731 ; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2732 ; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64ZVE32F-LABEL: mscatter_baseidx_v8i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB35_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vse32.v v8, (a2)
; RV64ZVE32F-NEXT: .LBB35_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB35_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB35_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB35_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB35_13
; RV64ZVE32F-NEXT: .LBB35_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB35_14
; RV64ZVE32F-NEXT: .LBB35_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB35_9
; RV64ZVE32F-NEXT: .LBB35_8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
; RV64ZVE32F-NEXT: .LBB35_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v12, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB35_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB35_16
; RV64ZVE32F-NEXT: .LBB35_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB35_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v11, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB35_6
; RV64ZVE32F-NEXT: .LBB35_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB35_7
; RV64ZVE32F-NEXT: .LBB35_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB35_8
; RV64ZVE32F-NEXT: j .LBB35_9
; RV64ZVE32F-NEXT: .LBB35_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB35_11
; RV64ZVE32F-NEXT: .LBB35_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v10
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a0)
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %idxs
call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
ret void
}

declare void @llvm.masked.scatter.v1i64.v1p0(<1 x i64>, <1 x ptr>, i32, <1 x i1>)

define void @mscatter_v1i64(<1 x i64> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
; RV32V-LABEL: mscatter_v1i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_v1i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v1i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
; RV32ZVE32F-NEXT: vfirst.m a2, v0
; RV32ZVE32F-NEXT: bnez a2, .LBB36_2
; RV32ZVE32F-NEXT: # %bb.1: # %cond.store
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: sw a0, 0(a2)
; RV32ZVE32F-NEXT: sw a1, 4(a2)
; RV32ZVE32F-NEXT: .LBB36_2: # %else
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v1i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vfirst.m a2, v0
; RV64ZVE32F-NEXT: bnez a2, .LBB36_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: sd a0, 0(a1)
; RV64ZVE32F-NEXT: .LBB36_2: # %else
; RV64ZVE32F-NEXT: ret
call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> %val, <1 x ptr> %ptrs, i32 8, <1 x i1> %m)
ret void
}

declare void @llvm.masked.scatter.v2i64.v2p0(<2 x i64>, <2 x ptr>, i32, <2 x i1>)

define void @mscatter_v2i64(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV32V-LABEL: mscatter_v2i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_v2i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v2i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: lw a1, 8(a0)
; RV32ZVE32F-NEXT: lw a2, 12(a0)
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a3, v0
; RV32ZVE32F-NEXT: andi a4, a3, 1
; RV32ZVE32F-NEXT: bnez a4, .LBB37_3
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a3, a3, 2
; RV32ZVE32F-NEXT: bnez a3, .LBB37_4
; RV32ZVE32F-NEXT: .LBB37_2: # %else2
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB37_3: # %cond.store
; RV32ZVE32F-NEXT: lw a4, 0(a0)
; RV32ZVE32F-NEXT: lw a0, 4(a0)
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a5, v8
; RV32ZVE32F-NEXT: sw a4, 0(a5)
; RV32ZVE32F-NEXT: sw a0, 4(a5)
; RV32ZVE32F-NEXT: andi a3, a3, 2
; RV32ZVE32F-NEXT: beqz a3, .LBB37_2
; RV32ZVE32F-NEXT: .LBB37_4: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw a1, 0(a0)
; RV32ZVE32F-NEXT: sw a2, 4(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v2i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a4, v0
; RV64ZVE32F-NEXT: andi a5, a4, 1
; RV64ZVE32F-NEXT: bnez a5, .LBB37_3
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a4, a4, 2
; RV64ZVE32F-NEXT: bnez a4, .LBB37_4
; RV64ZVE32F-NEXT: .LBB37_2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB37_3: # %cond.store
; RV64ZVE32F-NEXT: sd a0, 0(a2)
; RV64ZVE32F-NEXT: andi a4, a4, 2
; RV64ZVE32F-NEXT: beqz a4, .LBB37_2
; RV64ZVE32F-NEXT: .LBB37_4: # %cond.store1
; RV64ZVE32F-NEXT: sd a1, 0(a3)
; RV64ZVE32F-NEXT: ret
call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> %val, <2 x ptr> %ptrs, i32 8, <2 x i1> %m)
ret void
}

declare void @llvm.masked.scatter.v4i64.v4p0(<4 x i64>, <4 x ptr>, i32, <4 x i1>)

define void @mscatter_v4i64(<4 x i64> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV32V-LABEL: mscatter_v4i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32V-NEXT: vsoxei32.v v8, (zero), v10, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_v4i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (zero), v10, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v4i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: lw a1, 24(a0)
; RV32ZVE32F-NEXT: lw a2, 28(a0)
; RV32ZVE32F-NEXT: lw a6, 8(a0)
; RV32ZVE32F-NEXT: lw a7, 12(a0)
; RV32ZVE32F-NEXT: lw a3, 16(a0)
; RV32ZVE32F-NEXT: lw a4, 20(a0)
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a5, v0
; RV32ZVE32F-NEXT: andi t0, a5, 1
; RV32ZVE32F-NEXT: bnez t0, .LBB38_5
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, a5, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB38_6
; RV32ZVE32F-NEXT: .LBB38_2: # %else2
; RV32ZVE32F-NEXT: andi a0, a5, 4
; RV32ZVE32F-NEXT: bnez a0, .LBB38_7
; RV32ZVE32F-NEXT: .LBB38_3: # %else4
; RV32ZVE32F-NEXT: andi a5, a5, 8
; RV32ZVE32F-NEXT: bnez a5, .LBB38_8
; RV32ZVE32F-NEXT: .LBB38_4: # %else6
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB38_5: # %cond.store
; RV32ZVE32F-NEXT: lw t0, 0(a0)
; RV32ZVE32F-NEXT: lw a0, 4(a0)
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s t1, v8
; RV32ZVE32F-NEXT: sw t0, 0(t1)
; RV32ZVE32F-NEXT: sw a0, 4(t1)
; RV32ZVE32F-NEXT: andi a0, a5, 2
; RV32ZVE32F-NEXT: beqz a0, .LBB38_2
; RV32ZVE32F-NEXT: .LBB38_6: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v9
; RV32ZVE32F-NEXT: sw a6, 0(a0)
; RV32ZVE32F-NEXT: sw a7, 4(a0)
; RV32ZVE32F-NEXT: andi a0, a5, 4
; RV32ZVE32F-NEXT: beqz a0, .LBB38_3
; RV32ZVE32F-NEXT: .LBB38_7: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v9
; RV32ZVE32F-NEXT: sw a3, 0(a0)
; RV32ZVE32F-NEXT: sw a4, 4(a0)
; RV32ZVE32F-NEXT: andi a5, a5, 8
; RV32ZVE32F-NEXT: beqz a5, .LBB38_4
; RV32ZVE32F-NEXT: .LBB38_8: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw a1, 0(a0)
; RV32ZVE32F-NEXT: sw a2, 4(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v4i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a6, 8(a1)
; RV64ZVE32F-NEXT: ld a4, 16(a1)
; RV64ZVE32F-NEXT: ld a2, 24(a1)
; RV64ZVE32F-NEXT: ld t0, 8(a0)
; RV64ZVE32F-NEXT: ld a5, 16(a0)
; RV64ZVE32F-NEXT: ld a3, 24(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a7, v0
; RV64ZVE32F-NEXT: andi t1, a7, 1
; RV64ZVE32F-NEXT: bnez t1, .LBB38_5
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a0, a7, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB38_6
; RV64ZVE32F-NEXT: .LBB38_2: # %else2
; RV64ZVE32F-NEXT: andi a0, a7, 4
; RV64ZVE32F-NEXT: bnez a0, .LBB38_7
; RV64ZVE32F-NEXT: .LBB38_3: # %else4
; RV64ZVE32F-NEXT: andi a0, a7, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB38_8
; RV64ZVE32F-NEXT: .LBB38_4: # %else6
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB38_5: # %cond.store
; RV64ZVE32F-NEXT: ld a1, 0(a1)
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: sd a0, 0(a1)
; RV64ZVE32F-NEXT: andi a0, a7, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB38_2
; RV64ZVE32F-NEXT: .LBB38_6: # %cond.store1
; RV64ZVE32F-NEXT: sd t0, 0(a6)
; RV64ZVE32F-NEXT: andi a0, a7, 4
; RV64ZVE32F-NEXT: beqz a0, .LBB38_3
; RV64ZVE32F-NEXT: .LBB38_7: # %cond.store3
; RV64ZVE32F-NEXT: sd a5, 0(a4)
; RV64ZVE32F-NEXT: andi a0, a7, 8
; RV64ZVE32F-NEXT: beqz a0, .LBB38_4
; RV64ZVE32F-NEXT: .LBB38_8: # %cond.store5
; RV64ZVE32F-NEXT: sd a3, 0(a2)
; RV64ZVE32F-NEXT: ret
call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> %val, <4 x ptr> %ptrs, i32 8, <4 x i1> %m)
ret void
}

define void @mscatter_truemask_v4i64(<4 x i64> %val, <4 x ptr> %ptrs) {
; RV32V-LABEL: mscatter_truemask_v4i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32V-NEXT: vsoxei32.v v8, (zero), v10
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_truemask_v4i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (zero), v10
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_truemask_v4i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: lw a1, 16(a0)
; RV32ZVE32F-NEXT: lw a2, 20(a0)
; RV32ZVE32F-NEXT: lw a3, 24(a0)
; RV32ZVE32F-NEXT: lw a4, 28(a0)
; RV32ZVE32F-NEXT: lw a5, 0(a0)
; RV32ZVE32F-NEXT: lw a6, 4(a0)
; RV32ZVE32F-NEXT: lw a7, 8(a0)
; RV32ZVE32F-NEXT: lw a0, 12(a0)
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s t0, v8
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s t1, v9
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3
; RV32ZVE32F-NEXT: sw a5, 0(t0)
; RV32ZVE32F-NEXT: sw a6, 4(t0)
; RV32ZVE32F-NEXT: vmv.x.s a5, v9
; RV32ZVE32F-NEXT: vmv.x.s a6, v8
; RV32ZVE32F-NEXT: sw a7, 0(t1)
; RV32ZVE32F-NEXT: sw a0, 4(t1)
; RV32ZVE32F-NEXT: sw a1, 0(a5)
; RV32ZVE32F-NEXT: sw a2, 4(a5)
; RV32ZVE32F-NEXT: sw a3, 0(a6)
; RV32ZVE32F-NEXT: sw a4, 4(a6)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_truemask_v4i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a2, 0(a1)
; RV64ZVE32F-NEXT: ld a3, 8(a1)
; RV64ZVE32F-NEXT: ld a4, 16(a1)
; RV64ZVE32F-NEXT: ld a1, 24(a1)
; RV64ZVE32F-NEXT: ld a5, 0(a0)
; RV64ZVE32F-NEXT: ld a6, 8(a0)
; RV64ZVE32F-NEXT: ld a7, 16(a0)
; RV64ZVE32F-NEXT: ld a0, 24(a0)
; RV64ZVE32F-NEXT: sd a5, 0(a2)
; RV64ZVE32F-NEXT: sd a6, 0(a3)
; RV64ZVE32F-NEXT: sd a7, 0(a4)
; RV64ZVE32F-NEXT: sd a0, 0(a1)
; RV64ZVE32F-NEXT: ret
call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> %val, <4 x ptr> %ptrs, i32 8, <4 x i1> splat (i1 1))
ret void
}

define void @mscatter_falsemask_v4i64(<4 x i64> %val, <4 x ptr> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_v4i64:
; CHECK: # %bb.0:
; CHECK-NEXT: ret
call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> %val, <4 x ptr> %ptrs, i32 8, <4 x i1> zeroinitializer)
ret void
}

declare void @llvm.masked.scatter.v8i64.v8p0(<8 x i64>, <8 x ptr>, i32, <8 x i1>)

define void @mscatter_v8i64(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV32V-LABEL: mscatter_v8i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32V-NEXT: vsoxei32.v v8, (zero), v12, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_v8i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (zero), v12, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v8i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: .cfi_offset s2, -12
; RV32ZVE32F-NEXT: .cfi_remember_state
; RV32ZVE32F-NEXT: lw a1, 56(a0)
; RV32ZVE32F-NEXT: lw a2, 60(a0)
; RV32ZVE32F-NEXT: lw a5, 40(a0)
; RV32ZVE32F-NEXT: lw a6, 44(a0)
; RV32ZVE32F-NEXT: lw a3, 48(a0)
; RV32ZVE32F-NEXT: lw a4, 52(a0)
; RV32ZVE32F-NEXT: lw t2, 24(a0)
; RV32ZVE32F-NEXT: lw t3, 28(a0)
; RV32ZVE32F-NEXT: lw t0, 32(a0)
; RV32ZVE32F-NEXT: lw t1, 36(a0)
; RV32ZVE32F-NEXT: lw t6, 8(a0)
; RV32ZVE32F-NEXT: lw s0, 12(a0)
; RV32ZVE32F-NEXT: lw t4, 16(a0)
; RV32ZVE32F-NEXT: lw t5, 20(a0)
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a7, v0
; RV32ZVE32F-NEXT: andi s1, a7, 1
; RV32ZVE32F-NEXT: bnez s1, .LBB41_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, a7, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB41_11
; RV32ZVE32F-NEXT: .LBB41_2: # %else2
; RV32ZVE32F-NEXT: andi a0, a7, 4
; RV32ZVE32F-NEXT: bnez a0, .LBB41_12
; RV32ZVE32F-NEXT: .LBB41_3: # %else4
; RV32ZVE32F-NEXT: andi a0, a7, 8
; RV32ZVE32F-NEXT: bnez a0, .LBB41_13
; RV32ZVE32F-NEXT: .LBB41_4: # %else6
; RV32ZVE32F-NEXT: andi a0, a7, 16
; RV32ZVE32F-NEXT: bnez a0, .LBB41_14
; RV32ZVE32F-NEXT: .LBB41_5: # %else8
; RV32ZVE32F-NEXT: andi a0, a7, 32
; RV32ZVE32F-NEXT: bnez a0, .LBB41_15
; RV32ZVE32F-NEXT: .LBB41_6: # %else10
; RV32ZVE32F-NEXT: andi a0, a7, 64
; RV32ZVE32F-NEXT: bnez a0, .LBB41_16
; RV32ZVE32F-NEXT: .LBB41_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a7, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB41_9
; RV32ZVE32F-NEXT: .LBB41_8: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw a1, 0(a0)
; RV32ZVE32F-NEXT: sw a2, 4(a0)
; RV32ZVE32F-NEXT: .LBB41_9: # %else14
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: .cfi_restore s0
; RV32ZVE32F-NEXT: .cfi_restore s1
; RV32ZVE32F-NEXT: .cfi_restore s2
; RV32ZVE32F-NEXT: addi sp, sp, 16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 0
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB41_10: # %cond.store
; RV32ZVE32F-NEXT: .cfi_restore_state
; RV32ZVE32F-NEXT: lw s1, 0(a0)
; RV32ZVE32F-NEXT: lw a0, 4(a0)
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s s2, v8
; RV32ZVE32F-NEXT: sw s1, 0(s2)
; RV32ZVE32F-NEXT: sw a0, 4(s2)
; RV32ZVE32F-NEXT: andi a0, a7, 2
; RV32ZVE32F-NEXT: beqz a0, .LBB41_2
; RV32ZVE32F-NEXT: .LBB41_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t6, 0(a0)
; RV32ZVE32F-NEXT: sw s0, 4(a0)
; RV32ZVE32F-NEXT: andi a0, a7, 4
; RV32ZVE32F-NEXT: beqz a0, .LBB41_3
; RV32ZVE32F-NEXT: .LBB41_12: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t4, 0(a0)
; RV32ZVE32F-NEXT: sw t5, 4(a0)
; RV32ZVE32F-NEXT: andi a0, a7, 8
; RV32ZVE32F-NEXT: beqz a0, .LBB41_4
; RV32ZVE32F-NEXT: .LBB41_13: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t2, 0(a0)
; RV32ZVE32F-NEXT: sw t3, 4(a0)
; RV32ZVE32F-NEXT: andi a0, a7, 16
; RV32ZVE32F-NEXT: beqz a0, .LBB41_5
; RV32ZVE32F-NEXT: .LBB41_14: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t0, 0(a0)
; RV32ZVE32F-NEXT: sw t1, 4(a0)
; RV32ZVE32F-NEXT: andi a0, a7, 32
; RV32ZVE32F-NEXT: beqz a0, .LBB41_6
; RV32ZVE32F-NEXT: .LBB41_15: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a5, 0(a0)
; RV32ZVE32F-NEXT: sw a6, 4(a0)
; RV32ZVE32F-NEXT: andi a0, a7, 64
; RV32ZVE32F-NEXT: beqz a0, .LBB41_7
; RV32ZVE32F-NEXT: .LBB41_16: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a3, 0(a0)
; RV32ZVE32F-NEXT: sw a4, 4(a0)
; RV32ZVE32F-NEXT: andi a0, a7, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB41_8
; RV32ZVE32F-NEXT: j .LBB41_9
;
; RV64ZVE32F-LABEL: mscatter_v8i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: addi sp, sp, -32
; RV64ZVE32F-NEXT: .cfi_def_cfa_offset 32
; RV64ZVE32F-NEXT: sd s0, 24(sp) # 8-byte Folded Spill
; RV64ZVE32F-NEXT: sd s1, 16(sp) # 8-byte Folded Spill
; RV64ZVE32F-NEXT: sd s2, 8(sp) # 8-byte Folded Spill
; RV64ZVE32F-NEXT: .cfi_offset s0, -8
; RV64ZVE32F-NEXT: .cfi_offset s1, -16
; RV64ZVE32F-NEXT: .cfi_offset s2, -24
; RV64ZVE32F-NEXT: .cfi_remember_state
; RV64ZVE32F-NEXT: ld a4, 40(a1)
; RV64ZVE32F-NEXT: ld a3, 48(a1)
; RV64ZVE32F-NEXT: ld a2, 56(a1)
; RV64ZVE32F-NEXT: ld t5, 8(a1)
; RV64ZVE32F-NEXT: ld t3, 16(a1)
; RV64ZVE32F-NEXT: ld t2, 24(a1)
; RV64ZVE32F-NEXT: ld t0, 32(a1)
; RV64ZVE32F-NEXT: ld a7, 40(a0)
; RV64ZVE32F-NEXT: ld a6, 48(a0)
; RV64ZVE32F-NEXT: ld a5, 56(a0)
; RV64ZVE32F-NEXT: ld s1, 8(a0)
; RV64ZVE32F-NEXT: ld s0, 16(a0)
; RV64ZVE32F-NEXT: ld t6, 24(a0)
; RV64ZVE32F-NEXT: ld t4, 32(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s t1, v0
; RV64ZVE32F-NEXT: andi s2, t1, 1
; RV64ZVE32F-NEXT: bnez s2, .LBB41_10
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a0, t1, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB41_11
; RV64ZVE32F-NEXT: .LBB41_2: # %else2
; RV64ZVE32F-NEXT: andi a0, t1, 4
; RV64ZVE32F-NEXT: bnez a0, .LBB41_12
; RV64ZVE32F-NEXT: .LBB41_3: # %else4
; RV64ZVE32F-NEXT: andi a0, t1, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB41_13
; RV64ZVE32F-NEXT: .LBB41_4: # %else6
; RV64ZVE32F-NEXT: andi a0, t1, 16
; RV64ZVE32F-NEXT: bnez a0, .LBB41_14
; RV64ZVE32F-NEXT: .LBB41_5: # %else8
; RV64ZVE32F-NEXT: andi a0, t1, 32
; RV64ZVE32F-NEXT: bnez a0, .LBB41_15
; RV64ZVE32F-NEXT: .LBB41_6: # %else10
; RV64ZVE32F-NEXT: andi a0, t1, 64
; RV64ZVE32F-NEXT: bnez a0, .LBB41_16
; RV64ZVE32F-NEXT: .LBB41_7: # %else12
; RV64ZVE32F-NEXT: andi a0, t1, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB41_9
; RV64ZVE32F-NEXT: .LBB41_8: # %cond.store13
; RV64ZVE32F-NEXT: sd a5, 0(a2)
; RV64ZVE32F-NEXT: .LBB41_9: # %else14
; RV64ZVE32F-NEXT: ld s0, 24(sp) # 8-byte Folded Reload
; RV64ZVE32F-NEXT: ld s1, 16(sp) # 8-byte Folded Reload
; RV64ZVE32F-NEXT: ld s2, 8(sp) # 8-byte Folded Reload
; RV64ZVE32F-NEXT: .cfi_restore s0
; RV64ZVE32F-NEXT: .cfi_restore s1
; RV64ZVE32F-NEXT: .cfi_restore s2
; RV64ZVE32F-NEXT: addi sp, sp, 32
; RV64ZVE32F-NEXT: .cfi_def_cfa_offset 0
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB41_10: # %cond.store
; RV64ZVE32F-NEXT: .cfi_restore_state
; RV64ZVE32F-NEXT: ld a1, 0(a1)
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: sd a0, 0(a1)
; RV64ZVE32F-NEXT: andi a0, t1, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB41_2
; RV64ZVE32F-NEXT: .LBB41_11: # %cond.store1
; RV64ZVE32F-NEXT: sd s1, 0(t5)
; RV64ZVE32F-NEXT: andi a0, t1, 4
; RV64ZVE32F-NEXT: beqz a0, .LBB41_3
; RV64ZVE32F-NEXT: .LBB41_12: # %cond.store3
; RV64ZVE32F-NEXT: sd s0, 0(t3)
; RV64ZVE32F-NEXT: andi a0, t1, 8
; RV64ZVE32F-NEXT: beqz a0, .LBB41_4
; RV64ZVE32F-NEXT: .LBB41_13: # %cond.store5
; RV64ZVE32F-NEXT: sd t6, 0(t2)
; RV64ZVE32F-NEXT: andi a0, t1, 16
; RV64ZVE32F-NEXT: beqz a0, .LBB41_5
; RV64ZVE32F-NEXT: .LBB41_14: # %cond.store7
; RV64ZVE32F-NEXT: sd t4, 0(t0)
; RV64ZVE32F-NEXT: andi a0, t1, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB41_6
; RV64ZVE32F-NEXT: .LBB41_15: # %cond.store9
; RV64ZVE32F-NEXT: sd a7, 0(a4)
; RV64ZVE32F-NEXT: andi a0, t1, 64
; RV64ZVE32F-NEXT: beqz a0, .LBB41_7
; RV64ZVE32F-NEXT: .LBB41_16: # %cond.store11
; RV64ZVE32F-NEXT: sd a6, 0(a3)
; RV64ZVE32F-NEXT: andi a0, t1, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB41_8
; RV64ZVE32F-NEXT: j .LBB41_9
call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
ret void
}

define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
; RV32V-LABEL: mscatter_baseidx_v8i8_v8i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vsext.vf4 v14, v12
; RV32V-NEXT: vsll.vi v12, v14, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_v8i8_v8i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf8 v16, v12
; RV64V-NEXT: vsll.vi v12, v16, 3
; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_baseidx_v8i8_v8i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: .cfi_offset s2, -12
; RV32ZVE32F-NEXT: .cfi_remember_state
; RV32ZVE32F-NEXT: lw a2, 56(a0)
; RV32ZVE32F-NEXT: lw a3, 60(a0)
; RV32ZVE32F-NEXT: lw a6, 40(a0)
; RV32ZVE32F-NEXT: lw a7, 44(a0)
; RV32ZVE32F-NEXT: lw a4, 48(a0)
; RV32ZVE32F-NEXT: lw a5, 52(a0)
; RV32ZVE32F-NEXT: lw t3, 24(a0)
; RV32ZVE32F-NEXT: lw t4, 28(a0)
; RV32ZVE32F-NEXT: lw t1, 32(a0)
; RV32ZVE32F-NEXT: lw t2, 36(a0)
; RV32ZVE32F-NEXT: lw s0, 8(a0)
; RV32ZVE32F-NEXT: lw s1, 12(a0)
; RV32ZVE32F-NEXT: lw t5, 16(a0)
; RV32ZVE32F-NEXT: lw t6, 20(a0)
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsext.vf4 v10, v8
; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s t0, v0
; RV32ZVE32F-NEXT: andi s2, t0, 1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: bnez s2, .LBB42_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB42_11
; RV32ZVE32F-NEXT: .LBB42_2: # %else2
; RV32ZVE32F-NEXT: andi a0, t0, 4
; RV32ZVE32F-NEXT: bnez a0, .LBB42_12
; RV32ZVE32F-NEXT: .LBB42_3: # %else4
; RV32ZVE32F-NEXT: andi a0, t0, 8
; RV32ZVE32F-NEXT: bnez a0, .LBB42_13
; RV32ZVE32F-NEXT: .LBB42_4: # %else6
; RV32ZVE32F-NEXT: andi a0, t0, 16
; RV32ZVE32F-NEXT: bnez a0, .LBB42_14
; RV32ZVE32F-NEXT: .LBB42_5: # %else8
; RV32ZVE32F-NEXT: andi a0, t0, 32
; RV32ZVE32F-NEXT: bnez a0, .LBB42_15
; RV32ZVE32F-NEXT: .LBB42_6: # %else10
; RV32ZVE32F-NEXT: andi a0, t0, 64
; RV32ZVE32F-NEXT: bnez a0, .LBB42_16
; RV32ZVE32F-NEXT: .LBB42_7: # %else12
; RV32ZVE32F-NEXT: andi a0, t0, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB42_9
; RV32ZVE32F-NEXT: .LBB42_8: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw a2, 0(a0)
; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: .LBB42_9: # %else14
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: .cfi_restore s0
; RV32ZVE32F-NEXT: .cfi_restore s1
; RV32ZVE32F-NEXT: .cfi_restore s2
; RV32ZVE32F-NEXT: addi sp, sp, 16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 0
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB42_10: # %cond.store
; RV32ZVE32F-NEXT: .cfi_restore_state
; RV32ZVE32F-NEXT: lw a1, 0(a0)
; RV32ZVE32F-NEXT: lw a0, 4(a0)
; RV32ZVE32F-NEXT: vmv.x.s s2, v8
; RV32ZVE32F-NEXT: sw a1, 0(s2)
; RV32ZVE32F-NEXT: sw a0, 4(s2)
; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: beqz a0, .LBB42_2
; RV32ZVE32F-NEXT: .LBB42_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw s0, 0(a0)
; RV32ZVE32F-NEXT: sw s1, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 4
; RV32ZVE32F-NEXT: beqz a0, .LBB42_3
; RV32ZVE32F-NEXT: .LBB42_12: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t5, 0(a0)
; RV32ZVE32F-NEXT: sw t6, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 8
; RV32ZVE32F-NEXT: beqz a0, .LBB42_4
; RV32ZVE32F-NEXT: .LBB42_13: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t3, 0(a0)
; RV32ZVE32F-NEXT: sw t4, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 16
; RV32ZVE32F-NEXT: beqz a0, .LBB42_5
; RV32ZVE32F-NEXT: .LBB42_14: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t1, 0(a0)
; RV32ZVE32F-NEXT: sw t2, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 32
; RV32ZVE32F-NEXT: beqz a0, .LBB42_6
; RV32ZVE32F-NEXT: .LBB42_15: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a6, 0(a0)
; RV32ZVE32F-NEXT: sw a7, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 64
; RV32ZVE32F-NEXT: beqz a0, .LBB42_7
; RV32ZVE32F-NEXT: .LBB42_16: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a4, 0(a0)
; RV32ZVE32F-NEXT: sw a5, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB42_8
; RV32ZVE32F-NEXT: j .LBB42_9
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8_v8i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a4, 40(a0)
; RV64ZVE32F-NEXT: ld a3, 48(a0)
; RV64ZVE32F-NEXT: ld a2, 56(a0)
; RV64ZVE32F-NEXT: ld t1, 8(a0)
; RV64ZVE32F-NEXT: ld t0, 16(a0)
; RV64ZVE32F-NEXT: ld a7, 24(a0)
; RV64ZVE32F-NEXT: ld a6, 32(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v0
; RV64ZVE32F-NEXT: andi t2, a5, 1
; RV64ZVE32F-NEXT: beqz t2, .LBB42_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
; RV64ZVE32F-NEXT: slli t2, t2, 3
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: sd a0, 0(t2)
; RV64ZVE32F-NEXT: .LBB42_2: # %else
; RV64ZVE32F-NEXT: andi a0, a5, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB42_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd t1, 0(a0)
; RV64ZVE32F-NEXT: .LBB42_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a0, a5, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB42_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB42_13
; RV64ZVE32F-NEXT: .LBB42_6: # %else6
; RV64ZVE32F-NEXT: andi a0, a5, 16
; RV64ZVE32F-NEXT: bnez a0, .LBB42_14
; RV64ZVE32F-NEXT: .LBB42_7: # %else8
; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB42_9
; RV64ZVE32F-NEXT: .LBB42_8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a4, 0(a0)
; RV64ZVE32F-NEXT: .LBB42_9: # %else10
; RV64ZVE32F-NEXT: andi a0, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB42_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB42_16
; RV64ZVE32F-NEXT: .LBB42_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB42_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd t0, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: beqz a0, .LBB42_6
; RV64ZVE32F-NEXT: .LBB42_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a7, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, 16
; RV64ZVE32F-NEXT: beqz a0, .LBB42_7
; RV64ZVE32F-NEXT: .LBB42_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a6, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: bnez a0, .LBB42_8
; RV64ZVE32F-NEXT: j .LBB42_9
; RV64ZVE32F-NEXT: .LBB42_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB42_11
; RV64ZVE32F-NEXT: .LBB42_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a2, 0(a0)
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i64, ptr %base, <8 x i8> %idxs
call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
ret void
}

define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
; RV32V-LABEL: mscatter_baseidx_sext_v8i8_v8i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vsext.vf4 v14, v12
; RV32V-NEXT: vsll.vi v12, v14, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_sext_v8i8_v8i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf8 v16, v12
; RV64V-NEXT: vsll.vi v12, v16, 3
; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: .cfi_offset s2, -12
; RV32ZVE32F-NEXT: .cfi_remember_state
; RV32ZVE32F-NEXT: lw a2, 56(a0)
; RV32ZVE32F-NEXT: lw a3, 60(a0)
; RV32ZVE32F-NEXT: lw a6, 40(a0)
; RV32ZVE32F-NEXT: lw a7, 44(a0)
; RV32ZVE32F-NEXT: lw a4, 48(a0)
; RV32ZVE32F-NEXT: lw a5, 52(a0)
; RV32ZVE32F-NEXT: lw t3, 24(a0)
; RV32ZVE32F-NEXT: lw t4, 28(a0)
; RV32ZVE32F-NEXT: lw t1, 32(a0)
; RV32ZVE32F-NEXT: lw t2, 36(a0)
; RV32ZVE32F-NEXT: lw s0, 8(a0)
; RV32ZVE32F-NEXT: lw s1, 12(a0)
; RV32ZVE32F-NEXT: lw t5, 16(a0)
; RV32ZVE32F-NEXT: lw t6, 20(a0)
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsext.vf4 v10, v8
; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s t0, v0
; RV32ZVE32F-NEXT: andi s2, t0, 1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: bnez s2, .LBB43_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB43_11
; RV32ZVE32F-NEXT: .LBB43_2: # %else2
; RV32ZVE32F-NEXT: andi a0, t0, 4
; RV32ZVE32F-NEXT: bnez a0, .LBB43_12
; RV32ZVE32F-NEXT: .LBB43_3: # %else4
; RV32ZVE32F-NEXT: andi a0, t0, 8
; RV32ZVE32F-NEXT: bnez a0, .LBB43_13
; RV32ZVE32F-NEXT: .LBB43_4: # %else6
; RV32ZVE32F-NEXT: andi a0, t0, 16
; RV32ZVE32F-NEXT: bnez a0, .LBB43_14
; RV32ZVE32F-NEXT: .LBB43_5: # %else8
; RV32ZVE32F-NEXT: andi a0, t0, 32
; RV32ZVE32F-NEXT: bnez a0, .LBB43_15
; RV32ZVE32F-NEXT: .LBB43_6: # %else10
; RV32ZVE32F-NEXT: andi a0, t0, 64
; RV32ZVE32F-NEXT: bnez a0, .LBB43_16
; RV32ZVE32F-NEXT: .LBB43_7: # %else12
; RV32ZVE32F-NEXT: andi a0, t0, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB43_9
; RV32ZVE32F-NEXT: .LBB43_8: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw a2, 0(a0)
; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: .LBB43_9: # %else14
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: .cfi_restore s0
; RV32ZVE32F-NEXT: .cfi_restore s1
; RV32ZVE32F-NEXT: .cfi_restore s2
; RV32ZVE32F-NEXT: addi sp, sp, 16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 0
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB43_10: # %cond.store
; RV32ZVE32F-NEXT: .cfi_restore_state
; RV32ZVE32F-NEXT: lw a1, 0(a0)
; RV32ZVE32F-NEXT: lw a0, 4(a0)
; RV32ZVE32F-NEXT: vmv.x.s s2, v8
; RV32ZVE32F-NEXT: sw a1, 0(s2)
; RV32ZVE32F-NEXT: sw a0, 4(s2)
; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: beqz a0, .LBB43_2
; RV32ZVE32F-NEXT: .LBB43_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw s0, 0(a0)
; RV32ZVE32F-NEXT: sw s1, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 4
; RV32ZVE32F-NEXT: beqz a0, .LBB43_3
; RV32ZVE32F-NEXT: .LBB43_12: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t5, 0(a0)
; RV32ZVE32F-NEXT: sw t6, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 8
; RV32ZVE32F-NEXT: beqz a0, .LBB43_4
; RV32ZVE32F-NEXT: .LBB43_13: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t3, 0(a0)
; RV32ZVE32F-NEXT: sw t4, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 16
; RV32ZVE32F-NEXT: beqz a0, .LBB43_5
; RV32ZVE32F-NEXT: .LBB43_14: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t1, 0(a0)
; RV32ZVE32F-NEXT: sw t2, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 32
; RV32ZVE32F-NEXT: beqz a0, .LBB43_6
; RV32ZVE32F-NEXT: .LBB43_15: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a6, 0(a0)
; RV32ZVE32F-NEXT: sw a7, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 64
; RV32ZVE32F-NEXT: beqz a0, .LBB43_7
; RV32ZVE32F-NEXT: .LBB43_16: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a4, 0(a0)
; RV32ZVE32F-NEXT: sw a5, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB43_8
; RV32ZVE32F-NEXT: j .LBB43_9
;
; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a4, 40(a0)
; RV64ZVE32F-NEXT: ld a3, 48(a0)
; RV64ZVE32F-NEXT: ld a2, 56(a0)
; RV64ZVE32F-NEXT: ld t1, 8(a0)
; RV64ZVE32F-NEXT: ld t0, 16(a0)
; RV64ZVE32F-NEXT: ld a7, 24(a0)
; RV64ZVE32F-NEXT: ld a6, 32(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v0
; RV64ZVE32F-NEXT: andi t2, a5, 1
; RV64ZVE32F-NEXT: beqz t2, .LBB43_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
; RV64ZVE32F-NEXT: slli t2, t2, 3
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: sd a0, 0(t2)
; RV64ZVE32F-NEXT: .LBB43_2: # %else
; RV64ZVE32F-NEXT: andi a0, a5, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB43_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd t1, 0(a0)
; RV64ZVE32F-NEXT: .LBB43_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a0, a5, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB43_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB43_13
; RV64ZVE32F-NEXT: .LBB43_6: # %else6
; RV64ZVE32F-NEXT: andi a0, a5, 16
; RV64ZVE32F-NEXT: bnez a0, .LBB43_14
; RV64ZVE32F-NEXT: .LBB43_7: # %else8
; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB43_9
; RV64ZVE32F-NEXT: .LBB43_8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a4, 0(a0)
; RV64ZVE32F-NEXT: .LBB43_9: # %else10
; RV64ZVE32F-NEXT: andi a0, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB43_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB43_16
; RV64ZVE32F-NEXT: .LBB43_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB43_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd t0, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: beqz a0, .LBB43_6
; RV64ZVE32F-NEXT: .LBB43_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a7, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, 16
; RV64ZVE32F-NEXT: beqz a0, .LBB43_7
; RV64ZVE32F-NEXT: .LBB43_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a6, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: bnez a0, .LBB43_8
; RV64ZVE32F-NEXT: j .LBB43_9
; RV64ZVE32F-NEXT: .LBB43_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB43_11
; RV64ZVE32F-NEXT: .LBB43_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a2, 0(a0)
; RV64ZVE32F-NEXT: ret
%eidxs = sext <8 x i8> %idxs to <8 x i64>
%ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
ret void
}

define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
; RV32V-LABEL: mscatter_baseidx_zext_v8i8_v8i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32V-NEXT: vzext.vf2 v13, v12
; RV32V-NEXT: vsll.vi v12, v13, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; RV32V-NEXT: vsoxei16.v v8, (a0), v12, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_zext_v8i8_v8i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64V-NEXT: vzext.vf2 v13, v12
; RV64V-NEXT: vsll.vi v12, v13, 3
; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; RV64V-NEXT: vsoxei16.v v8, (a0), v12, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: .cfi_offset s2, -12
; RV32ZVE32F-NEXT: .cfi_remember_state
; RV32ZVE32F-NEXT: lw a2, 56(a0)
; RV32ZVE32F-NEXT: lw a3, 60(a0)
; RV32ZVE32F-NEXT: lw a6, 40(a0)
; RV32ZVE32F-NEXT: lw a7, 44(a0)
; RV32ZVE32F-NEXT: lw a4, 48(a0)
; RV32ZVE32F-NEXT: lw a5, 52(a0)
; RV32ZVE32F-NEXT: lw t3, 24(a0)
; RV32ZVE32F-NEXT: lw t4, 28(a0)
; RV32ZVE32F-NEXT: lw t1, 32(a0)
; RV32ZVE32F-NEXT: lw t2, 36(a0)
; RV32ZVE32F-NEXT: lw s0, 8(a0)
; RV32ZVE32F-NEXT: lw s1, 12(a0)
; RV32ZVE32F-NEXT: lw t5, 16(a0)
; RV32ZVE32F-NEXT: lw t6, 20(a0)
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vzext.vf4 v10, v8
; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s t0, v0
; RV32ZVE32F-NEXT: andi s2, t0, 1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: bnez s2, .LBB44_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB44_11
; RV32ZVE32F-NEXT: .LBB44_2: # %else2
; RV32ZVE32F-NEXT: andi a0, t0, 4
; RV32ZVE32F-NEXT: bnez a0, .LBB44_12
; RV32ZVE32F-NEXT: .LBB44_3: # %else4
; RV32ZVE32F-NEXT: andi a0, t0, 8
; RV32ZVE32F-NEXT: bnez a0, .LBB44_13
; RV32ZVE32F-NEXT: .LBB44_4: # %else6
; RV32ZVE32F-NEXT: andi a0, t0, 16
; RV32ZVE32F-NEXT: bnez a0, .LBB44_14
; RV32ZVE32F-NEXT: .LBB44_5: # %else8
; RV32ZVE32F-NEXT: andi a0, t0, 32
; RV32ZVE32F-NEXT: bnez a0, .LBB44_15
; RV32ZVE32F-NEXT: .LBB44_6: # %else10
; RV32ZVE32F-NEXT: andi a0, t0, 64
; RV32ZVE32F-NEXT: bnez a0, .LBB44_16
; RV32ZVE32F-NEXT: .LBB44_7: # %else12
; RV32ZVE32F-NEXT: andi a0, t0, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB44_9
; RV32ZVE32F-NEXT: .LBB44_8: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw a2, 0(a0)
; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: .LBB44_9: # %else14
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: .cfi_restore s0
; RV32ZVE32F-NEXT: .cfi_restore s1
; RV32ZVE32F-NEXT: .cfi_restore s2
; RV32ZVE32F-NEXT: addi sp, sp, 16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 0
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB44_10: # %cond.store
; RV32ZVE32F-NEXT: .cfi_restore_state
; RV32ZVE32F-NEXT: lw a1, 0(a0)
; RV32ZVE32F-NEXT: lw a0, 4(a0)
; RV32ZVE32F-NEXT: vmv.x.s s2, v8
; RV32ZVE32F-NEXT: sw a1, 0(s2)
; RV32ZVE32F-NEXT: sw a0, 4(s2)
; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: beqz a0, .LBB44_2
; RV32ZVE32F-NEXT: .LBB44_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw s0, 0(a0)
; RV32ZVE32F-NEXT: sw s1, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 4
; RV32ZVE32F-NEXT: beqz a0, .LBB44_3
; RV32ZVE32F-NEXT: .LBB44_12: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t5, 0(a0)
; RV32ZVE32F-NEXT: sw t6, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 8
; RV32ZVE32F-NEXT: beqz a0, .LBB44_4
; RV32ZVE32F-NEXT: .LBB44_13: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t3, 0(a0)
; RV32ZVE32F-NEXT: sw t4, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 16
; RV32ZVE32F-NEXT: beqz a0, .LBB44_5
; RV32ZVE32F-NEXT: .LBB44_14: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t1, 0(a0)
; RV32ZVE32F-NEXT: sw t2, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 32
; RV32ZVE32F-NEXT: beqz a0, .LBB44_6
; RV32ZVE32F-NEXT: .LBB44_15: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a6, 0(a0)
; RV32ZVE32F-NEXT: sw a7, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 64
; RV32ZVE32F-NEXT: beqz a0, .LBB44_7
; RV32ZVE32F-NEXT: .LBB44_16: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a4, 0(a0)
; RV32ZVE32F-NEXT: sw a5, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB44_8
; RV32ZVE32F-NEXT: j .LBB44_9
;
; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a4, 40(a0)
; RV64ZVE32F-NEXT: ld a3, 48(a0)
; RV64ZVE32F-NEXT: ld a2, 56(a0)
; RV64ZVE32F-NEXT: ld t1, 8(a0)
; RV64ZVE32F-NEXT: ld t0, 16(a0)
; RV64ZVE32F-NEXT: ld a7, 24(a0)
; RV64ZVE32F-NEXT: ld a6, 32(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v0
; RV64ZVE32F-NEXT: andi t2, a5, 1
; RV64ZVE32F-NEXT: beqz t2, .LBB44_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
; RV64ZVE32F-NEXT: andi t2, t2, 255
; RV64ZVE32F-NEXT: slli t2, t2, 3
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: sd a0, 0(t2)
; RV64ZVE32F-NEXT: .LBB44_2: # %else
; RV64ZVE32F-NEXT: andi a0, a5, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB44_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: andi a0, a0, 255
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd t1, 0(a0)
; RV64ZVE32F-NEXT: .LBB44_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a0, a5, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB44_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB44_13
; RV64ZVE32F-NEXT: .LBB44_6: # %else6
; RV64ZVE32F-NEXT: andi a0, a5, 16
; RV64ZVE32F-NEXT: bnez a0, .LBB44_14
; RV64ZVE32F-NEXT: .LBB44_7: # %else8
; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB44_9
; RV64ZVE32F-NEXT: .LBB44_8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: andi a0, a0, 255
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a4, 0(a0)
; RV64ZVE32F-NEXT: .LBB44_9: # %else10
; RV64ZVE32F-NEXT: andi a0, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB44_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB44_16
; RV64ZVE32F-NEXT: .LBB44_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB44_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: andi a0, a0, 255
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd t0, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: beqz a0, .LBB44_6
; RV64ZVE32F-NEXT: .LBB44_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: andi a0, a0, 255
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a7, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, 16
; RV64ZVE32F-NEXT: beqz a0, .LBB44_7
; RV64ZVE32F-NEXT: .LBB44_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: andi a0, a0, 255
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a6, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: bnez a0, .LBB44_8
; RV64ZVE32F-NEXT: j .LBB44_9
; RV64ZVE32F-NEXT: .LBB44_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: andi a0, a0, 255
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB44_11
; RV64ZVE32F-NEXT: .LBB44_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: andi a0, a0, 255
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a2, 0(a0)
; RV64ZVE32F-NEXT: ret
%eidxs = zext <8 x i8> %idxs to <8 x i64>
%ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
ret void
}

define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
; RV32V-LABEL: mscatter_baseidx_v8i16_v8i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vsext.vf2 v14, v12
; RV32V-NEXT: vsll.vi v12, v14, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_v8i16_v8i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf4 v16, v12
; RV64V-NEXT: vsll.vi v12, v16, 3
; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_baseidx_v8i16_v8i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: .cfi_offset s2, -12
; RV32ZVE32F-NEXT: .cfi_remember_state
; RV32ZVE32F-NEXT: lw a2, 56(a0)
; RV32ZVE32F-NEXT: lw a3, 60(a0)
; RV32ZVE32F-NEXT: lw a6, 40(a0)
; RV32ZVE32F-NEXT: lw a7, 44(a0)
; RV32ZVE32F-NEXT: lw a4, 48(a0)
; RV32ZVE32F-NEXT: lw a5, 52(a0)
; RV32ZVE32F-NEXT: lw t3, 24(a0)
; RV32ZVE32F-NEXT: lw t4, 28(a0)
; RV32ZVE32F-NEXT: lw t1, 32(a0)
; RV32ZVE32F-NEXT: lw t2, 36(a0)
; RV32ZVE32F-NEXT: lw s0, 8(a0)
; RV32ZVE32F-NEXT: lw s1, 12(a0)
; RV32ZVE32F-NEXT: lw t5, 16(a0)
; RV32ZVE32F-NEXT: lw t6, 20(a0)
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsext.vf2 v10, v8
; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s t0, v0
; RV32ZVE32F-NEXT: andi s2, t0, 1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: bnez s2, .LBB45_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB45_11
; RV32ZVE32F-NEXT: .LBB45_2: # %else2
; RV32ZVE32F-NEXT: andi a0, t0, 4
; RV32ZVE32F-NEXT: bnez a0, .LBB45_12
; RV32ZVE32F-NEXT: .LBB45_3: # %else4
; RV32ZVE32F-NEXT: andi a0, t0, 8
; RV32ZVE32F-NEXT: bnez a0, .LBB45_13
; RV32ZVE32F-NEXT: .LBB45_4: # %else6
; RV32ZVE32F-NEXT: andi a0, t0, 16
; RV32ZVE32F-NEXT: bnez a0, .LBB45_14
; RV32ZVE32F-NEXT: .LBB45_5: # %else8
; RV32ZVE32F-NEXT: andi a0, t0, 32
; RV32ZVE32F-NEXT: bnez a0, .LBB45_15
; RV32ZVE32F-NEXT: .LBB45_6: # %else10
; RV32ZVE32F-NEXT: andi a0, t0, 64
; RV32ZVE32F-NEXT: bnez a0, .LBB45_16
; RV32ZVE32F-NEXT: .LBB45_7: # %else12
; RV32ZVE32F-NEXT: andi a0, t0, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB45_9
; RV32ZVE32F-NEXT: .LBB45_8: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw a2, 0(a0)
; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: .LBB45_9: # %else14
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: .cfi_restore s0
; RV32ZVE32F-NEXT: .cfi_restore s1
; RV32ZVE32F-NEXT: .cfi_restore s2
; RV32ZVE32F-NEXT: addi sp, sp, 16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 0
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB45_10: # %cond.store
; RV32ZVE32F-NEXT: .cfi_restore_state
; RV32ZVE32F-NEXT: lw a1, 0(a0)
; RV32ZVE32F-NEXT: lw a0, 4(a0)
; RV32ZVE32F-NEXT: vmv.x.s s2, v8
; RV32ZVE32F-NEXT: sw a1, 0(s2)
; RV32ZVE32F-NEXT: sw a0, 4(s2)
; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: beqz a0, .LBB45_2
; RV32ZVE32F-NEXT: .LBB45_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw s0, 0(a0)
; RV32ZVE32F-NEXT: sw s1, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 4
; RV32ZVE32F-NEXT: beqz a0, .LBB45_3
; RV32ZVE32F-NEXT: .LBB45_12: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t5, 0(a0)
; RV32ZVE32F-NEXT: sw t6, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 8
; RV32ZVE32F-NEXT: beqz a0, .LBB45_4
4255 ; RV32ZVE32F-NEXT: .LBB45_13: # %cond.store5
4256 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4257 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
4258 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4259 ; RV32ZVE32F-NEXT: sw t3, 0(a0)
4260 ; RV32ZVE32F-NEXT: sw t4, 4(a0)
4261 ; RV32ZVE32F-NEXT: andi a0, t0, 16
4262 ; RV32ZVE32F-NEXT: beqz a0, .LBB45_5
4263 ; RV32ZVE32F-NEXT: .LBB45_14: # %cond.store7
4264 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4265 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
4266 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4267 ; RV32ZVE32F-NEXT: sw t1, 0(a0)
4268 ; RV32ZVE32F-NEXT: sw t2, 4(a0)
4269 ; RV32ZVE32F-NEXT: andi a0, t0, 32
4270 ; RV32ZVE32F-NEXT: beqz a0, .LBB45_6
4271 ; RV32ZVE32F-NEXT: .LBB45_15: # %cond.store9
4272 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4273 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
4274 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4275 ; RV32ZVE32F-NEXT: sw a6, 0(a0)
4276 ; RV32ZVE32F-NEXT: sw a7, 4(a0)
4277 ; RV32ZVE32F-NEXT: andi a0, t0, 64
4278 ; RV32ZVE32F-NEXT: beqz a0, .LBB45_7
4279 ; RV32ZVE32F-NEXT: .LBB45_16: # %cond.store11
4280 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4281 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
4282 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4283 ; RV32ZVE32F-NEXT: sw a4, 0(a0)
4284 ; RV32ZVE32F-NEXT: sw a5, 4(a0)
4285 ; RV32ZVE32F-NEXT: andi a0, t0, -128
4286 ; RV32ZVE32F-NEXT: bnez a0, .LBB45_8
4287 ; RV32ZVE32F-NEXT: j .LBB45_9
4289 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i16_v8i64:
4290 ; RV64ZVE32F: # %bb.0:
4291 ; RV64ZVE32F-NEXT: ld a4, 40(a0)
4292 ; RV64ZVE32F-NEXT: ld a3, 48(a0)
4293 ; RV64ZVE32F-NEXT: ld a2, 56(a0)
4294 ; RV64ZVE32F-NEXT: ld t1, 8(a0)
4295 ; RV64ZVE32F-NEXT: ld t0, 16(a0)
4296 ; RV64ZVE32F-NEXT: ld a7, 24(a0)
4297 ; RV64ZVE32F-NEXT: ld a6, 32(a0)
4298 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
4299 ; RV64ZVE32F-NEXT: vmv.x.s a5, v0
4300 ; RV64ZVE32F-NEXT: andi t2, a5, 1
4301 ; RV64ZVE32F-NEXT: beqz t2, .LBB45_2
4302 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
4303 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
4304 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
4305 ; RV64ZVE32F-NEXT: vmv.x.s t2, v8
4306 ; RV64ZVE32F-NEXT: slli t2, t2, 3
4307 ; RV64ZVE32F-NEXT: add t2, a1, t2
4308 ; RV64ZVE32F-NEXT: sd a0, 0(t2)
4309 ; RV64ZVE32F-NEXT: .LBB45_2: # %else
4310 ; RV64ZVE32F-NEXT: andi a0, a5, 2
4311 ; RV64ZVE32F-NEXT: beqz a0, .LBB45_4
4312 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
4313 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
4314 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
4315 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9
4316 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4317 ; RV64ZVE32F-NEXT: add a0, a1, a0
4318 ; RV64ZVE32F-NEXT: sd t1, 0(a0)
4319 ; RV64ZVE32F-NEXT: .LBB45_4: # %else2
4320 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
4321 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
4322 ; RV64ZVE32F-NEXT: andi a0, a5, 4
4323 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
4324 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
4325 ; RV64ZVE32F-NEXT: bnez a0, .LBB45_12
4326 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
4327 ; RV64ZVE32F-NEXT: andi a0, a5, 8
4328 ; RV64ZVE32F-NEXT: bnez a0, .LBB45_13
4329 ; RV64ZVE32F-NEXT: .LBB45_6: # %else6
4330 ; RV64ZVE32F-NEXT: andi a0, a5, 16
4331 ; RV64ZVE32F-NEXT: bnez a0, .LBB45_14
4332 ; RV64ZVE32F-NEXT: .LBB45_7: # %else8
4333 ; RV64ZVE32F-NEXT: andi a0, a5, 32
4334 ; RV64ZVE32F-NEXT: beqz a0, .LBB45_9
4335 ; RV64ZVE32F-NEXT: .LBB45_8: # %cond.store9
4336 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
4337 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4338 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4339 ; RV64ZVE32F-NEXT: add a0, a1, a0
4340 ; RV64ZVE32F-NEXT: sd a4, 0(a0)
4341 ; RV64ZVE32F-NEXT: .LBB45_9: # %else10
4342 ; RV64ZVE32F-NEXT: andi a0, a5, 64
4343 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
4344 ; RV64ZVE32F-NEXT: bnez a0, .LBB45_15
4345 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
4346 ; RV64ZVE32F-NEXT: andi a0, a5, -128
4347 ; RV64ZVE32F-NEXT: bnez a0, .LBB45_16
4348 ; RV64ZVE32F-NEXT: .LBB45_11: # %else14
4349 ; RV64ZVE32F-NEXT: ret
4350 ; RV64ZVE32F-NEXT: .LBB45_12: # %cond.store3
4351 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4352 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4353 ; RV64ZVE32F-NEXT: add a0, a1, a0
4354 ; RV64ZVE32F-NEXT: sd t0, 0(a0)
4355 ; RV64ZVE32F-NEXT: andi a0, a5, 8
4356 ; RV64ZVE32F-NEXT: beqz a0, .LBB45_6
4357 ; RV64ZVE32F-NEXT: .LBB45_13: # %cond.store5
4358 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
4359 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4360 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4361 ; RV64ZVE32F-NEXT: add a0, a1, a0
4362 ; RV64ZVE32F-NEXT: sd a7, 0(a0)
4363 ; RV64ZVE32F-NEXT: andi a0, a5, 16
4364 ; RV64ZVE32F-NEXT: beqz a0, .LBB45_7
4365 ; RV64ZVE32F-NEXT: .LBB45_14: # %cond.store7
4366 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9
4367 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4368 ; RV64ZVE32F-NEXT: add a0, a1, a0
4369 ; RV64ZVE32F-NEXT: sd a6, 0(a0)
4370 ; RV64ZVE32F-NEXT: andi a0, a5, 32
4371 ; RV64ZVE32F-NEXT: bnez a0, .LBB45_8
4372 ; RV64ZVE32F-NEXT: j .LBB45_9
4373 ; RV64ZVE32F-NEXT: .LBB45_15: # %cond.store11
4374 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4375 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4376 ; RV64ZVE32F-NEXT: add a0, a1, a0
4377 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
4378 ; RV64ZVE32F-NEXT: andi a0, a5, -128
4379 ; RV64ZVE32F-NEXT: beqz a0, .LBB45_11
4380 ; RV64ZVE32F-NEXT: .LBB45_16: # %cond.store13
4381 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
4382 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4383 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4384 ; RV64ZVE32F-NEXT: add a0, a1, a0
4385 ; RV64ZVE32F-NEXT: sd a2, 0(a0)
4386 ; RV64ZVE32F-NEXT: ret
4387 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i16> %idxs
4388 call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
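
; Note: for native i16 indices the vector configurations widen in registers
; (vsext.vf2 to e32 on RV32V, vsext.vf4 to e64 on RV64V) and issue a single
; masked indexed store, while the ZVE32F configurations branch per element.
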
define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
; RV32V-LABEL: mscatter_baseidx_sext_v8i16_v8i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vsext.vf2 v14, v12
; RV32V-NEXT: vsll.vi v12, v14, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_sext_v8i16_v8i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf4 v16, v12
; RV64V-NEXT: vsll.vi v12, v16, 3
; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_baseidx_sext_v8i16_v8i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: .cfi_offset s2, -12
; RV32ZVE32F-NEXT: .cfi_remember_state
; RV32ZVE32F-NEXT: lw a2, 56(a0)
; RV32ZVE32F-NEXT: lw a3, 60(a0)
; RV32ZVE32F-NEXT: lw a6, 40(a0)
; RV32ZVE32F-NEXT: lw a7, 44(a0)
; RV32ZVE32F-NEXT: lw a4, 48(a0)
; RV32ZVE32F-NEXT: lw a5, 52(a0)
; RV32ZVE32F-NEXT: lw t3, 24(a0)
; RV32ZVE32F-NEXT: lw t4, 28(a0)
; RV32ZVE32F-NEXT: lw t1, 32(a0)
; RV32ZVE32F-NEXT: lw t2, 36(a0)
; RV32ZVE32F-NEXT: lw s0, 8(a0)
; RV32ZVE32F-NEXT: lw s1, 12(a0)
; RV32ZVE32F-NEXT: lw t5, 16(a0)
; RV32ZVE32F-NEXT: lw t6, 20(a0)
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsext.vf2 v10, v8
; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s t0, v0
; RV32ZVE32F-NEXT: andi s2, t0, 1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: bnez s2, .LBB46_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB46_11
; RV32ZVE32F-NEXT: .LBB46_2: # %else2
; RV32ZVE32F-NEXT: andi a0, t0, 4
; RV32ZVE32F-NEXT: bnez a0, .LBB46_12
; RV32ZVE32F-NEXT: .LBB46_3: # %else4
; RV32ZVE32F-NEXT: andi a0, t0, 8
; RV32ZVE32F-NEXT: bnez a0, .LBB46_13
; RV32ZVE32F-NEXT: .LBB46_4: # %else6
; RV32ZVE32F-NEXT: andi a0, t0, 16
; RV32ZVE32F-NEXT: bnez a0, .LBB46_14
; RV32ZVE32F-NEXT: .LBB46_5: # %else8
; RV32ZVE32F-NEXT: andi a0, t0, 32
; RV32ZVE32F-NEXT: bnez a0, .LBB46_15
; RV32ZVE32F-NEXT: .LBB46_6: # %else10
; RV32ZVE32F-NEXT: andi a0, t0, 64
; RV32ZVE32F-NEXT: bnez a0, .LBB46_16
; RV32ZVE32F-NEXT: .LBB46_7: # %else12
; RV32ZVE32F-NEXT: andi a0, t0, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB46_9
; RV32ZVE32F-NEXT: .LBB46_8: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw a2, 0(a0)
; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: .LBB46_9: # %else14
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: .cfi_restore s0
; RV32ZVE32F-NEXT: .cfi_restore s1
; RV32ZVE32F-NEXT: .cfi_restore s2
; RV32ZVE32F-NEXT: addi sp, sp, 16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 0
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB46_10: # %cond.store
; RV32ZVE32F-NEXT: .cfi_restore_state
; RV32ZVE32F-NEXT: lw a1, 0(a0)
; RV32ZVE32F-NEXT: lw a0, 4(a0)
; RV32ZVE32F-NEXT: vmv.x.s s2, v8
; RV32ZVE32F-NEXT: sw a1, 0(s2)
; RV32ZVE32F-NEXT: sw a0, 4(s2)
; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: beqz a0, .LBB46_2
; RV32ZVE32F-NEXT: .LBB46_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw s0, 0(a0)
; RV32ZVE32F-NEXT: sw s1, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 4
; RV32ZVE32F-NEXT: beqz a0, .LBB46_3
; RV32ZVE32F-NEXT: .LBB46_12: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t5, 0(a0)
; RV32ZVE32F-NEXT: sw t6, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 8
; RV32ZVE32F-NEXT: beqz a0, .LBB46_4
; RV32ZVE32F-NEXT: .LBB46_13: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t3, 0(a0)
; RV32ZVE32F-NEXT: sw t4, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 16
; RV32ZVE32F-NEXT: beqz a0, .LBB46_5
; RV32ZVE32F-NEXT: .LBB46_14: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t1, 0(a0)
; RV32ZVE32F-NEXT: sw t2, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 32
; RV32ZVE32F-NEXT: beqz a0, .LBB46_6
; RV32ZVE32F-NEXT: .LBB46_15: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a6, 0(a0)
; RV32ZVE32F-NEXT: sw a7, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 64
; RV32ZVE32F-NEXT: beqz a0, .LBB46_7
; RV32ZVE32F-NEXT: .LBB46_16: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a4, 0(a0)
; RV32ZVE32F-NEXT: sw a5, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB46_8
; RV32ZVE32F-NEXT: j .LBB46_9
;
; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i16_v8i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a4, 40(a0)
; RV64ZVE32F-NEXT: ld a3, 48(a0)
; RV64ZVE32F-NEXT: ld a2, 56(a0)
; RV64ZVE32F-NEXT: ld t1, 8(a0)
; RV64ZVE32F-NEXT: ld t0, 16(a0)
; RV64ZVE32F-NEXT: ld a7, 24(a0)
; RV64ZVE32F-NEXT: ld a6, 32(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v0
; RV64ZVE32F-NEXT: andi t2, a5, 1
; RV64ZVE32F-NEXT: beqz t2, .LBB46_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
; RV64ZVE32F-NEXT: slli t2, t2, 3
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: sd a0, 0(t2)
; RV64ZVE32F-NEXT: .LBB46_2: # %else
; RV64ZVE32F-NEXT: andi a0, a5, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB46_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd t1, 0(a0)
; RV64ZVE32F-NEXT: .LBB46_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a0, a5, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB46_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB46_13
; RV64ZVE32F-NEXT: .LBB46_6: # %else6
; RV64ZVE32F-NEXT: andi a0, a5, 16
; RV64ZVE32F-NEXT: bnez a0, .LBB46_14
; RV64ZVE32F-NEXT: .LBB46_7: # %else8
; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB46_9
; RV64ZVE32F-NEXT: .LBB46_8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a4, 0(a0)
; RV64ZVE32F-NEXT: .LBB46_9: # %else10
; RV64ZVE32F-NEXT: andi a0, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB46_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB46_16
; RV64ZVE32F-NEXT: .LBB46_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB46_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd t0, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: beqz a0, .LBB46_6
; RV64ZVE32F-NEXT: .LBB46_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a7, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, 16
; RV64ZVE32F-NEXT: beqz a0, .LBB46_7
; RV64ZVE32F-NEXT: .LBB46_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a6, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: bnez a0, .LBB46_8
; RV64ZVE32F-NEXT: j .LBB46_9
; RV64ZVE32F-NEXT: .LBB46_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB46_11
; RV64ZVE32F-NEXT: .LBB46_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a2, 0(a0)
; RV64ZVE32F-NEXT: ret
%eidxs = sext <8 x i16> %idxs to <8 x i64>
%ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
ret void
}
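
; Note: the sign-extended i16 case needs no extra masking in the RV64ZVE32F
; fallback: vmv.x.s already sign-extends the element, so "slli 3" alone
; scales the index.
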
define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
; RV32V-LABEL: mscatter_baseidx_zext_v8i16_v8i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vzext.vf2 v14, v12
; RV32V-NEXT: vsll.vi v12, v14, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_zext_v8i16_v8i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64V-NEXT: vzext.vf2 v14, v12
; RV64V-NEXT: vsll.vi v12, v14, 3
; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; RV64V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: .cfi_offset s2, -12
; RV32ZVE32F-NEXT: .cfi_remember_state
; RV32ZVE32F-NEXT: lw a2, 56(a0)
; RV32ZVE32F-NEXT: lw a3, 60(a0)
; RV32ZVE32F-NEXT: lw a6, 40(a0)
; RV32ZVE32F-NEXT: lw a7, 44(a0)
; RV32ZVE32F-NEXT: lw a4, 48(a0)
; RV32ZVE32F-NEXT: lw a5, 52(a0)
; RV32ZVE32F-NEXT: lw t3, 24(a0)
; RV32ZVE32F-NEXT: lw t4, 28(a0)
; RV32ZVE32F-NEXT: lw t1, 32(a0)
; RV32ZVE32F-NEXT: lw t2, 36(a0)
; RV32ZVE32F-NEXT: lw s0, 8(a0)
; RV32ZVE32F-NEXT: lw s1, 12(a0)
; RV32ZVE32F-NEXT: lw t5, 16(a0)
; RV32ZVE32F-NEXT: lw t6, 20(a0)
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vzext.vf2 v10, v8
; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s t0, v0
; RV32ZVE32F-NEXT: andi s2, t0, 1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: bnez s2, .LBB47_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB47_11
; RV32ZVE32F-NEXT: .LBB47_2: # %else2
; RV32ZVE32F-NEXT: andi a0, t0, 4
; RV32ZVE32F-NEXT: bnez a0, .LBB47_12
; RV32ZVE32F-NEXT: .LBB47_3: # %else4
; RV32ZVE32F-NEXT: andi a0, t0, 8
; RV32ZVE32F-NEXT: bnez a0, .LBB47_13
; RV32ZVE32F-NEXT: .LBB47_4: # %else6
; RV32ZVE32F-NEXT: andi a0, t0, 16
; RV32ZVE32F-NEXT: bnez a0, .LBB47_14
; RV32ZVE32F-NEXT: .LBB47_5: # %else8
; RV32ZVE32F-NEXT: andi a0, t0, 32
; RV32ZVE32F-NEXT: bnez a0, .LBB47_15
; RV32ZVE32F-NEXT: .LBB47_6: # %else10
; RV32ZVE32F-NEXT: andi a0, t0, 64
; RV32ZVE32F-NEXT: bnez a0, .LBB47_16
; RV32ZVE32F-NEXT: .LBB47_7: # %else12
; RV32ZVE32F-NEXT: andi a0, t0, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB47_9
; RV32ZVE32F-NEXT: .LBB47_8: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw a2, 0(a0)
; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: .LBB47_9: # %else14
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: .cfi_restore s0
; RV32ZVE32F-NEXT: .cfi_restore s1
; RV32ZVE32F-NEXT: .cfi_restore s2
; RV32ZVE32F-NEXT: addi sp, sp, 16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 0
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB47_10: # %cond.store
; RV32ZVE32F-NEXT: .cfi_restore_state
; RV32ZVE32F-NEXT: lw a1, 0(a0)
; RV32ZVE32F-NEXT: lw a0, 4(a0)
; RV32ZVE32F-NEXT: vmv.x.s s2, v8
; RV32ZVE32F-NEXT: sw a1, 0(s2)
; RV32ZVE32F-NEXT: sw a0, 4(s2)
; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: beqz a0, .LBB47_2
; RV32ZVE32F-NEXT: .LBB47_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw s0, 0(a0)
; RV32ZVE32F-NEXT: sw s1, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 4
; RV32ZVE32F-NEXT: beqz a0, .LBB47_3
; RV32ZVE32F-NEXT: .LBB47_12: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t5, 0(a0)
; RV32ZVE32F-NEXT: sw t6, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 8
; RV32ZVE32F-NEXT: beqz a0, .LBB47_4
; RV32ZVE32F-NEXT: .LBB47_13: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t3, 0(a0)
; RV32ZVE32F-NEXT: sw t4, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 16
; RV32ZVE32F-NEXT: beqz a0, .LBB47_5
; RV32ZVE32F-NEXT: .LBB47_14: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t1, 0(a0)
; RV32ZVE32F-NEXT: sw t2, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 32
; RV32ZVE32F-NEXT: beqz a0, .LBB47_6
; RV32ZVE32F-NEXT: .LBB47_15: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a6, 0(a0)
; RV32ZVE32F-NEXT: sw a7, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 64
; RV32ZVE32F-NEXT: beqz a0, .LBB47_7
; RV32ZVE32F-NEXT: .LBB47_16: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a4, 0(a0)
; RV32ZVE32F-NEXT: sw a5, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB47_8
; RV32ZVE32F-NEXT: j .LBB47_9
;
; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a4, 40(a0)
; RV64ZVE32F-NEXT: ld a3, 48(a0)
; RV64ZVE32F-NEXT: ld a2, 56(a0)
; RV64ZVE32F-NEXT: ld t1, 8(a0)
; RV64ZVE32F-NEXT: ld t0, 16(a0)
; RV64ZVE32F-NEXT: ld a7, 24(a0)
; RV64ZVE32F-NEXT: ld a6, 32(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v0
; RV64ZVE32F-NEXT: andi t2, a5, 1
; RV64ZVE32F-NEXT: beqz t2, .LBB47_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
; RV64ZVE32F-NEXT: slli t2, t2, 48
; RV64ZVE32F-NEXT: srli t2, t2, 45
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: sd a0, 0(t2)
; RV64ZVE32F-NEXT: .LBB47_2: # %else
; RV64ZVE32F-NEXT: andi a0, a5, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB47_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: slli a0, a0, 48
; RV64ZVE32F-NEXT: srli a0, a0, 45
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd t1, 0(a0)
; RV64ZVE32F-NEXT: .LBB47_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a0, a5, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB47_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB47_13
; RV64ZVE32F-NEXT: .LBB47_6: # %else6
; RV64ZVE32F-NEXT: andi a0, a5, 16
; RV64ZVE32F-NEXT: bnez a0, .LBB47_14
; RV64ZVE32F-NEXT: .LBB47_7: # %else8
; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB47_9
; RV64ZVE32F-NEXT: .LBB47_8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 48
; RV64ZVE32F-NEXT: srli a0, a0, 45
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a4, 0(a0)
; RV64ZVE32F-NEXT: .LBB47_9: # %else10
; RV64ZVE32F-NEXT: andi a0, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB47_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB47_16
; RV64ZVE32F-NEXT: .LBB47_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB47_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 48
; RV64ZVE32F-NEXT: srli a0, a0, 45
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd t0, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: beqz a0, .LBB47_6
; RV64ZVE32F-NEXT: .LBB47_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 48
; RV64ZVE32F-NEXT: srli a0, a0, 45
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a7, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, 16
; RV64ZVE32F-NEXT: beqz a0, .LBB47_7
; RV64ZVE32F-NEXT: .LBB47_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: slli a0, a0, 48
; RV64ZVE32F-NEXT: srli a0, a0, 45
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a6, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: bnez a0, .LBB47_8
; RV64ZVE32F-NEXT: j .LBB47_9
; RV64ZVE32F-NEXT: .LBB47_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 48
; RV64ZVE32F-NEXT: srli a0, a0, 45
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB47_11
; RV64ZVE32F-NEXT: .LBB47_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 48
; RV64ZVE32F-NEXT: srli a0, a0, 45
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a2, 0(a0)
; RV64ZVE32F-NEXT: ret
%eidxs = zext <8 x i16> %idxs to <8 x i64>
%ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
ret void
}
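
; Note: the zero-extended i16 case folds the zext and the scale into a
; shift pair: "slli a0, a0, 48" then "srli a0, a0, 45" clears the upper
; 48 bits and leaves the index multiplied by 8.
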
define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m) {
; RV32V-LABEL: mscatter_baseidx_v8i32_v8i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vsll.vi v12, v12, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_v8i32_v8i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf2 v16, v12
; RV64V-NEXT: vsll.vi v12, v16, 3
; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_baseidx_v8i32_v8i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: .cfi_offset s2, -12
; RV32ZVE32F-NEXT: .cfi_remember_state
; RV32ZVE32F-NEXT: lw a2, 56(a0)
; RV32ZVE32F-NEXT: lw a3, 60(a0)
; RV32ZVE32F-NEXT: lw a6, 40(a0)
; RV32ZVE32F-NEXT: lw a7, 44(a0)
; RV32ZVE32F-NEXT: lw a4, 48(a0)
; RV32ZVE32F-NEXT: lw a5, 52(a0)
; RV32ZVE32F-NEXT: lw t3, 24(a0)
; RV32ZVE32F-NEXT: lw t4, 28(a0)
; RV32ZVE32F-NEXT: lw t1, 32(a0)
; RV32ZVE32F-NEXT: lw t2, 36(a0)
; RV32ZVE32F-NEXT: lw s0, 8(a0)
; RV32ZVE32F-NEXT: lw s1, 12(a0)
; RV32ZVE32F-NEXT: lw t5, 16(a0)
; RV32ZVE32F-NEXT: lw t6, 20(a0)
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s t0, v0
; RV32ZVE32F-NEXT: andi s2, t0, 1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: bnez s2, .LBB48_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB48_11
; RV32ZVE32F-NEXT: .LBB48_2: # %else2
; RV32ZVE32F-NEXT: andi a0, t0, 4
; RV32ZVE32F-NEXT: bnez a0, .LBB48_12
; RV32ZVE32F-NEXT: .LBB48_3: # %else4
; RV32ZVE32F-NEXT: andi a0, t0, 8
; RV32ZVE32F-NEXT: bnez a0, .LBB48_13
; RV32ZVE32F-NEXT: .LBB48_4: # %else6
; RV32ZVE32F-NEXT: andi a0, t0, 16
; RV32ZVE32F-NEXT: bnez a0, .LBB48_14
; RV32ZVE32F-NEXT: .LBB48_5: # %else8
; RV32ZVE32F-NEXT: andi a0, t0, 32
; RV32ZVE32F-NEXT: bnez a0, .LBB48_15
; RV32ZVE32F-NEXT: .LBB48_6: # %else10
; RV32ZVE32F-NEXT: andi a0, t0, 64
; RV32ZVE32F-NEXT: bnez a0, .LBB48_16
; RV32ZVE32F-NEXT: .LBB48_7: # %else12
; RV32ZVE32F-NEXT: andi a0, t0, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB48_9
; RV32ZVE32F-NEXT: .LBB48_8: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw a2, 0(a0)
; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: .LBB48_9: # %else14
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: .cfi_restore s0
; RV32ZVE32F-NEXT: .cfi_restore s1
; RV32ZVE32F-NEXT: .cfi_restore s2
; RV32ZVE32F-NEXT: addi sp, sp, 16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 0
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB48_10: # %cond.store
; RV32ZVE32F-NEXT: .cfi_restore_state
; RV32ZVE32F-NEXT: lw a1, 0(a0)
; RV32ZVE32F-NEXT: lw a0, 4(a0)
; RV32ZVE32F-NEXT: vmv.x.s s2, v8
; RV32ZVE32F-NEXT: sw a1, 0(s2)
; RV32ZVE32F-NEXT: sw a0, 4(s2)
; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: beqz a0, .LBB48_2
; RV32ZVE32F-NEXT: .LBB48_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw s0, 0(a0)
; RV32ZVE32F-NEXT: sw s1, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 4
; RV32ZVE32F-NEXT: beqz a0, .LBB48_3
; RV32ZVE32F-NEXT: .LBB48_12: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t5, 0(a0)
; RV32ZVE32F-NEXT: sw t6, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 8
; RV32ZVE32F-NEXT: beqz a0, .LBB48_4
; RV32ZVE32F-NEXT: .LBB48_13: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t3, 0(a0)
; RV32ZVE32F-NEXT: sw t4, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 16
; RV32ZVE32F-NEXT: beqz a0, .LBB48_5
; RV32ZVE32F-NEXT: .LBB48_14: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t1, 0(a0)
; RV32ZVE32F-NEXT: sw t2, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 32
; RV32ZVE32F-NEXT: beqz a0, .LBB48_6
; RV32ZVE32F-NEXT: .LBB48_15: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a6, 0(a0)
; RV32ZVE32F-NEXT: sw a7, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 64
; RV32ZVE32F-NEXT: beqz a0, .LBB48_7
; RV32ZVE32F-NEXT: .LBB48_16: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a4, 0(a0)
; RV32ZVE32F-NEXT: sw a5, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB48_8
; RV32ZVE32F-NEXT: j .LBB48_9
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v8i32_v8i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a4, 40(a0)
; RV64ZVE32F-NEXT: ld a3, 48(a0)
; RV64ZVE32F-NEXT: ld a2, 56(a0)
; RV64ZVE32F-NEXT: ld t1, 8(a0)
; RV64ZVE32F-NEXT: ld t0, 16(a0)
; RV64ZVE32F-NEXT: ld a7, 24(a0)
; RV64ZVE32F-NEXT: ld a6, 32(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v0
; RV64ZVE32F-NEXT: andi t2, a5, 1
; RV64ZVE32F-NEXT: beqz t2, .LBB48_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
; RV64ZVE32F-NEXT: slli t2, t2, 3
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: sd a0, 0(t2)
; RV64ZVE32F-NEXT: .LBB48_2: # %else
; RV64ZVE32F-NEXT: andi a0, a5, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB48_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v10
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd t1, 0(a0)
; RV64ZVE32F-NEXT: .LBB48_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a0, a5, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB48_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB48_13
; RV64ZVE32F-NEXT: .LBB48_6: # %else6
; RV64ZVE32F-NEXT: andi a0, a5, 16
; RV64ZVE32F-NEXT: bnez a0, .LBB48_14
; RV64ZVE32F-NEXT: .LBB48_7: # %else8
; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB48_9
; RV64ZVE32F-NEXT: .LBB48_8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a4, 0(a0)
; RV64ZVE32F-NEXT: .LBB48_9: # %else10
; RV64ZVE32F-NEXT: andi a0, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB48_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB48_16
; RV64ZVE32F-NEXT: .LBB48_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB48_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd t0, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: beqz a0, .LBB48_6
; RV64ZVE32F-NEXT: .LBB48_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a7, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, 16
; RV64ZVE32F-NEXT: beqz a0, .LBB48_7
; RV64ZVE32F-NEXT: .LBB48_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a0, v10
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a6, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: bnez a0, .LBB48_8
; RV64ZVE32F-NEXT: j .LBB48_9
; RV64ZVE32F-NEXT: .LBB48_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB48_11
; RV64ZVE32F-NEXT: .LBB48_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a2, 0(a0)
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i64, ptr %base, <8 x i32> %idxs
call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
ret void
}
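
; Note: native i32 indices need no widening on RV32: both RV32V and
; RV32ZVE32F shift the index vector in place ("vsll.vi ..., 3") before
; the indexed store or the vadd.vx address computation.
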
define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m) {
; RV32V-LABEL: mscatter_baseidx_sext_v8i32_v8i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vsll.vi v12, v12, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_sext_v8i32_v8i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf2 v16, v12
; RV64V-NEXT: vsll.vi v12, v16, 3
; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_baseidx_sext_v8i32_v8i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: .cfi_offset s2, -12
; RV32ZVE32F-NEXT: .cfi_remember_state
; RV32ZVE32F-NEXT: lw a2, 56(a0)
; RV32ZVE32F-NEXT: lw a3, 60(a0)
; RV32ZVE32F-NEXT: lw a6, 40(a0)
; RV32ZVE32F-NEXT: lw a7, 44(a0)
; RV32ZVE32F-NEXT: lw a4, 48(a0)
; RV32ZVE32F-NEXT: lw a5, 52(a0)
; RV32ZVE32F-NEXT: lw t3, 24(a0)
; RV32ZVE32F-NEXT: lw t4, 28(a0)
; RV32ZVE32F-NEXT: lw t1, 32(a0)
; RV32ZVE32F-NEXT: lw t2, 36(a0)
; RV32ZVE32F-NEXT: lw s0, 8(a0)
; RV32ZVE32F-NEXT: lw s1, 12(a0)
; RV32ZVE32F-NEXT: lw t5, 16(a0)
; RV32ZVE32F-NEXT: lw t6, 20(a0)
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s t0, v0
; RV32ZVE32F-NEXT: andi s2, t0, 1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: bnez s2, .LBB49_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB49_11
; RV32ZVE32F-NEXT: .LBB49_2: # %else2
; RV32ZVE32F-NEXT: andi a0, t0, 4
; RV32ZVE32F-NEXT: bnez a0, .LBB49_12
; RV32ZVE32F-NEXT: .LBB49_3: # %else4
; RV32ZVE32F-NEXT: andi a0, t0, 8
; RV32ZVE32F-NEXT: bnez a0, .LBB49_13
; RV32ZVE32F-NEXT: .LBB49_4: # %else6
; RV32ZVE32F-NEXT: andi a0, t0, 16
; RV32ZVE32F-NEXT: bnez a0, .LBB49_14
; RV32ZVE32F-NEXT: .LBB49_5: # %else8
; RV32ZVE32F-NEXT: andi a0, t0, 32
; RV32ZVE32F-NEXT: bnez a0, .LBB49_15
; RV32ZVE32F-NEXT: .LBB49_6: # %else10
; RV32ZVE32F-NEXT: andi a0, t0, 64
; RV32ZVE32F-NEXT: bnez a0, .LBB49_16
; RV32ZVE32F-NEXT: .LBB49_7: # %else12
; RV32ZVE32F-NEXT: andi a0, t0, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB49_9
; RV32ZVE32F-NEXT: .LBB49_8: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw a2, 0(a0)
; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: .LBB49_9: # %else14
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: .cfi_restore s0
; RV32ZVE32F-NEXT: .cfi_restore s1
; RV32ZVE32F-NEXT: .cfi_restore s2
; RV32ZVE32F-NEXT: addi sp, sp, 16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 0
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB49_10: # %cond.store
; RV32ZVE32F-NEXT: .cfi_restore_state
; RV32ZVE32F-NEXT: lw a1, 0(a0)
; RV32ZVE32F-NEXT: lw a0, 4(a0)
; RV32ZVE32F-NEXT: vmv.x.s s2, v8
; RV32ZVE32F-NEXT: sw a1, 0(s2)
; RV32ZVE32F-NEXT: sw a0, 4(s2)
; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: beqz a0, .LBB49_2
; RV32ZVE32F-NEXT: .LBB49_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw s0, 0(a0)
; RV32ZVE32F-NEXT: sw s1, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 4
; RV32ZVE32F-NEXT: beqz a0, .LBB49_3
; RV32ZVE32F-NEXT: .LBB49_12: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t5, 0(a0)
; RV32ZVE32F-NEXT: sw t6, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 8
; RV32ZVE32F-NEXT: beqz a0, .LBB49_4
; RV32ZVE32F-NEXT: .LBB49_13: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t3, 0(a0)
; RV32ZVE32F-NEXT: sw t4, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 16
; RV32ZVE32F-NEXT: beqz a0, .LBB49_5
; RV32ZVE32F-NEXT: .LBB49_14: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t1, 0(a0)
; RV32ZVE32F-NEXT: sw t2, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 32
; RV32ZVE32F-NEXT: beqz a0, .LBB49_6
; RV32ZVE32F-NEXT: .LBB49_15: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a6, 0(a0)
; RV32ZVE32F-NEXT: sw a7, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 64
; RV32ZVE32F-NEXT: beqz a0, .LBB49_7
; RV32ZVE32F-NEXT: .LBB49_16: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a4, 0(a0)
; RV32ZVE32F-NEXT: sw a5, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB49_8
; RV32ZVE32F-NEXT: j .LBB49_9
;
; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i32_v8i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a4, 40(a0)
; RV64ZVE32F-NEXT: ld a3, 48(a0)
; RV64ZVE32F-NEXT: ld a2, 56(a0)
; RV64ZVE32F-NEXT: ld t1, 8(a0)
; RV64ZVE32F-NEXT: ld t0, 16(a0)
; RV64ZVE32F-NEXT: ld a7, 24(a0)
; RV64ZVE32F-NEXT: ld a6, 32(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v0
; RV64ZVE32F-NEXT: andi t2, a5, 1
; RV64ZVE32F-NEXT: beqz t2, .LBB49_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
; RV64ZVE32F-NEXT: slli t2, t2, 3
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: sd a0, 0(t2)
; RV64ZVE32F-NEXT: .LBB49_2: # %else
; RV64ZVE32F-NEXT: andi a0, a5, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB49_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v10
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd t1, 0(a0)
; RV64ZVE32F-NEXT: .LBB49_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a0, a5, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB49_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB49_13
; RV64ZVE32F-NEXT: .LBB49_6: # %else6
; RV64ZVE32F-NEXT: andi a0, a5, 16
; RV64ZVE32F-NEXT: bnez a0, .LBB49_14
; RV64ZVE32F-NEXT: .LBB49_7: # %else8
; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB49_9
; RV64ZVE32F-NEXT: .LBB49_8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a4, 0(a0)
; RV64ZVE32F-NEXT: .LBB49_9: # %else10
; RV64ZVE32F-NEXT: andi a0, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB49_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB49_16
; RV64ZVE32F-NEXT: .LBB49_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB49_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd t0, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: beqz a0, .LBB49_6
; RV64ZVE32F-NEXT: .LBB49_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a7, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, 16
; RV64ZVE32F-NEXT: beqz a0, .LBB49_7
; RV64ZVE32F-NEXT: .LBB49_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a0, v10
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a6, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: bnez a0, .LBB49_8
; RV64ZVE32F-NEXT: j .LBB49_9
; RV64ZVE32F-NEXT: .LBB49_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB49_11
; RV64ZVE32F-NEXT: .LBB49_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a2, 0(a0)
; RV64ZVE32F-NEXT: ret
%eidxs = sext <8 x i32> %idxs to <8 x i64>
%ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
ret void
}
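
; Note: the sign-extended i32 case lowers exactly like the native one on
; RV32, while RV64V widens with vsext.vf2 to e64 before the indexed store.
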
5404 define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m) {
5405 ; RV32V-LABEL: mscatter_baseidx_zext_v8i32_v8i64:
5407 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
5408 ; RV32V-NEXT: vsll.vi v12, v12, 3
5409 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
5410 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
5413 ; RV64V-LABEL: mscatter_baseidx_zext_v8i32_v8i64:
5415 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
5416 ; RV64V-NEXT: vzext.vf2 v16, v12
5417 ; RV64V-NEXT: vsll.vi v12, v16, 3
5418 ; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
5421 ; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i32_v8i64:
5422 ; RV32ZVE32F: # %bb.0:
5423 ; RV32ZVE32F-NEXT: addi sp, sp, -16
5424 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
5425 ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
5426 ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
5427 ; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
5428 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
5429 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
5430 ; RV32ZVE32F-NEXT: .cfi_offset s2, -12
5431 ; RV32ZVE32F-NEXT: .cfi_remember_state
5432 ; RV32ZVE32F-NEXT: lw a2, 56(a0)
5433 ; RV32ZVE32F-NEXT: lw a3, 60(a0)
5434 ; RV32ZVE32F-NEXT: lw a6, 40(a0)
5435 ; RV32ZVE32F-NEXT: lw a7, 44(a0)
5436 ; RV32ZVE32F-NEXT: lw a4, 48(a0)
5437 ; RV32ZVE32F-NEXT: lw a5, 52(a0)
5438 ; RV32ZVE32F-NEXT: lw t3, 24(a0)
5439 ; RV32ZVE32F-NEXT: lw t4, 28(a0)
5440 ; RV32ZVE32F-NEXT: lw t1, 32(a0)
5441 ; RV32ZVE32F-NEXT: lw t2, 36(a0)
5442 ; RV32ZVE32F-NEXT: lw s0, 8(a0)
5443 ; RV32ZVE32F-NEXT: lw s1, 12(a0)
5444 ; RV32ZVE32F-NEXT: lw t5, 16(a0)
; RV32ZVE32F-NEXT: lw t6, 20(a0)
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s t0, v0
; RV32ZVE32F-NEXT: andi s2, t0, 1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: bnez s2, .LBB50_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB50_11
; RV32ZVE32F-NEXT: .LBB50_2: # %else2
; RV32ZVE32F-NEXT: andi a0, t0, 4
; RV32ZVE32F-NEXT: bnez a0, .LBB50_12
; RV32ZVE32F-NEXT: .LBB50_3: # %else4
; RV32ZVE32F-NEXT: andi a0, t0, 8
; RV32ZVE32F-NEXT: bnez a0, .LBB50_13
; RV32ZVE32F-NEXT: .LBB50_4: # %else6
; RV32ZVE32F-NEXT: andi a0, t0, 16
; RV32ZVE32F-NEXT: bnez a0, .LBB50_14
; RV32ZVE32F-NEXT: .LBB50_5: # %else8
; RV32ZVE32F-NEXT: andi a0, t0, 32
; RV32ZVE32F-NEXT: bnez a0, .LBB50_15
; RV32ZVE32F-NEXT: .LBB50_6: # %else10
; RV32ZVE32F-NEXT: andi a0, t0, 64
; RV32ZVE32F-NEXT: bnez a0, .LBB50_16
; RV32ZVE32F-NEXT: .LBB50_7: # %else12
; RV32ZVE32F-NEXT: andi a0, t0, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB50_9
; RV32ZVE32F-NEXT: .LBB50_8: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw a2, 0(a0)
; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: .LBB50_9: # %else14
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: .cfi_restore s0
; RV32ZVE32F-NEXT: .cfi_restore s1
; RV32ZVE32F-NEXT: .cfi_restore s2
; RV32ZVE32F-NEXT: addi sp, sp, 16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 0
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB50_10: # %cond.store
; RV32ZVE32F-NEXT: .cfi_restore_state
; RV32ZVE32F-NEXT: lw a1, 0(a0)
; RV32ZVE32F-NEXT: lw a0, 4(a0)
; RV32ZVE32F-NEXT: vmv.x.s s2, v8
; RV32ZVE32F-NEXT: sw a1, 0(s2)
; RV32ZVE32F-NEXT: sw a0, 4(s2)
; RV32ZVE32F-NEXT: andi a0, t0, 2
; RV32ZVE32F-NEXT: beqz a0, .LBB50_2
; RV32ZVE32F-NEXT: .LBB50_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw s0, 0(a0)
; RV32ZVE32F-NEXT: sw s1, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 4
; RV32ZVE32F-NEXT: beqz a0, .LBB50_3
; RV32ZVE32F-NEXT: .LBB50_12: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t5, 0(a0)
; RV32ZVE32F-NEXT: sw t6, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 8
; RV32ZVE32F-NEXT: beqz a0, .LBB50_4
; RV32ZVE32F-NEXT: .LBB50_13: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t3, 0(a0)
; RV32ZVE32F-NEXT: sw t4, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 16
; RV32ZVE32F-NEXT: beqz a0, .LBB50_5
; RV32ZVE32F-NEXT: .LBB50_14: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t1, 0(a0)
; RV32ZVE32F-NEXT: sw t2, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 32
; RV32ZVE32F-NEXT: beqz a0, .LBB50_6
; RV32ZVE32F-NEXT: .LBB50_15: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a6, 0(a0)
; RV32ZVE32F-NEXT: sw a7, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, 64
; RV32ZVE32F-NEXT: beqz a0, .LBB50_7
; RV32ZVE32F-NEXT: .LBB50_16: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a4, 0(a0)
; RV32ZVE32F-NEXT: sw a5, 4(a0)
; RV32ZVE32F-NEXT: andi a0, t0, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB50_8
; RV32ZVE32F-NEXT: j .LBB50_9
;
; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i32_v8i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a4, 40(a0)
; RV64ZVE32F-NEXT: ld a3, 48(a0)
; RV64ZVE32F-NEXT: ld a2, 56(a0)
; RV64ZVE32F-NEXT: ld t1, 8(a0)
; RV64ZVE32F-NEXT: ld t0, 16(a0)
; RV64ZVE32F-NEXT: ld a7, 24(a0)
; RV64ZVE32F-NEXT: ld a6, 32(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v0
; RV64ZVE32F-NEXT: andi t2, a5, 1
; RV64ZVE32F-NEXT: beqz t2, .LBB50_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
; RV64ZVE32F-NEXT: slli t2, t2, 32
; RV64ZVE32F-NEXT: srli t2, t2, 29
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: sd a0, 0(t2)
; RV64ZVE32F-NEXT: .LBB50_2: # %else
; RV64ZVE32F-NEXT: andi a0, a5, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB50_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v10
; RV64ZVE32F-NEXT: slli a0, a0, 32
; RV64ZVE32F-NEXT: srli a0, a0, 29
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd t1, 0(a0)
; RV64ZVE32F-NEXT: .LBB50_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a0, a5, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB50_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB50_13
; RV64ZVE32F-NEXT: .LBB50_6: # %else6
; RV64ZVE32F-NEXT: andi a0, a5, 16
; RV64ZVE32F-NEXT: bnez a0, .LBB50_14
; RV64ZVE32F-NEXT: .LBB50_7: # %else8
; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB50_9
; RV64ZVE32F-NEXT: .LBB50_8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 32
; RV64ZVE32F-NEXT: srli a0, a0, 29
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a4, 0(a0)
; RV64ZVE32F-NEXT: .LBB50_9: # %else10
; RV64ZVE32F-NEXT: andi a0, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB50_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB50_16
; RV64ZVE32F-NEXT: .LBB50_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB50_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 32
; RV64ZVE32F-NEXT: srli a0, a0, 29
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd t0, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, 8
; RV64ZVE32F-NEXT: beqz a0, .LBB50_6
; RV64ZVE32F-NEXT: .LBB50_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 32
; RV64ZVE32F-NEXT: srli a0, a0, 29
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a7, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, 16
; RV64ZVE32F-NEXT: beqz a0, .LBB50_7
; RV64ZVE32F-NEXT: .LBB50_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a0, v10
; RV64ZVE32F-NEXT: slli a0, a0, 32
; RV64ZVE32F-NEXT: srli a0, a0, 29
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a6, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: bnez a0, .LBB50_8
; RV64ZVE32F-NEXT: j .LBB50_9
; RV64ZVE32F-NEXT: .LBB50_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 32
; RV64ZVE32F-NEXT: srli a0, a0, 29
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB50_11
; RV64ZVE32F-NEXT: .LBB50_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 32
; RV64ZVE32F-NEXT: srli a0, a0, 29
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a2, 0(a0)
; RV64ZVE32F-NEXT: ret
%eidxs = zext <8 x i32> %idxs to <8 x i64>
%ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
ret void
}

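; The i64 indices below need no extension: the RV32 configurations truncate
; them to 32 bits to form the indexed-store offsets, and RV64ZVE32F computes
; each address with scalar loads and shifts instead.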
define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs, <8 x i1> %m) {
; RV32V-LABEL: mscatter_baseidx_v8i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vnsrl.wi v16, v12, 0
; RV32V-NEXT: vsll.vi v12, v16, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_v8i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsll.vi v12, v12, 3
; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_baseidx_v8i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: addi sp, sp, -48
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 48
; RV32ZVE32F-NEXT: sw s0, 44(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s1, 40(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s2, 36(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s3, 32(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s4, 28(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s5, 24(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s6, 20(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s7, 16(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s8, 12(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s9, 8(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: .cfi_offset s2, -12
; RV32ZVE32F-NEXT: .cfi_offset s3, -16
; RV32ZVE32F-NEXT: .cfi_offset s4, -20
; RV32ZVE32F-NEXT: .cfi_offset s5, -24
; RV32ZVE32F-NEXT: .cfi_offset s6, -28
; RV32ZVE32F-NEXT: .cfi_offset s7, -32
; RV32ZVE32F-NEXT: .cfi_offset s8, -36
; RV32ZVE32F-NEXT: .cfi_offset s9, -40
; RV32ZVE32F-NEXT: .cfi_remember_state
; RV32ZVE32F-NEXT: lw a3, 56(a0)
; RV32ZVE32F-NEXT: lw a4, 60(a0)
; RV32ZVE32F-NEXT: lw a7, 40(a0)
; RV32ZVE32F-NEXT: lw t0, 44(a0)
; RV32ZVE32F-NEXT: lw a5, 48(a0)
; RV32ZVE32F-NEXT: lw a6, 52(a0)
; RV32ZVE32F-NEXT: lw t3, 24(a0)
; RV32ZVE32F-NEXT: lw t4, 28(a0)
; RV32ZVE32F-NEXT: lw t1, 32(a0)
; RV32ZVE32F-NEXT: lw t2, 36(a0)
; RV32ZVE32F-NEXT: lw s0, 8(a0)
; RV32ZVE32F-NEXT: lw s1, 12(a0)
; RV32ZVE32F-NEXT: lw t5, 16(a0)
; RV32ZVE32F-NEXT: lw t6, 20(a0)
; RV32ZVE32F-NEXT: lw s2, 32(a2)
; RV32ZVE32F-NEXT: lw s3, 40(a2)
; RV32ZVE32F-NEXT: lw s4, 48(a2)
; RV32ZVE32F-NEXT: lw s5, 56(a2)
; RV32ZVE32F-NEXT: lw s6, 0(a2)
; RV32ZVE32F-NEXT: lw s7, 8(a2)
; RV32ZVE32F-NEXT: lw s8, 16(a2)
; RV32ZVE32F-NEXT: lw s9, 24(a2)
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vmv.v.x v8, s6
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a2, v0
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s7
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s8
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s9
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s2
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s3
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s4
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s5
; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
; RV32ZVE32F-NEXT: andi s2, a2, 1
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: bnez s2, .LBB51_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, a2, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB51_11
; RV32ZVE32F-NEXT: .LBB51_2: # %else2
; RV32ZVE32F-NEXT: andi a0, a2, 4
; RV32ZVE32F-NEXT: bnez a0, .LBB51_12
; RV32ZVE32F-NEXT: .LBB51_3: # %else4
; RV32ZVE32F-NEXT: andi a0, a2, 8
; RV32ZVE32F-NEXT: bnez a0, .LBB51_13
; RV32ZVE32F-NEXT: .LBB51_4: # %else6
; RV32ZVE32F-NEXT: andi a0, a2, 16
; RV32ZVE32F-NEXT: bnez a0, .LBB51_14
; RV32ZVE32F-NEXT: .LBB51_5: # %else8
; RV32ZVE32F-NEXT: andi a0, a2, 32
; RV32ZVE32F-NEXT: bnez a0, .LBB51_15
; RV32ZVE32F-NEXT: .LBB51_6: # %else10
; RV32ZVE32F-NEXT: andi a0, a2, 64
; RV32ZVE32F-NEXT: bnez a0, .LBB51_16
; RV32ZVE32F-NEXT: .LBB51_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a2, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB51_9
; RV32ZVE32F-NEXT: .LBB51_8: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: sw a3, 0(a0)
; RV32ZVE32F-NEXT: sw a4, 4(a0)
; RV32ZVE32F-NEXT: .LBB51_9: # %else14
; RV32ZVE32F-NEXT: lw s0, 44(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 40(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s2, 36(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s3, 32(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s4, 28(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s5, 24(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s6, 20(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s7, 16(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s8, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s9, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: .cfi_restore s0
; RV32ZVE32F-NEXT: .cfi_restore s1
; RV32ZVE32F-NEXT: .cfi_restore s2
; RV32ZVE32F-NEXT: .cfi_restore s3
; RV32ZVE32F-NEXT: .cfi_restore s4
; RV32ZVE32F-NEXT: .cfi_restore s5
; RV32ZVE32F-NEXT: .cfi_restore s6
; RV32ZVE32F-NEXT: .cfi_restore s7
; RV32ZVE32F-NEXT: .cfi_restore s8
; RV32ZVE32F-NEXT: .cfi_restore s9
; RV32ZVE32F-NEXT: addi sp, sp, 48
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 0
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB51_10: # %cond.store
; RV32ZVE32F-NEXT: .cfi_restore_state
; RV32ZVE32F-NEXT: lw a1, 0(a0)
; RV32ZVE32F-NEXT: lw a0, 4(a0)
; RV32ZVE32F-NEXT: vmv.x.s s2, v8
; RV32ZVE32F-NEXT: sw a1, 0(s2)
; RV32ZVE32F-NEXT: sw a0, 4(s2)
; RV32ZVE32F-NEXT: andi a0, a2, 2
; RV32ZVE32F-NEXT: beqz a0, .LBB51_2
; RV32ZVE32F-NEXT: .LBB51_11: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw s0, 0(a0)
; RV32ZVE32F-NEXT: sw s1, 4(a0)
; RV32ZVE32F-NEXT: andi a0, a2, 4
; RV32ZVE32F-NEXT: beqz a0, .LBB51_3
; RV32ZVE32F-NEXT: .LBB51_12: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t5, 0(a0)
; RV32ZVE32F-NEXT: sw t6, 4(a0)
; RV32ZVE32F-NEXT: andi a0, a2, 8
; RV32ZVE32F-NEXT: beqz a0, .LBB51_4
; RV32ZVE32F-NEXT: .LBB51_13: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t3, 0(a0)
; RV32ZVE32F-NEXT: sw t4, 4(a0)
; RV32ZVE32F-NEXT: andi a0, a2, 16
; RV32ZVE32F-NEXT: beqz a0, .LBB51_5
; RV32ZVE32F-NEXT: .LBB51_14: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw t1, 0(a0)
; RV32ZVE32F-NEXT: sw t2, 4(a0)
; RV32ZVE32F-NEXT: andi a0, a2, 32
; RV32ZVE32F-NEXT: beqz a0, .LBB51_6
; RV32ZVE32F-NEXT: .LBB51_15: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a7, 0(a0)
; RV32ZVE32F-NEXT: sw t0, 4(a0)
; RV32ZVE32F-NEXT: andi a0, a2, 64
; RV32ZVE32F-NEXT: beqz a0, .LBB51_7
; RV32ZVE32F-NEXT: .LBB51_16: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: sw a5, 0(a0)
; RV32ZVE32F-NEXT: sw a6, 4(a0)
; RV32ZVE32F-NEXT: andi a0, a2, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB51_8
; RV32ZVE32F-NEXT: j .LBB51_9
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v8i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: addi sp, sp, -32
; RV64ZVE32F-NEXT: .cfi_def_cfa_offset 32
; RV64ZVE32F-NEXT: sd s0, 24(sp) # 8-byte Folded Spill
; RV64ZVE32F-NEXT: sd s1, 16(sp) # 8-byte Folded Spill
; RV64ZVE32F-NEXT: sd s2, 8(sp) # 8-byte Folded Spill
; RV64ZVE32F-NEXT: sd s3, 0(sp) # 8-byte Folded Spill
; RV64ZVE32F-NEXT: .cfi_offset s0, -8
; RV64ZVE32F-NEXT: .cfi_offset s1, -16
; RV64ZVE32F-NEXT: .cfi_offset s2, -24
; RV64ZVE32F-NEXT: .cfi_offset s3, -32
; RV64ZVE32F-NEXT: .cfi_remember_state
; RV64ZVE32F-NEXT: ld a5, 40(a0)
; RV64ZVE32F-NEXT: ld a4, 48(a0)
; RV64ZVE32F-NEXT: ld a3, 56(a0)
; RV64ZVE32F-NEXT: ld s0, 8(a0)
; RV64ZVE32F-NEXT: ld t5, 16(a0)
; RV64ZVE32F-NEXT: ld t3, 24(a0)
; RV64ZVE32F-NEXT: ld t1, 32(a0)
; RV64ZVE32F-NEXT: ld s2, 8(a2)
; RV64ZVE32F-NEXT: ld s1, 16(a2)
; RV64ZVE32F-NEXT: ld t6, 24(a2)
; RV64ZVE32F-NEXT: ld t4, 32(a2)
; RV64ZVE32F-NEXT: ld t2, 40(a2)
; RV64ZVE32F-NEXT: ld t0, 48(a2)
; RV64ZVE32F-NEXT: ld a6, 56(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a7, v0
; RV64ZVE32F-NEXT: andi s3, a7, 1
; RV64ZVE32F-NEXT: bnez s3, .LBB51_10
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a0, a7, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB51_11
; RV64ZVE32F-NEXT: .LBB51_2: # %else2
; RV64ZVE32F-NEXT: andi a0, a7, 4
; RV64ZVE32F-NEXT: bnez a0, .LBB51_12
; RV64ZVE32F-NEXT: .LBB51_3: # %else4
; RV64ZVE32F-NEXT: andi a0, a7, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB51_13
; RV64ZVE32F-NEXT: .LBB51_4: # %else6
; RV64ZVE32F-NEXT: andi a0, a7, 16
; RV64ZVE32F-NEXT: bnez a0, .LBB51_14
; RV64ZVE32F-NEXT: .LBB51_5: # %else8
; RV64ZVE32F-NEXT: andi a0, a7, 32
; RV64ZVE32F-NEXT: bnez a0, .LBB51_15
; RV64ZVE32F-NEXT: .LBB51_6: # %else10
; RV64ZVE32F-NEXT: andi a0, a7, 64
; RV64ZVE32F-NEXT: bnez a0, .LBB51_16
; RV64ZVE32F-NEXT: .LBB51_7: # %else12
; RV64ZVE32F-NEXT: andi a0, a7, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB51_9
; RV64ZVE32F-NEXT: .LBB51_8: # %cond.store13
; RV64ZVE32F-NEXT: slli a6, a6, 3
; RV64ZVE32F-NEXT: add a1, a1, a6
; RV64ZVE32F-NEXT: sd a3, 0(a1)
; RV64ZVE32F-NEXT: .LBB51_9: # %else14
; RV64ZVE32F-NEXT: ld s0, 24(sp) # 8-byte Folded Reload
; RV64ZVE32F-NEXT: ld s1, 16(sp) # 8-byte Folded Reload
; RV64ZVE32F-NEXT: ld s2, 8(sp) # 8-byte Folded Reload
; RV64ZVE32F-NEXT: ld s3, 0(sp) # 8-byte Folded Reload
; RV64ZVE32F-NEXT: .cfi_restore s0
; RV64ZVE32F-NEXT: .cfi_restore s1
; RV64ZVE32F-NEXT: .cfi_restore s2
; RV64ZVE32F-NEXT: .cfi_restore s3
; RV64ZVE32F-NEXT: addi sp, sp, 32
; RV64ZVE32F-NEXT: .cfi_def_cfa_offset 0
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB51_10: # %cond.store
; RV64ZVE32F-NEXT: .cfi_restore_state
; RV64ZVE32F-NEXT: ld a2, 0(a2)
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a1, a2
; RV64ZVE32F-NEXT: sd a0, 0(a2)
; RV64ZVE32F-NEXT: andi a0, a7, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB51_2
; RV64ZVE32F-NEXT: .LBB51_11: # %cond.store1
; RV64ZVE32F-NEXT: slli s2, s2, 3
; RV64ZVE32F-NEXT: add s2, a1, s2
; RV64ZVE32F-NEXT: sd s0, 0(s2)
; RV64ZVE32F-NEXT: andi a0, a7, 4
; RV64ZVE32F-NEXT: beqz a0, .LBB51_3
; RV64ZVE32F-NEXT: .LBB51_12: # %cond.store3
; RV64ZVE32F-NEXT: slli s1, s1, 3
; RV64ZVE32F-NEXT: add s1, a1, s1
; RV64ZVE32F-NEXT: sd t5, 0(s1)
; RV64ZVE32F-NEXT: andi a0, a7, 8
; RV64ZVE32F-NEXT: beqz a0, .LBB51_4
; RV64ZVE32F-NEXT: .LBB51_13: # %cond.store5
; RV64ZVE32F-NEXT: slli t6, t6, 3
; RV64ZVE32F-NEXT: add t6, a1, t6
; RV64ZVE32F-NEXT: sd t3, 0(t6)
; RV64ZVE32F-NEXT: andi a0, a7, 16
; RV64ZVE32F-NEXT: beqz a0, .LBB51_5
; RV64ZVE32F-NEXT: .LBB51_14: # %cond.store7
; RV64ZVE32F-NEXT: slli t4, t4, 3
; RV64ZVE32F-NEXT: add t4, a1, t4
; RV64ZVE32F-NEXT: sd t1, 0(t4)
; RV64ZVE32F-NEXT: andi a0, a7, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB51_6
; RV64ZVE32F-NEXT: .LBB51_15: # %cond.store9
; RV64ZVE32F-NEXT: slli t2, t2, 3
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: sd a5, 0(t2)
; RV64ZVE32F-NEXT: andi a0, a7, 64
; RV64ZVE32F-NEXT: beqz a0, .LBB51_7
; RV64ZVE32F-NEXT: .LBB51_16: # %cond.store11
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: sd a4, 0(t0)
; RV64ZVE32F-NEXT: andi a0, a7, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB51_8
; RV64ZVE32F-NEXT: j .LBB51_9
%ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %idxs
call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
ret void
}

declare void @llvm.masked.scatter.v1bf16.v1p0(<1 x bfloat>, <1 x ptr>, i32, <1 x i1>)

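; bf16 scatters. Without vector bf16 arithmetic, the RV64ZVE32F lowering moves
; each element through a GPR and stores it with fsh (from zfbfmin).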
define void @mscatter_v1bf16(<1 x bfloat> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
; RV32V-LABEL: mscatter_v1bf16:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_v1bf16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v1bf16:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v1bf16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vfirst.m a1, v0
; RV64ZVE32F-NEXT: bnez a1, .LBB52_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: fmv.h.x fa5, a1
; RV64ZVE32F-NEXT: fsh fa5, 0(a0)
; RV64ZVE32F-NEXT: .LBB52_2: # %else
; RV64ZVE32F-NEXT: ret
call void @llvm.masked.scatter.v1bf16.v1p0(<1 x bfloat> %val, <1 x ptr> %ptrs, i32 2, <1 x i1> %m)
ret void
}

declare void @llvm.masked.scatter.v2bf16.v2p0(<2 x bfloat>, <2 x ptr>, i32, <2 x i1>)

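; Two-element bf16 scatter: the scalar fallback tests each mask bit and stores
; the corresponding lane on its own path.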
define void @mscatter_v2bf16(<2 x bfloat> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV32V-LABEL: mscatter_v2bf16:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_v2bf16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v2bf16:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v2bf16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB53_3
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB53_4
; RV64ZVE32F-NEXT: .LBB53_2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB53_3: # %cond.store
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a0)
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB53_2
; RV64ZVE32F-NEXT: .LBB53_4: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-NEXT: fsh fa5, 0(a1)
; RV64ZVE32F-NEXT: ret
call void @llvm.masked.scatter.v2bf16.v2p0(<2 x bfloat> %val, <2 x ptr> %ptrs, i32 2, <2 x i1> %m)
ret void
}

declare void @llvm.masked.scatter.v4bf16.v4p0(<4 x bfloat>, <4 x ptr>, i32, <4 x i1>)

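; Four-element bf16 scatter with a variable mask.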
define void @mscatter_v4bf16(<4 x bfloat> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV32-LABEL: mscatter_v4bf16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_v4bf16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (zero), v10, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v4bf16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a4, 8(a0)
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: ld a1, 24(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v0
; RV64ZVE32F-NEXT: andi a5, a3, 1
; RV64ZVE32F-NEXT: bnez a5, .LBB54_5
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a0, a3, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB54_6
; RV64ZVE32F-NEXT: .LBB54_2: # %else2
; RV64ZVE32F-NEXT: andi a0, a3, 4
; RV64ZVE32F-NEXT: bnez a0, .LBB54_7
; RV64ZVE32F-NEXT: .LBB54_3: # %else4
; RV64ZVE32F-NEXT: andi a3, a3, 8
; RV64ZVE32F-NEXT: bnez a3, .LBB54_8
; RV64ZVE32F-NEXT: .LBB54_4: # %else6
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB54_5: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v8
; RV64ZVE32F-NEXT: fmv.h.x fa5, a5
; RV64ZVE32F-NEXT: fsh fa5, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a3, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB54_2
; RV64ZVE32F-NEXT: .LBB54_6: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-NEXT: fsh fa5, 0(a4)
; RV64ZVE32F-NEXT: andi a0, a3, 4
; RV64ZVE32F-NEXT: beqz a0, .LBB54_3
; RV64ZVE32F-NEXT: .LBB54_7: # %cond.store3
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a3, a3, 8
; RV64ZVE32F-NEXT: beqz a3, .LBB54_4
; RV64ZVE32F-NEXT: .LBB54_8: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-NEXT: fsh fa5, 0(a1)
; RV64ZVE32F-NEXT: ret
call void @llvm.masked.scatter.v4bf16.v4p0(<4 x bfloat> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> %m)
ret void
}

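; With an all-ones mask every lane is stored unconditionally, so the scalar
; fallback needs no branches.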
define void @mscatter_truemask_v4bf16(<4 x bfloat> %val, <4 x ptr> %ptrs) {
; RV32-LABEL: mscatter_truemask_v4bf16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_truemask_v4bf16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (zero), v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_truemask_v4bf16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a1, 0(a0)
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: ld a3, 16(a0)
; RV64ZVE32F-NEXT: ld a0, 24(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a4, v8
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: fmv.h.x fa5, a4
; RV64ZVE32F-NEXT: vmv.x.s a4, v9
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
; RV64ZVE32F-NEXT: fsh fa5, 0(a1)
; RV64ZVE32F-NEXT: vmv.x.s a1, v9
; RV64ZVE32F-NEXT: fmv.h.x fa5, a4
; RV64ZVE32F-NEXT: vmv.x.s a4, v8
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: fmv.h.x fa5, a1
; RV64ZVE32F-NEXT: fsh fa5, 0(a3)
; RV64ZVE32F-NEXT: fmv.h.x fa5, a4
; RV64ZVE32F-NEXT: fsh fa5, 0(a0)
; RV64ZVE32F-NEXT: ret
call void @llvm.masked.scatter.v4bf16.v4p0(<4 x bfloat> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1))
ret void
}

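; With an all-zeros mask the scatter folds away entirely.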
define void @mscatter_falsemask_v4bf16(<4 x bfloat> %val, <4 x ptr> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_v4bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: ret
call void @llvm.masked.scatter.v4bf16.v4p0(<4 x bfloat> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> zeroinitializer)
ret void
}

declare void @llvm.masked.scatter.v8bf16.v8p0(<8 x bfloat>, <8 x ptr>, i32, <8 x i1>)

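; Eight-element bf16 scatter, the widest bf16 case exercised here.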
define void @mscatter_v8bf16(<8 x bfloat> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV32-LABEL: mscatter_v8bf16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_v8bf16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (zero), v12, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v8bf16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a3, 40(a0)
; RV64ZVE32F-NEXT: ld a2, 48(a0)
; RV64ZVE32F-NEXT: ld a1, 56(a0)
; RV64ZVE32F-NEXT: ld t0, 8(a0)
; RV64ZVE32F-NEXT: ld a7, 16(a0)
; RV64ZVE32F-NEXT: ld a6, 24(a0)
; RV64ZVE32F-NEXT: ld a5, 32(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a4, v0
; RV64ZVE32F-NEXT: andi t1, a4, 1
; RV64ZVE32F-NEXT: bnez t1, .LBB57_9
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a0, a4, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB57_10
; RV64ZVE32F-NEXT: .LBB57_2: # %else2
; RV64ZVE32F-NEXT: andi a0, a4, 4
; RV64ZVE32F-NEXT: bnez a0, .LBB57_11
; RV64ZVE32F-NEXT: .LBB57_3: # %else4
; RV64ZVE32F-NEXT: andi a0, a4, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB57_12
; RV64ZVE32F-NEXT: .LBB57_4: # %else6
; RV64ZVE32F-NEXT: andi a0, a4, 16
; RV64ZVE32F-NEXT: bnez a0, .LBB57_13
; RV64ZVE32F-NEXT: .LBB57_5: # %else8
; RV64ZVE32F-NEXT: andi a0, a4, 32
; RV64ZVE32F-NEXT: bnez a0, .LBB57_14
; RV64ZVE32F-NEXT: .LBB57_6: # %else10
; RV64ZVE32F-NEXT: andi a0, a4, 64
; RV64ZVE32F-NEXT: bnez a0, .LBB57_15
; RV64ZVE32F-NEXT: .LBB57_7: # %else12
; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB57_16
; RV64ZVE32F-NEXT: .LBB57_8: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB57_9: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s t1, v8
; RV64ZVE32F-NEXT: fmv.h.x fa5, t1
; RV64ZVE32F-NEXT: fsh fa5, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a4, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB57_2
; RV64ZVE32F-NEXT: .LBB57_10: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-NEXT: fsh fa5, 0(t0)
; RV64ZVE32F-NEXT: andi a0, a4, 4
; RV64ZVE32F-NEXT: beqz a0, .LBB57_3
; RV64ZVE32F-NEXT: .LBB57_11: # %cond.store3
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-NEXT: fsh fa5, 0(a7)
; RV64ZVE32F-NEXT: andi a0, a4, 8
; RV64ZVE32F-NEXT: beqz a0, .LBB57_4
; RV64ZVE32F-NEXT: .LBB57_12: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-NEXT: fsh fa5, 0(a6)
; RV64ZVE32F-NEXT: andi a0, a4, 16
; RV64ZVE32F-NEXT: beqz a0, .LBB57_5
; RV64ZVE32F-NEXT: .LBB57_13: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-NEXT: fsh fa5, 0(a5)
; RV64ZVE32F-NEXT: andi a0, a4, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB57_6
; RV64ZVE32F-NEXT: .LBB57_14: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-NEXT: fsh fa5, 0(a3)
; RV64ZVE32F-NEXT: andi a0, a4, 64
; RV64ZVE32F-NEXT: beqz a0, .LBB57_7
; RV64ZVE32F-NEXT: .LBB57_15: # %cond.store11
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 6
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB57_8
; RV64ZVE32F-NEXT: .LBB57_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-NEXT: fsh fa5, 0(a1)
; RV64ZVE32F-NEXT: ret
call void @llvm.masked.scatter.v8bf16.v8p0(<8 x bfloat> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
ret void
}

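; Indexed bf16 scatters: the i8 indices are extended and doubled (bf16 is
; 2 bytes wide) to form byte offsets from the base pointer.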
define void @mscatter_baseidx_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_v8i8_v8bf16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf4 v10, v9
; RV32-NEXT: vadd.vv v10, v10, v10
; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_v8i8_v8bf16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf8 v12, v9
; RV64V-NEXT: vadd.vv v12, v12, v12
; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8_v8bf16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB58_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: .LBB58_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB58_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: .LBB58_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB58_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB58_13
; RV64ZVE32F-NEXT: .LBB58_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB58_14
; RV64ZVE32F-NEXT: .LBB58_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB58_9
; RV64ZVE32F-NEXT: .LBB58_8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: .LBB58_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB58_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB58_16
; RV64ZVE32F-NEXT: .LBB58_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB58_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB58_6
; RV64ZVE32F-NEXT: .LBB58_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB58_7
; RV64ZVE32F-NEXT: .LBB58_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB58_8
; RV64ZVE32F-NEXT: j .LBB58_9
; RV64ZVE32F-NEXT: .LBB58_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB58_11
; RV64ZVE32F-NEXT: .LBB58_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v9
; RV64ZVE32F-NEXT: slli a1, a1, 1
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: fmv.h.x fa5, a1
; RV64ZVE32F-NEXT: fsh fa5, 0(a0)
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i8> %idxs
call void @llvm.masked.scatter.v8bf16.v8p0(<8 x bfloat> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
ret void
}

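; Explicitly sign-extending the i8 indices first lowers the same way as the
; plain i8-index case above.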
define void @mscatter_baseidx_sext_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_v8i8_v8bf16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf4 v10, v9
; RV32-NEXT: vadd.vv v10, v10, v10
; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_sext_v8i8_v8bf16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf8 v12, v9
; RV64V-NEXT: vadd.vv v12, v12, v12
; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8bf16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB59_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: .LBB59_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB59_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: .LBB59_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB59_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB59_13
; RV64ZVE32F-NEXT: .LBB59_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB59_14
; RV64ZVE32F-NEXT: .LBB59_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB59_9
; RV64ZVE32F-NEXT: .LBB59_8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: .LBB59_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB59_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB59_16
; RV64ZVE32F-NEXT: .LBB59_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB59_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB59_6
; RV64ZVE32F-NEXT: .LBB59_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB59_7
; RV64ZVE32F-NEXT: .LBB59_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB59_8
; RV64ZVE32F-NEXT: j .LBB59_9
; RV64ZVE32F-NEXT: .LBB59_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB59_11
; RV64ZVE32F-NEXT: .LBB59_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v9
; RV64ZVE32F-NEXT: slli a1, a1, 1
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: fmv.h.x fa5, a1
; RV64ZVE32F-NEXT: fsh fa5, 0(a0)
; RV64ZVE32F-NEXT: ret
%eidxs = sext <8 x i8> %idxs to <8 x i16>
%ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i16> %eidxs
call void @llvm.masked.scatter.v8bf16.v8p0(<8 x bfloat> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
ret void
}

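; Zero-extended i8 indices fit in 16 bits, so the vector paths can use the
; narrower vsoxei16 form; the scalar fallback masks each index with 255 instead.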
define void @mscatter_baseidx_zext_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_v8i8_v8bf16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT: vwaddu.vv v10, v9, v9
; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_zext_v8i8_v8bf16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64V-NEXT: vwaddu.vv v10, v9, v9
; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV64V-NEXT: vsoxei16.v v8, (a0), v10, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8bf16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB60_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: .LBB60_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB60_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: .LBB60_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB60_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB60_13
; RV64ZVE32F-NEXT: .LBB60_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB60_14
; RV64ZVE32F-NEXT: .LBB60_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB60_9
; RV64ZVE32F-NEXT: .LBB60_8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: .LBB60_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB60_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB60_16
; RV64ZVE32F-NEXT: .LBB60_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB60_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: vmv.x.s a3, v11
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB60_6
; RV64ZVE32F-NEXT: .LBB60_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB60_7
; RV64ZVE32F-NEXT: .LBB60_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB60_8
; RV64ZVE32F-NEXT: j .LBB60_9
; RV64ZVE32F-NEXT: .LBB60_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB60_11
; RV64ZVE32F-NEXT: .LBB60_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v9
; RV64ZVE32F-NEXT: andi a1, a1, 255
; RV64ZVE32F-NEXT: slli a1, a1, 1
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: fmv.h.x fa5, a1
; RV64ZVE32F-NEXT: fsh fa5, 0(a0)
; RV64ZVE32F-NEXT: ret
%eidxs = zext <8 x i8> %idxs to <8 x i16>
%ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i16> %eidxs
call void @llvm.masked.scatter.v8bf16.v8p0(<8 x bfloat> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
ret void
}

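; i16 indices used directly; RV32 widens them to 32-bit byte offsets with vwadd.vv.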
define void @mscatter_baseidx_v8bf16(<8 x bfloat> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_v8bf16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vwadd.vv v10, v9, v9
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_v8bf16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf4 v12, v9
; RV64V-NEXT: vadd.vv v12, v12, v12
; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v8bf16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB61_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: .LBB61_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB61_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: .LBB61_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB61_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB61_13
; RV64ZVE32F-NEXT: .LBB61_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB61_14
; RV64ZVE32F-NEXT: .LBB61_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB61_9
; RV64ZVE32F-NEXT: .LBB61_8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: .LBB61_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB61_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB61_16
; RV64ZVE32F-NEXT: .LBB61_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB61_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB61_6
; RV64ZVE32F-NEXT: .LBB61_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB61_7
; RV64ZVE32F-NEXT: .LBB61_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB61_8
; RV64ZVE32F-NEXT: j .LBB61_9
; RV64ZVE32F-NEXT: .LBB61_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB61_11
; RV64ZVE32F-NEXT: .LBB61_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-NEXT: vmv.x.s a1, v9
; RV64ZVE32F-NEXT: slli a1, a1, 1
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: fmv.h.x fa5, a1
; RV64ZVE32F-NEXT: fsh fa5, 0(a0)
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i16> %idxs
call void @llvm.masked.scatter.v8bf16.v8p0(<8 x bfloat> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
ret void
}

6901 declare void @llvm.masked.scatter.v1f16.v1p0(<1 x half>, <1 x ptr>, i32, <1 x i1>)
6903 define void @mscatter_v1f16(<1 x half> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
; RV32V-LABEL: mscatter_v1f16:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_v1f16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v1f16:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-ZVFH-LABEL: mscatter_v1f16:
; RV64ZVE32F-ZVFH: # %bb.0:
; RV64ZVE32F-ZVFH-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vfirst.m a1, v0
; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB62_2
; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a0)
; RV64ZVE32F-ZVFH-NEXT: .LBB62_2: # %else
; RV64ZVE32F-ZVFH-NEXT: ret
;
; RV64ZVE32F-ZVFHMIN-LABEL: mscatter_v1f16:
; RV64ZVE32F-ZVFHMIN: # %bb.0:
; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vfirst.m a1, v0
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB62_2
; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a1
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB62_2: # %else
; RV64ZVE32F-ZVFHMIN-NEXT: ret
call void @llvm.masked.scatter.v1f16.v1p0(<1 x half> %val, <1 x ptr> %ptrs, i32 2, <1 x i1> %m)
ret void
}

declare void @llvm.masked.scatter.v2f16.v2p0(<2 x half>, <2 x ptr>, i32, <2 x i1>)

define void @mscatter_v2f16(<2 x half> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV32V-LABEL: mscatter_v2f16:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_v2f16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v2f16:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-ZVFH-LABEL: mscatter_v2f16:
; RV64ZVE32F-ZVFH: # %bb.0:
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-ZVFH-NEXT: andi a3, a2, 1
; RV64ZVE32F-ZVFH-NEXT: bnez a3, .LBB63_3
; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %else
; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 2
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB63_4
; RV64ZVE32F-ZVFH-NEXT: .LBB63_2: # %else2
; RV64ZVE32F-ZVFH-NEXT: ret
; RV64ZVE32F-ZVFH-NEXT: .LBB63_3: # %cond.store
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a0)
; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 2
; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB63_2
; RV64ZVE32F-ZVFH-NEXT: .LBB63_4: # %cond.store1
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a1)
; RV64ZVE32F-ZVFH-NEXT: ret
;
; RV64ZVE32F-ZVFHMIN-LABEL: mscatter_v2f16:
; RV64ZVE32F-ZVFHMIN: # %bb.0:
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-ZVFHMIN-NEXT: andi a3, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a3, .LBB63_3
; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 2
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB63_4
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB63_2: # %else2
; RV64ZVE32F-ZVFHMIN-NEXT: ret
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB63_3: # %cond.store
; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 2
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB63_2
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB63_4: # %cond.store1
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a1)
; RV64ZVE32F-ZVFHMIN-NEXT: ret
call void @llvm.masked.scatter.v2f16.v2p0(<2 x half> %val, <2 x ptr> %ptrs, i32 2, <2 x i1> %m)
ret void
}

declare void @llvm.masked.scatter.v4f16.v4p0(<4 x half>, <4 x ptr>, i32, <4 x i1>)

define void @mscatter_v4f16(<4 x half> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV32-LABEL: mscatter_v4f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_v4f16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (zero), v10, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-ZVFH-LABEL: mscatter_v4f16:
; RV64ZVE32F-ZVFH: # %bb.0:
; RV64ZVE32F-ZVFH-NEXT: ld a4, 8(a0)
; RV64ZVE32F-ZVFH-NEXT: ld a2, 16(a0)
; RV64ZVE32F-ZVFH-NEXT: ld a1, 24(a0)
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a3, v0
; RV64ZVE32F-ZVFH-NEXT: andi a5, a3, 1
; RV64ZVE32F-ZVFH-NEXT: bnez a5, .LBB64_5
; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %else
; RV64ZVE32F-ZVFH-NEXT: andi a0, a3, 2
; RV64ZVE32F-ZVFH-NEXT: bnez a0, .LBB64_6
; RV64ZVE32F-ZVFH-NEXT: .LBB64_2: # %else2
; RV64ZVE32F-ZVFH-NEXT: andi a0, a3, 4
; RV64ZVE32F-ZVFH-NEXT: bnez a0, .LBB64_7
; RV64ZVE32F-ZVFH-NEXT: .LBB64_3: # %else4
; RV64ZVE32F-ZVFH-NEXT: andi a3, a3, 8
; RV64ZVE32F-ZVFH-NEXT: bnez a3, .LBB64_8
; RV64ZVE32F-ZVFH-NEXT: .LBB64_4: # %else6
; RV64ZVE32F-ZVFH-NEXT: ret
; RV64ZVE32F-ZVFH-NEXT: .LBB64_5: # %cond.store
; RV64ZVE32F-ZVFH-NEXT: ld a0, 0(a0)
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a0)
; RV64ZVE32F-ZVFH-NEXT: andi a0, a3, 2
; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB64_2
; RV64ZVE32F-ZVFH-NEXT: .LBB64_6: # %cond.store1
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a4)
; RV64ZVE32F-ZVFH-NEXT: andi a0, a3, 4
; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB64_3
; RV64ZVE32F-ZVFH-NEXT: .LBB64_7: # %cond.store3
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
; RV64ZVE32F-ZVFH-NEXT: andi a3, a3, 8
; RV64ZVE32F-ZVFH-NEXT: beqz a3, .LBB64_4
; RV64ZVE32F-ZVFH-NEXT: .LBB64_8: # %cond.store5
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 3
; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a1)
; RV64ZVE32F-ZVFH-NEXT: ret
;
; RV64ZVE32F-ZVFHMIN-LABEL: mscatter_v4f16:
; RV64ZVE32F-ZVFHMIN: # %bb.0:
; RV64ZVE32F-ZVFHMIN-NEXT: ld a4, 8(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 16(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: ld a1, 24(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v0
; RV64ZVE32F-ZVFHMIN-NEXT: andi a5, a3, 1
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a5, .LBB64_5
; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a3, 2
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB64_6
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB64_2: # %else2
; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a3, 4
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB64_7
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB64_3: # %else4
; RV64ZVE32F-ZVFHMIN-NEXT: andi a3, a3, 8
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a3, .LBB64_8
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB64_4: # %else6
; RV64ZVE32F-ZVFHMIN-NEXT: ret
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB64_5: # %cond.store
; RV64ZVE32F-ZVFHMIN-NEXT: ld a0, 0(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a5, v8
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a5
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a3, 2
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB64_2
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB64_6: # %cond.store1
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a4)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a3, 4
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB64_3
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB64_7: # %cond.store3
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a3, a3, 8
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a3, .LBB64_4
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB64_8: # %cond.store5
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 3
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a1)
; RV64ZVE32F-ZVFHMIN-NEXT: ret
call void @llvm.masked.scatter.v4f16.v4p0(<4 x half> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> %m)
ret void
}

define void @mscatter_truemask_v4f16(<4 x half> %val, <4 x ptr> %ptrs) {
; RV32-LABEL: mscatter_truemask_v4f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_truemask_v4f16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (zero), v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-ZVFH-LABEL: mscatter_truemask_v4f16:
; RV64ZVE32F-ZVFH: # %bb.0:
; RV64ZVE32F-ZVFH-NEXT: ld a1, 0(a0)
; RV64ZVE32F-ZVFH-NEXT: ld a2, 8(a0)
; RV64ZVE32F-ZVFH-NEXT: ld a3, 16(a0)
; RV64ZVE32F-ZVFH-NEXT: ld a0, 24(a0)
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a1)
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 3
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a3)
; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a0)
; RV64ZVE32F-ZVFH-NEXT: ret
;
; RV64ZVE32F-ZVFHMIN-LABEL: mscatter_truemask_v4f16:
; RV64ZVE32F-ZVFHMIN: # %bb.0:
; RV64ZVE32F-ZVFHMIN-NEXT: ld a1, 0(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 8(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: ld a3, 16(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: ld a0, 24(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a4, v8
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a4
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a4, v9
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a1)
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v9
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a4
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a4, v8
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a1
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a3)
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a4
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: ret
call void @llvm.masked.scatter.v4f16.v4p0(<4 x half> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1))
ret void
}

define void @mscatter_falsemask_v4f16(<4 x half> %val, <4 x ptr> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_v4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: ret
call void @llvm.masked.scatter.v4f16.v4p0(<4 x half> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> zeroinitializer)
ret void
}

declare void @llvm.masked.scatter.v8f16.v8p0(<8 x half>, <8 x ptr>, i32, <8 x i1>)

define void @mscatter_v8f16(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV32-LABEL: mscatter_v8f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_v8f16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (zero), v12, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-ZVFH-LABEL: mscatter_v8f16:
; RV64ZVE32F-ZVFH: # %bb.0:
; RV64ZVE32F-ZVFH-NEXT: ld a3, 40(a0)
; RV64ZVE32F-ZVFH-NEXT: ld a2, 48(a0)
; RV64ZVE32F-ZVFH-NEXT: ld a1, 56(a0)
; RV64ZVE32F-ZVFH-NEXT: ld t0, 8(a0)
; RV64ZVE32F-ZVFH-NEXT: ld a7, 16(a0)
; RV64ZVE32F-ZVFH-NEXT: ld a6, 24(a0)
; RV64ZVE32F-ZVFH-NEXT: ld a5, 32(a0)
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a4, v0
; RV64ZVE32F-ZVFH-NEXT: andi t1, a4, 1
; RV64ZVE32F-ZVFH-NEXT: bnez t1, .LBB67_9
; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %else
; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 2
; RV64ZVE32F-ZVFH-NEXT: bnez a0, .LBB67_10
; RV64ZVE32F-ZVFH-NEXT: .LBB67_2: # %else2
; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 4
; RV64ZVE32F-ZVFH-NEXT: bnez a0, .LBB67_11
; RV64ZVE32F-ZVFH-NEXT: .LBB67_3: # %else4
; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 8
; RV64ZVE32F-ZVFH-NEXT: bnez a0, .LBB67_12
; RV64ZVE32F-ZVFH-NEXT: .LBB67_4: # %else6
; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 16
; RV64ZVE32F-ZVFH-NEXT: bnez a0, .LBB67_13
; RV64ZVE32F-ZVFH-NEXT: .LBB67_5: # %else8
; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 32
; RV64ZVE32F-ZVFH-NEXT: bnez a0, .LBB67_14
; RV64ZVE32F-ZVFH-NEXT: .LBB67_6: # %else10
; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 64
; RV64ZVE32F-ZVFH-NEXT: bnez a0, .LBB67_15
; RV64ZVE32F-ZVFH-NEXT: .LBB67_7: # %else12
; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, -128
; RV64ZVE32F-ZVFH-NEXT: bnez a0, .LBB67_16
; RV64ZVE32F-ZVFH-NEXT: .LBB67_8: # %else14
; RV64ZVE32F-ZVFH-NEXT: ret
; RV64ZVE32F-ZVFH-NEXT: .LBB67_9: # %cond.store
; RV64ZVE32F-ZVFH-NEXT: ld a0, 0(a0)
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a0)
; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 2
; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB67_2
; RV64ZVE32F-ZVFH-NEXT: .LBB67_10: # %cond.store1
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (t0)
; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 4
; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB67_3
; RV64ZVE32F-ZVFH-NEXT: .LBB67_11: # %cond.store3
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a7)
; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 8
; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB67_4
; RV64ZVE32F-ZVFH-NEXT: .LBB67_12: # %cond.store5
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a6)
; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 16
; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB67_5
; RV64ZVE32F-ZVFH-NEXT: .LBB67_13: # %cond.store7
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a5)
; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 32
; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB67_6
; RV64ZVE32F-ZVFH-NEXT: .LBB67_14: # %cond.store9
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 5
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a3)
; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, 64
; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB67_7
; RV64ZVE32F-ZVFH-NEXT: .LBB67_15: # %cond.store11
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 6
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
; RV64ZVE32F-ZVFH-NEXT: andi a0, a4, -128
; RV64ZVE32F-ZVFH-NEXT: beqz a0, .LBB67_8
; RV64ZVE32F-ZVFH-NEXT: .LBB67_16: # %cond.store13
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a1)
; RV64ZVE32F-ZVFH-NEXT: ret
;
; RV64ZVE32F-ZVFHMIN-LABEL: mscatter_v8f16:
; RV64ZVE32F-ZVFHMIN: # %bb.0:
; RV64ZVE32F-ZVFHMIN-NEXT: ld a3, 40(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 48(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: ld a1, 56(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: ld t0, 8(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: ld a7, 16(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: ld a6, 24(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: ld a5, 32(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a4, v0
; RV64ZVE32F-ZVFHMIN-NEXT: andi t1, a4, 1
; RV64ZVE32F-ZVFHMIN-NEXT: bnez t1, .LBB67_9
; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %else
; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 2
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB67_10
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_2: # %else2
; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 4
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB67_11
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_3: # %else4
; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 8
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB67_12
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_4: # %else6
; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 16
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB67_13
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_5: # %else8
; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 32
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB67_14
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_6: # %else10
; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 64
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB67_15
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_7: # %else12
; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, -128
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a0, .LBB67_16
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_8: # %else14
; RV64ZVE32F-ZVFHMIN-NEXT: ret
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_9: # %cond.store
; RV64ZVE32F-ZVFHMIN-NEXT: ld a0, 0(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s t1, v8
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, t1
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 2
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB67_2
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_10: # %cond.store1
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(t0)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 4
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB67_3
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_11: # %cond.store3
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a7)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 8
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB67_4
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_12: # %cond.store5
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a6)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 16
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB67_5
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_13: # %cond.store7
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a5)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 32
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB67_6
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_14: # %cond.store9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a3)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, 64
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB67_7
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_15: # %cond.store11
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 6
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a0, a4, -128
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a0, .LBB67_8
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB67_16: # %cond.store13
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a0
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a1)
; RV64ZVE32F-ZVFHMIN-NEXT: ret
call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
ret void
}

define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_v8i8_v8f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf4 v10, v9
; RV32-NEXT: vadd.vv v10, v10, v10
; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_v8i8_v8f16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf8 v12, v9
; RV64V-NEXT: vadd.vv v12, v12, v12
; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-ZVFH-LABEL: mscatter_baseidx_v8i8_v8f16:
; RV64ZVE32F-ZVFH: # %bb.0:
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 1
; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB68_2
; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a2)
; RV64ZVE32F-ZVFH-NEXT: .LBB68_2: # %else
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2
; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB68_4
; RV64ZVE32F-ZVFH-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2)
; RV64ZVE32F-ZVFH-NEXT: .LBB68_4: # %else2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v9, 4
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB68_12
; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %else4
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB68_13
; RV64ZVE32F-ZVFH-NEXT: .LBB68_6: # %else6
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB68_14
; RV64ZVE32F-ZVFH-NEXT: .LBB68_7: # %else8
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB68_9
; RV64ZVE32F-ZVFH-NEXT: .LBB68_8: # %cond.store9
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v10, 1
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 5
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
; RV64ZVE32F-ZVFH-NEXT: .LBB68_9: # %else10
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v10, 2
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB68_15
; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %else12
; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB68_16
; RV64ZVE32F-ZVFH-NEXT: .LBB68_11: # %else14
; RV64ZVE32F-ZVFH-NEXT: ret
; RV64ZVE32F-ZVFH-NEXT: .LBB68_12: # %cond.store3
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-ZVFH-NEXT: vse16.v v11, (a2)
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB68_6
; RV64ZVE32F-ZVFH-NEXT: .LBB68_13: # %cond.store5
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB68_7
; RV64ZVE32F-ZVFH-NEXT: .LBB68_14: # %cond.store7
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB68_8
; RV64ZVE32F-ZVFH-NEXT: j .LBB68_9
; RV64ZVE32F-ZVFH-NEXT: .LBB68_15: # %cond.store11
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2)
; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB68_11
; RV64ZVE32F-ZVFH-NEXT: .LBB68_16: # %cond.store13
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v9
; RV64ZVE32F-ZVFH-NEXT: slli a1, a1, 1
; RV64ZVE32F-ZVFH-NEXT: add a0, a0, a1
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a0)
; RV64ZVE32F-ZVFH-NEXT: ret
;
; RV64ZVE32F-ZVFHMIN-LABEL: mscatter_baseidx_v8i8_v8f16:
; RV64ZVE32F-ZVFHMIN: # %bb.0:
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB68_2
; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_2: # %else
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB68_4
; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_4: # %else2
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 4
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB68_12
; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %else4
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB68_13
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_6: # %else6
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB68_14
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_7: # %else8
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB68_9
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_8: # %cond.store9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_9: # %else10
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 2
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB68_15
; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %else12
; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB68_16
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_11: # %else14
; RV64ZVE32F-ZVFHMIN-NEXT: ret
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_12: # %cond.store3
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v11
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB68_6
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_13: # %cond.store5
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB68_7
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_14: # %cond.store7
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB68_8
; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB68_9
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_15: # %cond.store11
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB68_11
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_16: # %cond.store13
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v9
; RV64ZVE32F-ZVFHMIN-NEXT: slli a1, a1, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a0, a0, a1
; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a1
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: ret
%ptrs = getelementptr inbounds half, ptr %base, <8 x i8> %idxs
call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
ret void
}

define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_v8i8_v8f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf4 v10, v9
; RV32-NEXT: vadd.vv v10, v10, v10
; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_sext_v8i8_v8f16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf8 v12, v9
; RV64V-NEXT: vadd.vv v12, v12, v12
; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-ZVFH-LABEL: mscatter_baseidx_sext_v8i8_v8f16:
; RV64ZVE32F-ZVFH: # %bb.0:
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 1
; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB69_2
; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a2)
; RV64ZVE32F-ZVFH-NEXT: .LBB69_2: # %else
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2
; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB69_4
; RV64ZVE32F-ZVFH-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2)
; RV64ZVE32F-ZVFH-NEXT: .LBB69_4: # %else2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v9, 4
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB69_12
; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %else4
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB69_13
; RV64ZVE32F-ZVFH-NEXT: .LBB69_6: # %else6
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB69_14
; RV64ZVE32F-ZVFH-NEXT: .LBB69_7: # %else8
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB69_9
; RV64ZVE32F-ZVFH-NEXT: .LBB69_8: # %cond.store9
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v10, 1
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 5
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
; RV64ZVE32F-ZVFH-NEXT: .LBB69_9: # %else10
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v10, 2
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB69_15
; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %else12
; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB69_16
; RV64ZVE32F-ZVFH-NEXT: .LBB69_11: # %else14
; RV64ZVE32F-ZVFH-NEXT: ret
; RV64ZVE32F-ZVFH-NEXT: .LBB69_12: # %cond.store3
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-ZVFH-NEXT: vse16.v v11, (a2)
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB69_6
; RV64ZVE32F-ZVFH-NEXT: .LBB69_13: # %cond.store5
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB69_7
; RV64ZVE32F-ZVFH-NEXT: .LBB69_14: # %cond.store7
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB69_8
; RV64ZVE32F-ZVFH-NEXT: j .LBB69_9
; RV64ZVE32F-ZVFH-NEXT: .LBB69_15: # %cond.store11
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2)
; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB69_11
; RV64ZVE32F-ZVFH-NEXT: .LBB69_16: # %cond.store13
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v9
; RV64ZVE32F-ZVFH-NEXT: slli a1, a1, 1
; RV64ZVE32F-ZVFH-NEXT: add a0, a0, a1
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a0)
; RV64ZVE32F-ZVFH-NEXT: ret
;
; RV64ZVE32F-ZVFHMIN-LABEL: mscatter_baseidx_sext_v8i8_v8f16:
; RV64ZVE32F-ZVFHMIN: # %bb.0:
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB69_2
; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_2: # %else
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB69_4
; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_4: # %else2
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 4
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB69_12
; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %else4
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB69_13
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_6: # %else6
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB69_14
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_7: # %else8
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB69_9
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_8: # %cond.store9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_9: # %else10
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 2
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB69_15
; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %else12
; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB69_16
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_11: # %else14
; RV64ZVE32F-ZVFHMIN-NEXT: ret
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_12: # %cond.store3
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v11
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB69_6
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_13: # %cond.store5
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB69_7
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_14: # %cond.store7
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB69_8
; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB69_9
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_15: # %cond.store11
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB69_11
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_16: # %cond.store13
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v9
; RV64ZVE32F-ZVFHMIN-NEXT: slli a1, a1, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a0, a0, a1
; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a1
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: ret
%eidxs = sext <8 x i8> %idxs to <8 x i16>
%ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs
call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
ret void
}

define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
7925 ; RV32-LABEL: mscatter_baseidx_zext_v8i8_v8f16:
7927 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
7928 ; RV32-NEXT: vwaddu.vv v10, v9, v9
7929 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
7930 ; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t
7933 ; RV64V-LABEL: mscatter_baseidx_zext_v8i8_v8f16:
7935 ; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
7936 ; RV64V-NEXT: vwaddu.vv v10, v9, v9
7937 ; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
7938 ; RV64V-NEXT: vsoxei16.v v8, (a0), v10, v0.t
7941 ; RV64ZVE32F-ZVFH-LABEL: mscatter_baseidx_zext_v8i8_v8f16:
7942 ; RV64ZVE32F-ZVFH: # %bb.0:
7943 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
7944 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v0
7945 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 1
7946 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB70_2
7947 ; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %cond.store
7948 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
7949 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255
7950 ; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
7951 ; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
7952 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
7953 ; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a2)
7954 ; RV64ZVE32F-ZVFH-NEXT: .LBB70_2: # %else
7955 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2
7956 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB70_4
7957 ; RV64ZVE32F-ZVFH-NEXT: # %bb.3: # %cond.store1
7958 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7959 ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v9, 1
7960 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
7961 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255
7962 ; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
7963 ; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
7964 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
7965 ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 1
7966 ; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2)
7967 ; RV64ZVE32F-ZVFH-NEXT: .LBB70_4: # %else2
7968 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
7969 ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v9, 4
7970 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4
7971 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
7972 ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 2
7973 ; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB70_12
7974 ; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %else4
7975 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
7976 ; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB70_13
7977 ; RV64ZVE32F-ZVFH-NEXT: .LBB70_6: # %else6
7978 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
7979 ; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB70_14
7980 ; RV64ZVE32F-ZVFH-NEXT: .LBB70_7: # %else8
7981 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
7982 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB70_9
7983 ; RV64ZVE32F-ZVFH-NEXT: .LBB70_8: # %cond.store9
7984 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7985 ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v10, 1
7986 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
7987 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255
7988 ; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
7989 ; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
7990 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
7991 ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 5
7992 ; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
7993 ; RV64ZVE32F-ZVFH-NEXT: .LBB70_9: # %else10
7994 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64
7995 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
7996 ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v10, 2
7997 ; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB70_15
7998 ; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %else12
7999 ; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
8000 ; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB70_16
8001 ; RV64ZVE32F-ZVFH-NEXT: .LBB70_11: # %else14
8002 ; RV64ZVE32F-ZVFH-NEXT: ret
8003 ; RV64ZVE32F-ZVFH-NEXT: .LBB70_12: # %cond.store3
8004 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
8005 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255
8006 ; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
8007 ; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
8008 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
8009 ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v11, v8, 2
8010 ; RV64ZVE32F-ZVFH-NEXT: vse16.v v11, (a2)
8011 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
8012 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB70_6
8013 ; RV64ZVE32F-ZVFH-NEXT: .LBB70_13: # %cond.store5
8014 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8015 ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 1
8016 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
8017 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255
8018 ; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
8019 ; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
8020 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
8021 ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 3
8022 ; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
8023 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
8024 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB70_7
8025 ; RV64ZVE32F-ZVFH-NEXT: .LBB70_14: # %cond.store7
8026 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
8027 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
8028 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255
8029 ; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
8030 ; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
8031 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
8032 ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 4
8033 ; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
8034 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
8035 ; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB70_8
8036 ; RV64ZVE32F-ZVFH-NEXT: j .LBB70_9
8037 ; RV64ZVE32F-ZVFH-NEXT: .LBB70_15: # %cond.store11
8038 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
8039 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a2, 255
8040 ; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
8041 ; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
8042 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
8043 ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 6
8044 ; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2)
8045 ; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
8046 ; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB70_11
8047 ; RV64ZVE32F-ZVFH-NEXT: .LBB70_16: # %cond.store13
8048 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8049 ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 1
8050 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v9
8051 ; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, 255
8052 ; RV64ZVE32F-ZVFH-NEXT: slli a1, a1, 1
8053 ; RV64ZVE32F-ZVFH-NEXT: add a0, a0, a1
8054 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
8055 ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 7
8056 ; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a0)
8057 ; RV64ZVE32F-ZVFH-NEXT: ret
;
; RV64ZVE32F-ZVFHMIN-LABEL: mscatter_baseidx_zext_v8i8_v8f16:
; RV64ZVE32F-ZVFHMIN: # %bb.0:
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB70_2
; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_2: # %else
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB70_4
; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_4: # %else2
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 4
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB70_12
; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %else4
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB70_13
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_6: # %else6
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB70_14
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_7: # %else8
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB70_9
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_8: # %cond.store9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_9: # %else10
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 2
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB70_15
; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %else12
; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB70_16
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_11: # %else14
; RV64ZVE32F-ZVFHMIN-NEXT: ret
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_12: # %cond.store3
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v11
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB70_6
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_13: # %cond.store5
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB70_7
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_14: # %cond.store7
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB70_8
; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB70_9
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_15: # %cond.store11
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a2, 255
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB70_11
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_16: # %cond.store13
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v9
; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, 255
; RV64ZVE32F-ZVFHMIN-NEXT: slli a1, a1, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a0, a0, a1
; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a1
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: ret
%eidxs = zext <8 x i8> %idxs to <8 x i16>
%ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs
call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
ret void
}

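; i16 indices already match the GEP index type, so no explicit extension is
; needed; note RV32 forms the byte offsets with a single widening add
; (vwadd.vv doubles each index while widening to e32).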
define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_v8f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vwadd.vv v10, v9, v9
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_v8f16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf4 v12, v9
; RV64V-NEXT: vadd.vv v12, v12, v12
; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-ZVFH-LABEL: mscatter_baseidx_v8f16:
; RV64ZVE32F-ZVFH: # %bb.0:
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 1
; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB71_2
; RV64ZVE32F-ZVFH-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a2)
; RV64ZVE32F-ZVFH-NEXT: .LBB71_2: # %else
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 2
; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB71_4
; RV64ZVE32F-ZVFH-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2)
; RV64ZVE32F-ZVFH-NEXT: .LBB71_4: # %else2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v9, 4
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB71_12
; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %else4
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB71_13
; RV64ZVE32F-ZVFH-NEXT: .LBB71_6: # %else6
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB71_14
; RV64ZVE32F-ZVFH-NEXT: .LBB71_7: # %else8
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB71_9
; RV64ZVE32F-ZVFH-NEXT: .LBB71_8: # %cond.store9
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v10, 1
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 5
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
; RV64ZVE32F-ZVFH-NEXT: .LBB71_9: # %else10
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v10, 2
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB71_15
; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %else12
; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
; RV64ZVE32F-ZVFH-NEXT: bnez a1, .LBB71_16
; RV64ZVE32F-ZVFH-NEXT: .LBB71_11: # %else14
; RV64ZVE32F-ZVFH-NEXT: ret
; RV64ZVE32F-ZVFH-NEXT: .LBB71_12: # %cond.store3
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-ZVFH-NEXT: vse16.v v11, (a2)
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8
; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB71_6
; RV64ZVE32F-ZVFH-NEXT: .LBB71_13: # %cond.store5
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 16
; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB71_7
; RV64ZVE32F-ZVFH-NEXT: .LBB71_14: # %cond.store7
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2)
; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 32
; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB71_8
; RV64ZVE32F-ZVFH-NEXT: j .LBB71_9
; RV64ZVE32F-ZVFH-NEXT: .LBB71_15: # %cond.store11
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFH-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFH-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-ZVFH-NEXT: vse16.v v10, (a2)
; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128
; RV64ZVE32F-ZVFH-NEXT: beqz a1, .LBB71_11
; RV64ZVE32F-ZVFH-NEXT: .LBB71_16: # %cond.store13
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a1, v9
; RV64ZVE32F-ZVFH-NEXT: slli a1, a1, 1
; RV64ZVE32F-ZVFH-NEXT: add a0, a0, a1
; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-ZVFH-NEXT: vse16.v v8, (a0)
; RV64ZVE32F-ZVFH-NEXT: ret
;
; RV64ZVE32F-ZVFHMIN-LABEL: mscatter_baseidx_v8f16:
; RV64ZVE32F-ZVFHMIN: # %bb.0:
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 1
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB71_2
; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_2: # %else
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 2
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB71_4
; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_4: # %else2
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 4
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB71_12
; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %else4
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB71_13
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_6: # %else6
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB71_14
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_7: # %else8
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB71_9
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_8: # %cond.store9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 5
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_9: # %else10
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 2
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB71_15
; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %else12
; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a1, .LBB71_16
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_11: # %else14
; RV64ZVE32F-ZVFHMIN-NEXT: ret
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_12: # %cond.store3
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v11
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB71_6
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_13: # %cond.store5
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 16
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB71_7
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_14: # %cond.store7
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 32
; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB71_8
; RV64ZVE32F-ZVFHMIN-NEXT: j .LBB71_9
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_15: # %cond.store11
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-ZVFHMIN-NEXT: slli a2, a2, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-ZVFHMIN-NEXT: add a2, a0, a2
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2)
; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128
; RV64ZVE32F-ZVFHMIN-NEXT: beqz a1, .LBB71_11
; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_16: # %cond.store13
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v9
; RV64ZVE32F-ZVFHMIN-NEXT: slli a1, a1, 1
; RV64ZVE32F-ZVFHMIN-NEXT: add a0, a0, a1
; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a1
; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a0)
; RV64ZVE32F-ZVFHMIN-NEXT: ret
%ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %idxs
call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
ret void
}

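; Scatters of float elements. Without 64-bit vector elements, RV64ZVE32F
; cannot use an indexed store; for the single-element case it tests the mask
; with vfirst.m and falls back to a scalar vse32.v.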
declare void @llvm.masked.scatter.v1f32.v1p0(<1 x float>, <1 x ptr>, i32, <1 x i1>)

define void @mscatter_v1f32(<1 x float> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
; RV32V-LABEL: mscatter_v1f32:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_v1f32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v1f32:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v1f32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vfirst.m a1, v0
; RV64ZVE32F-NEXT: bnez a1, .LBB72_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a0)
; RV64ZVE32F-NEXT: .LBB72_2: # %else
; RV64ZVE32F-NEXT: ret
call void @llvm.masked.scatter.v1f32.v1p0(<1 x float> %val, <1 x ptr> %ptrs, i32 4, <1 x i1> %m)
ret void
}

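; Two-element case: the RV64ZVE32F expansion branches on each mask bit and
; stores the corresponding lane with vse32.v.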
declare void @llvm.masked.scatter.v2f32.v2p0(<2 x float>, <2 x ptr>, i32, <2 x i1>)

define void @mscatter_v2f32(<2 x float> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV32V-LABEL: mscatter_v2f32:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_v2f32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v2f32:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v2f32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB73_3
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB73_4
; RV64ZVE32F-NEXT: .LBB73_2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB73_3: # %cond.store
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a0)
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB73_2
; RV64ZVE32F-NEXT: .LBB73_4: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vse32.v v8, (a1)
; RV64ZVE32F-NEXT: ret
call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> %val, <2 x ptr> %ptrs, i32 4, <2 x i1> %m)
ret void
}

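; With four pointers the <4 x ptr> operand is passed indirectly, so the
; RV64ZVE32F expansion first loads each pointer from memory with scalar ld.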
declare void @llvm.masked.scatter.v4f32.v4p0(<4 x float>, <4 x ptr>, i32, <4 x i1>)

define void @mscatter_v4f32(<4 x float> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV32-LABEL: mscatter_v4f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_v4f32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (zero), v10, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v4f32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a4, 8(a0)
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: ld a1, 24(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v0
; RV64ZVE32F-NEXT: andi a5, a3, 1
; RV64ZVE32F-NEXT: bnez a5, .LBB74_5
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a0, a3, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB74_6
; RV64ZVE32F-NEXT: .LBB74_2: # %else2
; RV64ZVE32F-NEXT: andi a0, a3, 4
; RV64ZVE32F-NEXT: bnez a0, .LBB74_7
; RV64ZVE32F-NEXT: .LBB74_3: # %else4
; RV64ZVE32F-NEXT: andi a3, a3, 8
; RV64ZVE32F-NEXT: bnez a3, .LBB74_8
; RV64ZVE32F-NEXT: .LBB74_4: # %else6
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB74_5: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a0)
; RV64ZVE32F-NEXT: andi a0, a3, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB74_2
; RV64ZVE32F-NEXT: .LBB74_6: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vse32.v v9, (a4)
; RV64ZVE32F-NEXT: andi a0, a3, 4
; RV64ZVE32F-NEXT: beqz a0, .LBB74_3
; RV64ZVE32F-NEXT: .LBB74_7: # %cond.store3
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-NEXT: vse32.v v9, (a2)
; RV64ZVE32F-NEXT: andi a3, a3, 8
; RV64ZVE32F-NEXT: beqz a3, .LBB74_4
; RV64ZVE32F-NEXT: .LBB74_8: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
; RV64ZVE32F-NEXT: vse32.v v8, (a1)
; RV64ZVE32F-NEXT: ret
call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %val, <4 x ptr> %ptrs, i32 4, <4 x i1> %m)
ret void
}

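; An all-ones mask makes every lane unconditional, so the scalar expansion
; needs no branches at all.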
define void @mscatter_truemask_v4f32(<4 x float> %val, <4 x ptr> %ptrs) {
; RV32-LABEL: mscatter_truemask_v4f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_truemask_v4f32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (zero), v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_truemask_v4f32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a1, 0(a0)
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: ld a3, 16(a0)
; RV64ZVE32F-NEXT: ld a0, 24(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vse32.v v8, (a1)
; RV64ZVE32F-NEXT: vse32.v v9, (a2)
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
; RV64ZVE32F-NEXT: vse32.v v9, (a3)
; RV64ZVE32F-NEXT: vse32.v v8, (a0)
; RV64ZVE32F-NEXT: ret
call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %val, <4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 1))
ret void
}

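; An all-zeros mask folds the scatter away completely.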
define void @mscatter_falsemask_v4f32(<4 x float> %val, <4 x ptr> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: ret
call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %val, <4 x ptr> %ptrs, i32 4, <4 x i1> zeroinitializer)
ret void
}

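; Eight floats no longer fit in a single ZVE32F m1 register (zvl128b), so the
; scalar expansion switches to m2 for the vslidedown of lanes 4 and up.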
declare void @llvm.masked.scatter.v8f32.v8p0(<8 x float>, <8 x ptr>, i32, <8 x i1>)

define void @mscatter_v8f32(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV32-LABEL: mscatter_v8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_v8f32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (zero), v12, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v8f32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a3, 40(a0)
; RV64ZVE32F-NEXT: ld a2, 48(a0)
; RV64ZVE32F-NEXT: ld a1, 56(a0)
; RV64ZVE32F-NEXT: ld t0, 8(a0)
; RV64ZVE32F-NEXT: ld a7, 16(a0)
; RV64ZVE32F-NEXT: ld a6, 24(a0)
; RV64ZVE32F-NEXT: ld a5, 32(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a4, v0
; RV64ZVE32F-NEXT: andi t1, a4, 1
; RV64ZVE32F-NEXT: bnez t1, .LBB77_9
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a0, a4, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB77_10
; RV64ZVE32F-NEXT: .LBB77_2: # %else2
; RV64ZVE32F-NEXT: andi a0, a4, 4
; RV64ZVE32F-NEXT: bnez a0, .LBB77_11
; RV64ZVE32F-NEXT: .LBB77_3: # %else4
; RV64ZVE32F-NEXT: andi a0, a4, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB77_12
; RV64ZVE32F-NEXT: .LBB77_4: # %else6
; RV64ZVE32F-NEXT: andi a0, a4, 16
; RV64ZVE32F-NEXT: bnez a0, .LBB77_13
; RV64ZVE32F-NEXT: .LBB77_5: # %else8
; RV64ZVE32F-NEXT: andi a0, a4, 32
; RV64ZVE32F-NEXT: bnez a0, .LBB77_14
; RV64ZVE32F-NEXT: .LBB77_6: # %else10
; RV64ZVE32F-NEXT: andi a0, a4, 64
; RV64ZVE32F-NEXT: bnez a0, .LBB77_15
; RV64ZVE32F-NEXT: .LBB77_7: # %else12
; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB77_16
; RV64ZVE32F-NEXT: .LBB77_8: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB77_9: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a0)
; RV64ZVE32F-NEXT: andi a0, a4, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB77_2
; RV64ZVE32F-NEXT: .LBB77_10: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vse32.v v10, (t0)
; RV64ZVE32F-NEXT: andi a0, a4, 4
; RV64ZVE32F-NEXT: beqz a0, .LBB77_3
; RV64ZVE32F-NEXT: .LBB77_11: # %cond.store3
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV64ZVE32F-NEXT: vse32.v v10, (a7)
; RV64ZVE32F-NEXT: andi a0, a4, 8
; RV64ZVE32F-NEXT: beqz a0, .LBB77_4
; RV64ZVE32F-NEXT: .LBB77_12: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV64ZVE32F-NEXT: vse32.v v10, (a6)
; RV64ZVE32F-NEXT: andi a0, a4, 16
; RV64ZVE32F-NEXT: beqz a0, .LBB77_5
; RV64ZVE32F-NEXT: .LBB77_13: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v10, (a5)
; RV64ZVE32F-NEXT: andi a0, a4, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB77_6
; RV64ZVE32F-NEXT: .LBB77_14: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v10, (a3)
; RV64ZVE32F-NEXT: andi a0, a4, 64
; RV64ZVE32F-NEXT: beqz a0, .LBB77_7
; RV64ZVE32F-NEXT: .LBB77_15: # %cond.store11
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB77_8
; RV64ZVE32F-NEXT: .LBB77_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a1)
; RV64ZVE32F-NEXT: ret
call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
ret void
}

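; i8 indices are sign-extended and scaled by 4 (vsll.vi) to form byte offsets.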
define void @mscatter_baseidx_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_v8i8_v8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf4 v12, v10
; RV32-NEXT: vsll.vi v10, v12, 2
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_v8i8_v8f32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf8 v12, v10
; RV64V-NEXT: vsll.vi v12, v12, 2
; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8_v8f32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB78_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a2)
; RV64ZVE32F-NEXT: .LBB78_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB78_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
; RV64ZVE32F-NEXT: vse32.v v11, (a2)
; RV64ZVE32F-NEXT: .LBB78_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB78_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB78_13
; RV64ZVE32F-NEXT: .LBB78_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB78_14
; RV64ZVE32F-NEXT: .LBB78_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB78_9
; RV64ZVE32F-NEXT: .LBB78_8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB78_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB78_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB78_16
; RV64ZVE32F-NEXT: .LBB78_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB78_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB78_6
; RV64ZVE32F-NEXT: .LBB78_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB78_7
; RV64ZVE32F-NEXT: .LBB78_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB78_8
; RV64ZVE32F-NEXT: j .LBB78_9
; RV64ZVE32F-NEXT: .LBB78_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB78_11
; RV64ZVE32F-NEXT: .LBB78_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v10
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a0)
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds float, ptr %base, <8 x i8> %idxs
call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
ret void
}

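; An explicit sext of the i8 indices lowers exactly like the implicit-extension
; case above.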
define void @mscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_v8i8_v8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf4 v12, v10
; RV32-NEXT: vsll.vi v10, v12, 2
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_sext_v8i8_v8f32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf8 v12, v10
; RV64V-NEXT: vsll.vi v12, v12, 2
; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8f32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB79_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a2)
; RV64ZVE32F-NEXT: .LBB79_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB79_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
; RV64ZVE32F-NEXT: vse32.v v11, (a2)
; RV64ZVE32F-NEXT: .LBB79_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB79_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB79_13
; RV64ZVE32F-NEXT: .LBB79_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB79_14
; RV64ZVE32F-NEXT: .LBB79_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB79_9
; RV64ZVE32F-NEXT: .LBB79_8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB79_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB79_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB79_16
; RV64ZVE32F-NEXT: .LBB79_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB79_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB79_6
; RV64ZVE32F-NEXT: .LBB79_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB79_7
; RV64ZVE32F-NEXT: .LBB79_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB79_8
; RV64ZVE32F-NEXT: j .LBB79_9
; RV64ZVE32F-NEXT: .LBB79_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB79_11
; RV64ZVE32F-NEXT: .LBB79_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v10
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a0)
; RV64ZVE32F-NEXT: ret
%eidxs = sext <8 x i8> %idxs to <8 x i32>
%ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
ret void
}

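; Zero-extended i8 indices only need 16-bit offsets, so the vector lowerings
; can use vzext.vf2 plus a 16-bit indexed store (vsoxei16); the scalar path
; clears the high bits of each index with andi 255.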
define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_v8i8_v8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vzext.vf2 v11, v10
; RV32-NEXT: vsll.vi v10, v11, 2
; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_zext_v8i8_v8f32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64V-NEXT: vzext.vf2 v11, v10
; RV64V-NEXT: vsll.vi v10, v11, 2
; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64V-NEXT: vsoxei16.v v8, (a0), v10, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8f32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB80_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a2)
; RV64ZVE32F-NEXT: .LBB80_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB80_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
; RV64ZVE32F-NEXT: vse32.v v11, (a2)
; RV64ZVE32F-NEXT: .LBB80_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB80_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB80_13
; RV64ZVE32F-NEXT: .LBB80_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB80_14
; RV64ZVE32F-NEXT: .LBB80_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB80_9
; RV64ZVE32F-NEXT: .LBB80_8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB80_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB80_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB80_16
; RV64ZVE32F-NEXT: .LBB80_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB80_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB80_6
; RV64ZVE32F-NEXT: .LBB80_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB80_7
; RV64ZVE32F-NEXT: .LBB80_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB80_8
; RV64ZVE32F-NEXT: j .LBB80_9
; RV64ZVE32F-NEXT: .LBB80_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB80_11
; RV64ZVE32F-NEXT: .LBB80_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v10
; RV64ZVE32F-NEXT: andi a1, a1, 255
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a0)
; RV64ZVE32F-NEXT: ret
%eidxs = zext <8 x i8> %idxs to <8 x i32>
%ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
ret void
}

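; i16 indices are sign-extended to the pointer width and scaled by 4.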
define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_v8i16_v8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf2 v12, v10
; RV32-NEXT: vsll.vi v10, v12, 2
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_v8i16_v8f32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf4 v12, v10
; RV64V-NEXT: vsll.vi v12, v12, 2
; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v8i16_v8f32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB81_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a2)
; RV64ZVE32F-NEXT: .LBB81_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB81_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
; RV64ZVE32F-NEXT: vse32.v v11, (a2)
; RV64ZVE32F-NEXT: .LBB81_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB81_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB81_13
; RV64ZVE32F-NEXT: .LBB81_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB81_14
; RV64ZVE32F-NEXT: .LBB81_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB81_9
; RV64ZVE32F-NEXT: .LBB81_8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB81_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB81_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB81_16
; RV64ZVE32F-NEXT: .LBB81_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB81_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB81_6
; RV64ZVE32F-NEXT: .LBB81_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB81_7
; RV64ZVE32F-NEXT: .LBB81_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB81_8
; RV64ZVE32F-NEXT: j .LBB81_9
; RV64ZVE32F-NEXT: .LBB81_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB81_11
; RV64ZVE32F-NEXT: .LBB81_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v10
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a0)
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds float, ptr %base, <8 x i16> %idxs
call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
ret void
}

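; Explicit sext of the i16 indices to i32; same lowering as the plain
; i16-index case above.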
9304 define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
9305 ; RV32-LABEL: mscatter_baseidx_sext_v8i16_v8f32:
9307 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
9308 ; RV32-NEXT: vsext.vf2 v12, v10
9309 ; RV32-NEXT: vsll.vi v10, v12, 2
9310 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
9313 ; RV64V-LABEL: mscatter_baseidx_sext_v8i16_v8f32:
9315 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
9316 ; RV64V-NEXT: vsext.vf4 v12, v10
9317 ; RV64V-NEXT: vsll.vi v12, v12, 2
9318 ; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma
9319 ; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i16_v8f32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB82_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a2)
; RV64ZVE32F-NEXT: .LBB82_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB82_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
; RV64ZVE32F-NEXT: vse32.v v11, (a2)
; RV64ZVE32F-NEXT: .LBB82_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB82_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB82_13
; RV64ZVE32F-NEXT: .LBB82_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB82_14
; RV64ZVE32F-NEXT: .LBB82_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB82_9
; RV64ZVE32F-NEXT: .LBB82_8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB82_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB82_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB82_16
; RV64ZVE32F-NEXT: .LBB82_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB82_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB82_6
; RV64ZVE32F-NEXT: .LBB82_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB82_7
; RV64ZVE32F-NEXT: .LBB82_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB82_8
; RV64ZVE32F-NEXT: j .LBB82_9
; RV64ZVE32F-NEXT: .LBB82_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB82_11
; RV64ZVE32F-NEXT: .LBB82_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v10
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a0)
; RV64ZVE32F-NEXT: ret
  %eidxs = sext <8 x i16> %idxs to <8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
  call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_v8i16_v8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vzext.vf2 v12, v10
; RV32-NEXT: vsll.vi v10, v12, 2
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_zext_v8i16_v8f32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64V-NEXT: vzext.vf2 v12, v10
; RV64V-NEXT: vsll.vi v10, v12, 2
; RV64V-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8f32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB83_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 48
; RV64ZVE32F-NEXT: srli a2, a2, 46
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a2)
; RV64ZVE32F-NEXT: .LBB83_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB83_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 48
; RV64ZVE32F-NEXT: srli a2, a2, 46
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
; RV64ZVE32F-NEXT: vse32.v v11, (a2)
; RV64ZVE32F-NEXT: .LBB83_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB83_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB83_13
; RV64ZVE32F-NEXT: .LBB83_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB83_14
; RV64ZVE32F-NEXT: .LBB83_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB83_9
; RV64ZVE32F-NEXT: .LBB83_8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 48
; RV64ZVE32F-NEXT: srli a2, a2, 46
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB83_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB83_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB83_16
; RV64ZVE32F-NEXT: .LBB83_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB83_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 48
; RV64ZVE32F-NEXT: srli a2, a2, 46
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB83_6
; RV64ZVE32F-NEXT: .LBB83_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 48
; RV64ZVE32F-NEXT: srli a2, a2, 46
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB83_7
; RV64ZVE32F-NEXT: .LBB83_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 48
; RV64ZVE32F-NEXT: srli a2, a2, 46
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB83_8
; RV64ZVE32F-NEXT: j .LBB83_9
; RV64ZVE32F-NEXT: .LBB83_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 48
; RV64ZVE32F-NEXT: srli a2, a2, 46
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB83_11
; RV64ZVE32F-NEXT: .LBB83_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v10
; RV64ZVE32F-NEXT: slli a1, a1, 48
; RV64ZVE32F-NEXT: srli a1, a1, 46
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a0)
; RV64ZVE32F-NEXT: ret
  %eidxs = zext <8 x i16> %idxs to <8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
  call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_v8f32(<8 x float> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_v8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsll.vi v10, v10, 2
; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_v8f32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf2 v12, v10
; RV64V-NEXT: vsll.vi v12, v12, 2
; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v8f32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB84_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vse32.v v8, (a2)
; RV64ZVE32F-NEXT: .LBB84_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB84_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB84_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB84_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB84_13
; RV64ZVE32F-NEXT: .LBB84_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB84_14
; RV64ZVE32F-NEXT: .LBB84_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB84_9
; RV64ZVE32F-NEXT: .LBB84_8: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
; RV64ZVE32F-NEXT: .LBB84_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v12, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB84_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB84_16
; RV64ZVE32F-NEXT: .LBB84_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB84_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v11, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB84_6
; RV64ZVE32F-NEXT: .LBB84_13: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB84_7
; RV64ZVE32F-NEXT: .LBB84_14: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB84_8
; RV64ZVE32F-NEXT: j .LBB84_9
; RV64ZVE32F-NEXT: .LBB84_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB84_11
; RV64ZVE32F-NEXT: .LBB84_16: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v10
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a0)
; RV64ZVE32F-NEXT: ret
  %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %idxs
  call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.v1f64.v1p0(<1 x double>, <1 x ptr>, i32, <1 x i1>)

define void @mscatter_v1f64(<1 x double> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
; RV32V-LABEL: mscatter_v1f64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_v1f64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v1f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
; RV32ZVE32F-NEXT: vfirst.m a0, v0
; RV32ZVE32F-NEXT: bnez a0, .LBB85_2
; RV32ZVE32F-NEXT: # %bb.1: # %cond.store
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: .LBB85_2: # %else
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v1f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vfirst.m a1, v0
; RV64ZVE32F-NEXT: bnez a1, .LBB85_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
; RV64ZVE32F-NEXT: .LBB85_2: # %else
; RV64ZVE32F-NEXT: ret
  call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> %val, <1 x ptr> %ptrs, i32 8, <1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.v2f64.v2p0(<2 x double>, <2 x ptr>, i32, <2 x i1>)

define void @mscatter_v2f64(<2 x double> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV32V-LABEL: mscatter_v2f64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_v2f64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (zero), v9, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v2f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a0, v0
; RV32ZVE32F-NEXT: andi a1, a0, 1
; RV32ZVE32F-NEXT: bnez a1, .LBB86_3
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, a0, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB86_4
; RV32ZVE32F-NEXT: .LBB86_2: # %else2
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB86_3: # %cond.store
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fsd fa0, 0(a1)
; RV32ZVE32F-NEXT: andi a0, a0, 2
; RV32ZVE32F-NEXT: beqz a0, .LBB86_2
; RV32ZVE32F-NEXT: .LBB86_4: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v2f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB86_3
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB86_4
; RV64ZVE32F-NEXT: .LBB86_2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB86_3: # %cond.store
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB86_2
; RV64ZVE32F-NEXT: .LBB86_4: # %cond.store1
; RV64ZVE32F-NEXT: fsd fa1, 0(a1)
; RV64ZVE32F-NEXT: ret
  call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> %val, <2 x ptr> %ptrs, i32 8, <2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.v4f64.v4p0(<4 x double>, <4 x ptr>, i32, <4 x i1>)

define void @mscatter_v4f64(<4 x double> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV32V-LABEL: mscatter_v4f64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32V-NEXT: vsoxei32.v v8, (zero), v10, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_v4f64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (zero), v10, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v4f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a0, v0
; RV32ZVE32F-NEXT: andi a1, a0, 1
; RV32ZVE32F-NEXT: bnez a1, .LBB87_5
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a1, a0, 2
; RV32ZVE32F-NEXT: bnez a1, .LBB87_6
; RV32ZVE32F-NEXT: .LBB87_2: # %else2
; RV32ZVE32F-NEXT: andi a1, a0, 4
; RV32ZVE32F-NEXT: bnez a1, .LBB87_7
; RV32ZVE32F-NEXT: .LBB87_3: # %else4
; RV32ZVE32F-NEXT: andi a0, a0, 8
; RV32ZVE32F-NEXT: bnez a0, .LBB87_8
; RV32ZVE32F-NEXT: .LBB87_4: # %else6
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB87_5: # %cond.store
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fsd fa0, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a0, 2
; RV32ZVE32F-NEXT: beqz a1, .LBB87_2
; RV32ZVE32F-NEXT: .LBB87_6: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v9
; RV32ZVE32F-NEXT: fsd fa1, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a0, 4
; RV32ZVE32F-NEXT: beqz a1, .LBB87_3
; RV32ZVE32F-NEXT: .LBB87_7: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a1, v9
; RV32ZVE32F-NEXT: fsd fa2, 0(a1)
; RV32ZVE32F-NEXT: andi a0, a0, 8
; RV32ZVE32F-NEXT: beqz a0, .LBB87_4
; RV32ZVE32F-NEXT: .LBB87_8: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v4f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a4, 8(a0)
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: ld a1, 24(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v0
; RV64ZVE32F-NEXT: andi a5, a3, 1
; RV64ZVE32F-NEXT: bnez a5, .LBB87_5
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a0, a3, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB87_6
; RV64ZVE32F-NEXT: .LBB87_2: # %else2
; RV64ZVE32F-NEXT: andi a0, a3, 4
; RV64ZVE32F-NEXT: bnez a0, .LBB87_7
; RV64ZVE32F-NEXT: .LBB87_3: # %else4
; RV64ZVE32F-NEXT: andi a3, a3, 8
; RV64ZVE32F-NEXT: bnez a3, .LBB87_8
; RV64ZVE32F-NEXT: .LBB87_4: # %else6
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB87_5: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a3, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB87_2
; RV64ZVE32F-NEXT: .LBB87_6: # %cond.store1
; RV64ZVE32F-NEXT: fsd fa1, 0(a4)
; RV64ZVE32F-NEXT: andi a0, a3, 4
; RV64ZVE32F-NEXT: beqz a0, .LBB87_3
; RV64ZVE32F-NEXT: .LBB87_7: # %cond.store3
; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
; RV64ZVE32F-NEXT: andi a3, a3, 8
; RV64ZVE32F-NEXT: beqz a3, .LBB87_4
; RV64ZVE32F-NEXT: .LBB87_8: # %cond.store5
; RV64ZVE32F-NEXT: fsd fa3, 0(a1)
; RV64ZVE32F-NEXT: ret
  call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> %val, <4 x ptr> %ptrs, i32 8, <4 x i1> %m)
  ret void
}

define void @mscatter_truemask_v4f64(<4 x double> %val, <4 x ptr> %ptrs) {
; RV32V-LABEL: mscatter_truemask_v4f64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32V-NEXT: vsoxei32.v v8, (zero), v10
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_truemask_v4f64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (zero), v10
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_truemask_v4f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: vmv.x.s a0, v9
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3
; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
; RV32ZVE32F-NEXT: vmv.x.s a0, v9
; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_truemask_v4f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a1, 0(a0)
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: ld a3, 16(a0)
; RV64ZVE32F-NEXT: ld a0, 24(a0)
; RV64ZVE32F-NEXT: fsd fa0, 0(a1)
; RV64ZVE32F-NEXT: fsd fa1, 0(a2)
; RV64ZVE32F-NEXT: fsd fa2, 0(a3)
; RV64ZVE32F-NEXT: fsd fa3, 0(a0)
; RV64ZVE32F-NEXT: ret
  call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> %val, <4 x ptr> %ptrs, i32 8, <4 x i1> splat (i1 1))
  ret void
}

define void @mscatter_falsemask_v4f64(<4 x double> %val, <4 x ptr> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_v4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: ret
  call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> %val, <4 x ptr> %ptrs, i32 8, <4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.v8f64.v8p0(<8 x double>, <8 x ptr>, i32, <8 x i1>)

define void @mscatter_v8f64(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV32V-LABEL: mscatter_v8f64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV32V-NEXT: vsoxei32.v v8, (zero), v12, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_v8f64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (zero), v12, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v8f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a0, v0
; RV32ZVE32F-NEXT: andi a1, a0, 1
; RV32ZVE32F-NEXT: bnez a1, .LBB90_9
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a1, a0, 2
; RV32ZVE32F-NEXT: bnez a1, .LBB90_10
; RV32ZVE32F-NEXT: .LBB90_2: # %else2
; RV32ZVE32F-NEXT: andi a1, a0, 4
; RV32ZVE32F-NEXT: bnez a1, .LBB90_11
; RV32ZVE32F-NEXT: .LBB90_3: # %else4
; RV32ZVE32F-NEXT: andi a1, a0, 8
; RV32ZVE32F-NEXT: bnez a1, .LBB90_12
; RV32ZVE32F-NEXT: .LBB90_4: # %else6
; RV32ZVE32F-NEXT: andi a1, a0, 16
; RV32ZVE32F-NEXT: bnez a1, .LBB90_13
; RV32ZVE32F-NEXT: .LBB90_5: # %else8
; RV32ZVE32F-NEXT: andi a1, a0, 32
; RV32ZVE32F-NEXT: bnez a1, .LBB90_14
; RV32ZVE32F-NEXT: .LBB90_6: # %else10
; RV32ZVE32F-NEXT: andi a1, a0, 64
; RV32ZVE32F-NEXT: bnez a1, .LBB90_15
; RV32ZVE32F-NEXT: .LBB90_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a0, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB90_16
; RV32ZVE32F-NEXT: .LBB90_8: # %else14
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB90_9: # %cond.store
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fsd fa0, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a0, 2
; RV32ZVE32F-NEXT: beqz a1, .LBB90_2
; RV32ZVE32F-NEXT: .LBB90_10: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fsd fa1, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a0, 4
; RV32ZVE32F-NEXT: beqz a1, .LBB90_3
; RV32ZVE32F-NEXT: .LBB90_11: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fsd fa2, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a0, 8
; RV32ZVE32F-NEXT: beqz a1, .LBB90_4
; RV32ZVE32F-NEXT: .LBB90_12: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fsd fa3, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a0, 16
; RV32ZVE32F-NEXT: beqz a1, .LBB90_5
; RV32ZVE32F-NEXT: .LBB90_13: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fsd fa4, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a0, 32
; RV32ZVE32F-NEXT: beqz a1, .LBB90_6
; RV32ZVE32F-NEXT: .LBB90_14: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fsd fa5, 0(a1)
; RV32ZVE32F-NEXT: andi a1, a0, 64
; RV32ZVE32F-NEXT: beqz a1, .LBB90_7
; RV32ZVE32F-NEXT: .LBB90_15: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a1, v10
; RV32ZVE32F-NEXT: fsd fa6, 0(a1)
; RV32ZVE32F-NEXT: andi a0, a0, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB90_8
; RV32ZVE32F-NEXT: .LBB90_16: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fsd fa7, 0(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v8f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a3, 40(a0)
; RV64ZVE32F-NEXT: ld a2, 48(a0)
; RV64ZVE32F-NEXT: ld a1, 56(a0)
; RV64ZVE32F-NEXT: ld t0, 8(a0)
; RV64ZVE32F-NEXT: ld a7, 16(a0)
; RV64ZVE32F-NEXT: ld a6, 24(a0)
; RV64ZVE32F-NEXT: ld a5, 32(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a4, v0
; RV64ZVE32F-NEXT: andi t1, a4, 1
; RV64ZVE32F-NEXT: bnez t1, .LBB90_9
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a0, a4, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB90_10
; RV64ZVE32F-NEXT: .LBB90_2: # %else2
; RV64ZVE32F-NEXT: andi a0, a4, 4
; RV64ZVE32F-NEXT: bnez a0, .LBB90_11
; RV64ZVE32F-NEXT: .LBB90_3: # %else4
; RV64ZVE32F-NEXT: andi a0, a4, 8
; RV64ZVE32F-NEXT: bnez a0, .LBB90_12
; RV64ZVE32F-NEXT: .LBB90_4: # %else6
; RV64ZVE32F-NEXT: andi a0, a4, 16
; RV64ZVE32F-NEXT: bnez a0, .LBB90_13
; RV64ZVE32F-NEXT: .LBB90_5: # %else8
; RV64ZVE32F-NEXT: andi a0, a4, 32
; RV64ZVE32F-NEXT: bnez a0, .LBB90_14
; RV64ZVE32F-NEXT: .LBB90_6: # %else10
; RV64ZVE32F-NEXT: andi a0, a4, 64
; RV64ZVE32F-NEXT: bnez a0, .LBB90_15
; RV64ZVE32F-NEXT: .LBB90_7: # %else12
; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: bnez a0, .LBB90_16
; RV64ZVE32F-NEXT: .LBB90_8: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB90_9: # %cond.store
; RV64ZVE32F-NEXT: ld a0, 0(a0)
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
; RV64ZVE32F-NEXT: andi a0, a4, 2
; RV64ZVE32F-NEXT: beqz a0, .LBB90_2
; RV64ZVE32F-NEXT: .LBB90_10: # %cond.store1
; RV64ZVE32F-NEXT: fsd fa1, 0(t0)
; RV64ZVE32F-NEXT: andi a0, a4, 4
; RV64ZVE32F-NEXT: beqz a0, .LBB90_3
; RV64ZVE32F-NEXT: .LBB90_11: # %cond.store3
; RV64ZVE32F-NEXT: fsd fa2, 0(a7)
; RV64ZVE32F-NEXT: andi a0, a4, 8
; RV64ZVE32F-NEXT: beqz a0, .LBB90_4
; RV64ZVE32F-NEXT: .LBB90_12: # %cond.store5
; RV64ZVE32F-NEXT: fsd fa3, 0(a6)
; RV64ZVE32F-NEXT: andi a0, a4, 16
; RV64ZVE32F-NEXT: beqz a0, .LBB90_5
; RV64ZVE32F-NEXT: .LBB90_13: # %cond.store7
; RV64ZVE32F-NEXT: fsd fa4, 0(a5)
; RV64ZVE32F-NEXT: andi a0, a4, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB90_6
; RV64ZVE32F-NEXT: .LBB90_14: # %cond.store9
; RV64ZVE32F-NEXT: fsd fa5, 0(a3)
; RV64ZVE32F-NEXT: andi a0, a4, 64
; RV64ZVE32F-NEXT: beqz a0, .LBB90_7
; RV64ZVE32F-NEXT: .LBB90_15: # %cond.store11
; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB90_8
; RV64ZVE32F-NEXT: .LBB90_16: # %cond.store13
; RV64ZVE32F-NEXT: fsd fa7, 0(a1)
; RV64ZVE32F-NEXT: ret
  call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
; RV32V-LABEL: mscatter_baseidx_v8i8_v8f64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vsext.vf4 v14, v12
; RV32V-NEXT: vsll.vi v12, v14, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_v8i8_v8f64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf8 v16, v12
; RV64V-NEXT: vsll.vi v12, v16, 3
; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_baseidx_v8i8_v8f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsext.vf4 v10, v8
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v0
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
; RV32ZVE32F-NEXT: bnez a2, .LBB91_9
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, a1, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB91_10
; RV32ZVE32F-NEXT: .LBB91_2: # %else2
; RV32ZVE32F-NEXT: andi a0, a1, 4
; RV32ZVE32F-NEXT: bnez a0, .LBB91_11
; RV32ZVE32F-NEXT: .LBB91_3: # %else4
; RV32ZVE32F-NEXT: andi a0, a1, 8
; RV32ZVE32F-NEXT: bnez a0, .LBB91_12
; RV32ZVE32F-NEXT: .LBB91_4: # %else6
; RV32ZVE32F-NEXT: andi a0, a1, 16
; RV32ZVE32F-NEXT: bnez a0, .LBB91_13
; RV32ZVE32F-NEXT: .LBB91_5: # %else8
; RV32ZVE32F-NEXT: andi a0, a1, 32
; RV32ZVE32F-NEXT: bnez a0, .LBB91_14
; RV32ZVE32F-NEXT: .LBB91_6: # %else10
; RV32ZVE32F-NEXT: andi a0, a1, 64
; RV32ZVE32F-NEXT: bnez a0, .LBB91_15
; RV32ZVE32F-NEXT: .LBB91_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a1, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB91_16
; RV32ZVE32F-NEXT: .LBB91_8: # %else14
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB91_9: # %cond.store
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 2
; RV32ZVE32F-NEXT: beqz a0, .LBB91_2
; RV32ZVE32F-NEXT: .LBB91_10: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 4
; RV32ZVE32F-NEXT: beqz a0, .LBB91_3
; RV32ZVE32F-NEXT: .LBB91_11: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 8
; RV32ZVE32F-NEXT: beqz a0, .LBB91_4
; RV32ZVE32F-NEXT: .LBB91_12: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 16
; RV32ZVE32F-NEXT: beqz a0, .LBB91_5
; RV32ZVE32F-NEXT: .LBB91_13: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 32
; RV32ZVE32F-NEXT: beqz a0, .LBB91_6
; RV32ZVE32F-NEXT: .LBB91_14: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 64
; RV32ZVE32F-NEXT: beqz a0, .LBB91_7
; RV32ZVE32F-NEXT: .LBB91_15: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB91_8
; RV32ZVE32F-NEXT: .LBB91_16: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fsd fa7, 0(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8_v8f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB91_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa0, 0(a2)
; RV64ZVE32F-NEXT: .LBB91_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB91_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa1, 0(a2)
; RV64ZVE32F-NEXT: .LBB91_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB91_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB91_13
; RV64ZVE32F-NEXT: .LBB91_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB91_14
; RV64ZVE32F-NEXT: .LBB91_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB91_9
; RV64ZVE32F-NEXT: .LBB91_8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
; RV64ZVE32F-NEXT: .LBB91_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB91_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB91_16
; RV64ZVE32F-NEXT: .LBB91_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB91_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB91_6
; RV64ZVE32F-NEXT: .LBB91_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB91_7
; RV64ZVE32F-NEXT: .LBB91_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB91_8
; RV64ZVE32F-NEXT: j .LBB91_9
; RV64ZVE32F-NEXT: .LBB91_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB91_11
; RV64ZVE32F-NEXT: .LBB91_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 3
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: fsd fa7, 0(a0)
; RV64ZVE32F-NEXT: ret
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i8> %idxs
  call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
; RV32V-LABEL: mscatter_baseidx_sext_v8i8_v8f64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vsext.vf4 v14, v12
; RV32V-NEXT: vsll.vi v12, v14, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_sext_v8i8_v8f64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf8 v16, v12
; RV64V-NEXT: vsll.vi v12, v16, 3
; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsext.vf4 v10, v8
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v0
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
; RV32ZVE32F-NEXT: bnez a2, .LBB92_9
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, a1, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB92_10
; RV32ZVE32F-NEXT: .LBB92_2: # %else2
; RV32ZVE32F-NEXT: andi a0, a1, 4
; RV32ZVE32F-NEXT: bnez a0, .LBB92_11
; RV32ZVE32F-NEXT: .LBB92_3: # %else4
; RV32ZVE32F-NEXT: andi a0, a1, 8
; RV32ZVE32F-NEXT: bnez a0, .LBB92_12
; RV32ZVE32F-NEXT: .LBB92_4: # %else6
; RV32ZVE32F-NEXT: andi a0, a1, 16
; RV32ZVE32F-NEXT: bnez a0, .LBB92_13
; RV32ZVE32F-NEXT: .LBB92_5: # %else8
; RV32ZVE32F-NEXT: andi a0, a1, 32
; RV32ZVE32F-NEXT: bnez a0, .LBB92_14
; RV32ZVE32F-NEXT: .LBB92_6: # %else10
; RV32ZVE32F-NEXT: andi a0, a1, 64
; RV32ZVE32F-NEXT: bnez a0, .LBB92_15
; RV32ZVE32F-NEXT: .LBB92_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a1, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB92_16
; RV32ZVE32F-NEXT: .LBB92_8: # %else14
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB92_9: # %cond.store
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 2
; RV32ZVE32F-NEXT: beqz a0, .LBB92_2
; RV32ZVE32F-NEXT: .LBB92_10: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 4
; RV32ZVE32F-NEXT: beqz a0, .LBB92_3
; RV32ZVE32F-NEXT: .LBB92_11: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 8
; RV32ZVE32F-NEXT: beqz a0, .LBB92_4
; RV32ZVE32F-NEXT: .LBB92_12: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 16
; RV32ZVE32F-NEXT: beqz a0, .LBB92_5
; RV32ZVE32F-NEXT: .LBB92_13: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 32
; RV32ZVE32F-NEXT: beqz a0, .LBB92_6
; RV32ZVE32F-NEXT: .LBB92_14: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 64
; RV32ZVE32F-NEXT: beqz a0, .LBB92_7
; RV32ZVE32F-NEXT: .LBB92_15: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB92_8
; RV32ZVE32F-NEXT: .LBB92_16: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fsd fa7, 0(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB92_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa0, 0(a2)
; RV64ZVE32F-NEXT: .LBB92_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB92_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa1, 0(a2)
; RV64ZVE32F-NEXT: .LBB92_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB92_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB92_13
; RV64ZVE32F-NEXT: .LBB92_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB92_14
; RV64ZVE32F-NEXT: .LBB92_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB92_9
; RV64ZVE32F-NEXT: .LBB92_8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
; RV64ZVE32F-NEXT: .LBB92_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB92_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB92_16
; RV64ZVE32F-NEXT: .LBB92_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB92_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB92_6
; RV64ZVE32F-NEXT: .LBB92_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB92_7
; RV64ZVE32F-NEXT: .LBB92_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB92_8
; RV64ZVE32F-NEXT: j .LBB92_9
; RV64ZVE32F-NEXT: .LBB92_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB92_11
; RV64ZVE32F-NEXT: .LBB92_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 3
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: fsd fa7, 0(a0)
; RV64ZVE32F-NEXT: ret
  %eidxs = sext <8 x i8> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
  call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
; RV32V-LABEL: mscatter_baseidx_zext_v8i8_v8f64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32V-NEXT: vzext.vf2 v13, v12
; RV32V-NEXT: vsll.vi v12, v13, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; RV32V-NEXT: vsoxei16.v v8, (a0), v12, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_zext_v8i8_v8f64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64V-NEXT: vzext.vf2 v13, v12
; RV64V-NEXT: vsll.vi v12, v13, 3
; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; RV64V-NEXT: vsoxei16.v v8, (a0), v12, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vzext.vf4 v10, v8
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v0
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
; RV32ZVE32F-NEXT: bnez a2, .LBB93_9
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, a1, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB93_10
; RV32ZVE32F-NEXT: .LBB93_2: # %else2
; RV32ZVE32F-NEXT: andi a0, a1, 4
; RV32ZVE32F-NEXT: bnez a0, .LBB93_11
; RV32ZVE32F-NEXT: .LBB93_3: # %else4
; RV32ZVE32F-NEXT: andi a0, a1, 8
; RV32ZVE32F-NEXT: bnez a0, .LBB93_12
; RV32ZVE32F-NEXT: .LBB93_4: # %else6
; RV32ZVE32F-NEXT: andi a0, a1, 16
; RV32ZVE32F-NEXT: bnez a0, .LBB93_13
; RV32ZVE32F-NEXT: .LBB93_5: # %else8
; RV32ZVE32F-NEXT: andi a0, a1, 32
; RV32ZVE32F-NEXT: bnez a0, .LBB93_14
; RV32ZVE32F-NEXT: .LBB93_6: # %else10
; RV32ZVE32F-NEXT: andi a0, a1, 64
; RV32ZVE32F-NEXT: bnez a0, .LBB93_15
; RV32ZVE32F-NEXT: .LBB93_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a1, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB93_16
; RV32ZVE32F-NEXT: .LBB93_8: # %else14
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB93_9: # %cond.store
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 2
; RV32ZVE32F-NEXT: beqz a0, .LBB93_2
; RV32ZVE32F-NEXT: .LBB93_10: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 4
; RV32ZVE32F-NEXT: beqz a0, .LBB93_3
; RV32ZVE32F-NEXT: .LBB93_11: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 8
; RV32ZVE32F-NEXT: beqz a0, .LBB93_4
; RV32ZVE32F-NEXT: .LBB93_12: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 16
; RV32ZVE32F-NEXT: beqz a0, .LBB93_5
; RV32ZVE32F-NEXT: .LBB93_13: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 32
; RV32ZVE32F-NEXT: beqz a0, .LBB93_6
; RV32ZVE32F-NEXT: .LBB93_14: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 64
; RV32ZVE32F-NEXT: beqz a0, .LBB93_7
; RV32ZVE32F-NEXT: .LBB93_15: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB93_8
; RV32ZVE32F-NEXT: .LBB93_16: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fsd fa7, 0(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB93_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa0, 0(a2)
; RV64ZVE32F-NEXT: .LBB93_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB93_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa1, 0(a2)
; RV64ZVE32F-NEXT: .LBB93_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB93_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB93_13
; RV64ZVE32F-NEXT: .LBB93_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB93_14
; RV64ZVE32F-NEXT: .LBB93_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB93_9
; RV64ZVE32F-NEXT: .LBB93_8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
; RV64ZVE32F-NEXT: .LBB93_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB93_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB93_16
; RV64ZVE32F-NEXT: .LBB93_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB93_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB93_6
; RV64ZVE32F-NEXT: .LBB93_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB93_7
; RV64ZVE32F-NEXT: .LBB93_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB93_8
; RV64ZVE32F-NEXT: j .LBB93_9
; RV64ZVE32F-NEXT: .LBB93_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB93_11
; RV64ZVE32F-NEXT: .LBB93_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: andi a1, a1, 255
; RV64ZVE32F-NEXT: slli a1, a1, 3
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: fsd fa7, 0(a0)
; RV64ZVE32F-NEXT: ret
  %eidxs = zext <8 x i8> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
  call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
; RV32V-LABEL: mscatter_baseidx_v8i16_v8f64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vsext.vf2 v14, v12
; RV32V-NEXT: vsll.vi v12, v14, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_v8i16_v8f64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf4 v16, v12
; RV64V-NEXT: vsll.vi v12, v16, 3
; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_baseidx_v8i16_v8f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsext.vf2 v10, v8
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v0
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
; RV32ZVE32F-NEXT: bnez a2, .LBB94_9
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, a1, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB94_10
; RV32ZVE32F-NEXT: .LBB94_2: # %else2
; RV32ZVE32F-NEXT: andi a0, a1, 4
; RV32ZVE32F-NEXT: bnez a0, .LBB94_11
; RV32ZVE32F-NEXT: .LBB94_3: # %else4
; RV32ZVE32F-NEXT: andi a0, a1, 8
; RV32ZVE32F-NEXT: bnez a0, .LBB94_12
; RV32ZVE32F-NEXT: .LBB94_4: # %else6
; RV32ZVE32F-NEXT: andi a0, a1, 16
; RV32ZVE32F-NEXT: bnez a0, .LBB94_13
; RV32ZVE32F-NEXT: .LBB94_5: # %else8
; RV32ZVE32F-NEXT: andi a0, a1, 32
; RV32ZVE32F-NEXT: bnez a0, .LBB94_14
; RV32ZVE32F-NEXT: .LBB94_6: # %else10
; RV32ZVE32F-NEXT: andi a0, a1, 64
; RV32ZVE32F-NEXT: bnez a0, .LBB94_15
; RV32ZVE32F-NEXT: .LBB94_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a1, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB94_16
; RV32ZVE32F-NEXT: .LBB94_8: # %else14
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB94_9: # %cond.store
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 2
; RV32ZVE32F-NEXT: beqz a0, .LBB94_2
; RV32ZVE32F-NEXT: .LBB94_10: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 4
; RV32ZVE32F-NEXT: beqz a0, .LBB94_3
; RV32ZVE32F-NEXT: .LBB94_11: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 8
; RV32ZVE32F-NEXT: beqz a0, .LBB94_4
; RV32ZVE32F-NEXT: .LBB94_12: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 16
; RV32ZVE32F-NEXT: beqz a0, .LBB94_5
; RV32ZVE32F-NEXT: .LBB94_13: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 32
; RV32ZVE32F-NEXT: beqz a0, .LBB94_6
; RV32ZVE32F-NEXT: .LBB94_14: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 64
; RV32ZVE32F-NEXT: beqz a0, .LBB94_7
; RV32ZVE32F-NEXT: .LBB94_15: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB94_8
; RV32ZVE32F-NEXT: .LBB94_16: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fsd fa7, 0(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v8i16_v8f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB94_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa0, 0(a2)
; RV64ZVE32F-NEXT: .LBB94_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB94_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa1, 0(a2)
; RV64ZVE32F-NEXT: .LBB94_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB94_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB94_13
; RV64ZVE32F-NEXT: .LBB94_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB94_14
; RV64ZVE32F-NEXT: .LBB94_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB94_9
; RV64ZVE32F-NEXT: .LBB94_8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
; RV64ZVE32F-NEXT: .LBB94_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB94_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB94_16
; RV64ZVE32F-NEXT: .LBB94_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB94_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB94_6
; RV64ZVE32F-NEXT: .LBB94_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB94_7
; RV64ZVE32F-NEXT: .LBB94_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB94_8
; RV64ZVE32F-NEXT: j .LBB94_9
; RV64ZVE32F-NEXT: .LBB94_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB94_11
; RV64ZVE32F-NEXT: .LBB94_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 3
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: fsd fa7, 0(a0)
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds double, ptr %base, <8 x i16> %idxs
call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
ret void
}
define void @mscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
; RV32V-LABEL: mscatter_baseidx_sext_v8i16_v8f64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vsext.vf2 v14, v12
; RV32V-NEXT: vsll.vi v12, v14, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_sext_v8i16_v8f64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf4 v16, v12
; RV64V-NEXT: vsll.vi v12, v16, 3
; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_baseidx_sext_v8i16_v8f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsext.vf2 v10, v8
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v0
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
; RV32ZVE32F-NEXT: bnez a2, .LBB95_9
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, a1, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB95_10
; RV32ZVE32F-NEXT: .LBB95_2: # %else2
; RV32ZVE32F-NEXT: andi a0, a1, 4
; RV32ZVE32F-NEXT: bnez a0, .LBB95_11
; RV32ZVE32F-NEXT: .LBB95_3: # %else4
; RV32ZVE32F-NEXT: andi a0, a1, 8
; RV32ZVE32F-NEXT: bnez a0, .LBB95_12
; RV32ZVE32F-NEXT: .LBB95_4: # %else6
; RV32ZVE32F-NEXT: andi a0, a1, 16
; RV32ZVE32F-NEXT: bnez a0, .LBB95_13
; RV32ZVE32F-NEXT: .LBB95_5: # %else8
; RV32ZVE32F-NEXT: andi a0, a1, 32
; RV32ZVE32F-NEXT: bnez a0, .LBB95_14
; RV32ZVE32F-NEXT: .LBB95_6: # %else10
; RV32ZVE32F-NEXT: andi a0, a1, 64
; RV32ZVE32F-NEXT: bnez a0, .LBB95_15
; RV32ZVE32F-NEXT: .LBB95_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a1, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB95_16
; RV32ZVE32F-NEXT: .LBB95_8: # %else14
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB95_9: # %cond.store
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 2
; RV32ZVE32F-NEXT: beqz a0, .LBB95_2
; RV32ZVE32F-NEXT: .LBB95_10: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 4
; RV32ZVE32F-NEXT: beqz a0, .LBB95_3
; RV32ZVE32F-NEXT: .LBB95_11: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 8
; RV32ZVE32F-NEXT: beqz a0, .LBB95_4
; RV32ZVE32F-NEXT: .LBB95_12: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 16
; RV32ZVE32F-NEXT: beqz a0, .LBB95_5
; RV32ZVE32F-NEXT: .LBB95_13: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 32
; RV32ZVE32F-NEXT: beqz a0, .LBB95_6
; RV32ZVE32F-NEXT: .LBB95_14: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 64
; RV32ZVE32F-NEXT: beqz a0, .LBB95_7
; RV32ZVE32F-NEXT: .LBB95_15: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB95_8
; RV32ZVE32F-NEXT: .LBB95_16: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fsd fa7, 0(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i16_v8f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB95_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa0, 0(a2)
; RV64ZVE32F-NEXT: .LBB95_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB95_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa1, 0(a2)
; RV64ZVE32F-NEXT: .LBB95_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB95_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB95_13
; RV64ZVE32F-NEXT: .LBB95_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB95_14
; RV64ZVE32F-NEXT: .LBB95_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB95_9
; RV64ZVE32F-NEXT: .LBB95_8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
; RV64ZVE32F-NEXT: .LBB95_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB95_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB95_16
; RV64ZVE32F-NEXT: .LBB95_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB95_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB95_6
; RV64ZVE32F-NEXT: .LBB95_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB95_7
; RV64ZVE32F-NEXT: .LBB95_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB95_8
; RV64ZVE32F-NEXT: j .LBB95_9
; RV64ZVE32F-NEXT: .LBB95_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB95_11
; RV64ZVE32F-NEXT: .LBB95_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 3
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: fsd fa7, 0(a0)
; RV64ZVE32F-NEXT: ret
%eidxs = sext <8 x i16> %idxs to <8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
ret void
}
define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
; RV32V-LABEL: mscatter_baseidx_zext_v8i16_v8f64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vzext.vf2 v14, v12
; RV32V-NEXT: vsll.vi v12, v14, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_zext_v8i16_v8f64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64V-NEXT: vzext.vf2 v14, v12
; RV64V-NEXT: vsll.vi v12, v14, 3
; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; RV64V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vzext.vf2 v10, v8
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v0
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
; RV32ZVE32F-NEXT: bnez a2, .LBB96_9
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, a1, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB96_10
; RV32ZVE32F-NEXT: .LBB96_2: # %else2
; RV32ZVE32F-NEXT: andi a0, a1, 4
; RV32ZVE32F-NEXT: bnez a0, .LBB96_11
; RV32ZVE32F-NEXT: .LBB96_3: # %else4
; RV32ZVE32F-NEXT: andi a0, a1, 8
; RV32ZVE32F-NEXT: bnez a0, .LBB96_12
; RV32ZVE32F-NEXT: .LBB96_4: # %else6
; RV32ZVE32F-NEXT: andi a0, a1, 16
; RV32ZVE32F-NEXT: bnez a0, .LBB96_13
; RV32ZVE32F-NEXT: .LBB96_5: # %else8
; RV32ZVE32F-NEXT: andi a0, a1, 32
; RV32ZVE32F-NEXT: bnez a0, .LBB96_14
; RV32ZVE32F-NEXT: .LBB96_6: # %else10
; RV32ZVE32F-NEXT: andi a0, a1, 64
; RV32ZVE32F-NEXT: bnez a0, .LBB96_15
; RV32ZVE32F-NEXT: .LBB96_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a1, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB96_16
; RV32ZVE32F-NEXT: .LBB96_8: # %else14
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB96_9: # %cond.store
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 2
; RV32ZVE32F-NEXT: beqz a0, .LBB96_2
; RV32ZVE32F-NEXT: .LBB96_10: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 4
; RV32ZVE32F-NEXT: beqz a0, .LBB96_3
; RV32ZVE32F-NEXT: .LBB96_11: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 8
; RV32ZVE32F-NEXT: beqz a0, .LBB96_4
; RV32ZVE32F-NEXT: .LBB96_12: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 16
; RV32ZVE32F-NEXT: beqz a0, .LBB96_5
; RV32ZVE32F-NEXT: .LBB96_13: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 32
; RV32ZVE32F-NEXT: beqz a0, .LBB96_6
; RV32ZVE32F-NEXT: .LBB96_14: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 64
; RV32ZVE32F-NEXT: beqz a0, .LBB96_7
; RV32ZVE32F-NEXT: .LBB96_15: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB96_8
; RV32ZVE32F-NEXT: .LBB96_16: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fsd fa7, 0(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB96_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 48
; RV64ZVE32F-NEXT: srli a2, a2, 45
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa0, 0(a2)
; RV64ZVE32F-NEXT: .LBB96_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB96_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 48
; RV64ZVE32F-NEXT: srli a2, a2, 45
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa1, 0(a2)
; RV64ZVE32F-NEXT: .LBB96_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB96_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB96_13
; RV64ZVE32F-NEXT: .LBB96_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB96_14
; RV64ZVE32F-NEXT: .LBB96_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB96_9
; RV64ZVE32F-NEXT: .LBB96_8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 48
; RV64ZVE32F-NEXT: srli a2, a2, 45
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
; RV64ZVE32F-NEXT: .LBB96_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB96_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB96_16
; RV64ZVE32F-NEXT: .LBB96_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB96_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 48
; RV64ZVE32F-NEXT: srli a2, a2, 45
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB96_6
; RV64ZVE32F-NEXT: .LBB96_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 48
; RV64ZVE32F-NEXT: srli a2, a2, 45
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB96_7
; RV64ZVE32F-NEXT: .LBB96_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 48
; RV64ZVE32F-NEXT: srli a2, a2, 45
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB96_8
; RV64ZVE32F-NEXT: j .LBB96_9
; RV64ZVE32F-NEXT: .LBB96_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 48
; RV64ZVE32F-NEXT: srli a2, a2, 45
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB96_11
; RV64ZVE32F-NEXT: .LBB96_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 48
; RV64ZVE32F-NEXT: srli a1, a1, 45
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: fsd fa7, 0(a0)
; RV64ZVE32F-NEXT: ret
%eidxs = zext <8 x i16> %idxs to <8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
ret void
}
define void @mscatter_baseidx_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m) {
; RV32V-LABEL: mscatter_baseidx_v8i32_v8f64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vsll.vi v12, v12, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_v8i32_v8f64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf2 v16, v12
; RV64V-NEXT: vsll.vi v12, v16, 3
; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_baseidx_v8i32_v8f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v0
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
; RV32ZVE32F-NEXT: bnez a2, .LBB97_9
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, a1, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB97_10
; RV32ZVE32F-NEXT: .LBB97_2: # %else2
; RV32ZVE32F-NEXT: andi a0, a1, 4
; RV32ZVE32F-NEXT: bnez a0, .LBB97_11
; RV32ZVE32F-NEXT: .LBB97_3: # %else4
; RV32ZVE32F-NEXT: andi a0, a1, 8
; RV32ZVE32F-NEXT: bnez a0, .LBB97_12
; RV32ZVE32F-NEXT: .LBB97_4: # %else6
; RV32ZVE32F-NEXT: andi a0, a1, 16
; RV32ZVE32F-NEXT: bnez a0, .LBB97_13
; RV32ZVE32F-NEXT: .LBB97_5: # %else8
; RV32ZVE32F-NEXT: andi a0, a1, 32
; RV32ZVE32F-NEXT: bnez a0, .LBB97_14
; RV32ZVE32F-NEXT: .LBB97_6: # %else10
; RV32ZVE32F-NEXT: andi a0, a1, 64
; RV32ZVE32F-NEXT: bnez a0, .LBB97_15
; RV32ZVE32F-NEXT: .LBB97_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a1, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB97_16
; RV32ZVE32F-NEXT: .LBB97_8: # %else14
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB97_9: # %cond.store
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 2
; RV32ZVE32F-NEXT: beqz a0, .LBB97_2
; RV32ZVE32F-NEXT: .LBB97_10: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 4
; RV32ZVE32F-NEXT: beqz a0, .LBB97_3
; RV32ZVE32F-NEXT: .LBB97_11: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 8
; RV32ZVE32F-NEXT: beqz a0, .LBB97_4
; RV32ZVE32F-NEXT: .LBB97_12: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 16
; RV32ZVE32F-NEXT: beqz a0, .LBB97_5
; RV32ZVE32F-NEXT: .LBB97_13: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 32
; RV32ZVE32F-NEXT: beqz a0, .LBB97_6
; RV32ZVE32F-NEXT: .LBB97_14: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 64
; RV32ZVE32F-NEXT: beqz a0, .LBB97_7
; RV32ZVE32F-NEXT: .LBB97_15: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB97_8
; RV32ZVE32F-NEXT: .LBB97_16: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fsd fa7, 0(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v8i32_v8f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB97_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa0, 0(a2)
; RV64ZVE32F-NEXT: .LBB97_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB97_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa1, 0(a2)
; RV64ZVE32F-NEXT: .LBB97_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB97_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB97_13
; RV64ZVE32F-NEXT: .LBB97_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB97_14
; RV64ZVE32F-NEXT: .LBB97_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB97_9
; RV64ZVE32F-NEXT: .LBB97_8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
; RV64ZVE32F-NEXT: .LBB97_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB97_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB97_16
; RV64ZVE32F-NEXT: .LBB97_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB97_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB97_6
; RV64ZVE32F-NEXT: .LBB97_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB97_7
; RV64ZVE32F-NEXT: .LBB97_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB97_8
; RV64ZVE32F-NEXT: j .LBB97_9
; RV64ZVE32F-NEXT: .LBB97_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB97_11
; RV64ZVE32F-NEXT: .LBB97_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 3
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: fsd fa7, 0(a0)
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds double, ptr %base, <8 x i32> %idxs
call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
ret void
}
define void @mscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m) {
; RV32V-LABEL: mscatter_baseidx_sext_v8i32_v8f64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vsll.vi v12, v12, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_sext_v8i32_v8f64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf2 v16, v12
; RV64V-NEXT: vsll.vi v12, v16, 3
; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_baseidx_sext_v8i32_v8f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v0
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
; RV32ZVE32F-NEXT: bnez a2, .LBB98_9
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, a1, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB98_10
; RV32ZVE32F-NEXT: .LBB98_2: # %else2
; RV32ZVE32F-NEXT: andi a0, a1, 4
; RV32ZVE32F-NEXT: bnez a0, .LBB98_11
; RV32ZVE32F-NEXT: .LBB98_3: # %else4
; RV32ZVE32F-NEXT: andi a0, a1, 8
; RV32ZVE32F-NEXT: bnez a0, .LBB98_12
; RV32ZVE32F-NEXT: .LBB98_4: # %else6
; RV32ZVE32F-NEXT: andi a0, a1, 16
; RV32ZVE32F-NEXT: bnez a0, .LBB98_13
; RV32ZVE32F-NEXT: .LBB98_5: # %else8
; RV32ZVE32F-NEXT: andi a0, a1, 32
; RV32ZVE32F-NEXT: bnez a0, .LBB98_14
; RV32ZVE32F-NEXT: .LBB98_6: # %else10
; RV32ZVE32F-NEXT: andi a0, a1, 64
; RV32ZVE32F-NEXT: bnez a0, .LBB98_15
; RV32ZVE32F-NEXT: .LBB98_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a1, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB98_16
; RV32ZVE32F-NEXT: .LBB98_8: # %else14
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB98_9: # %cond.store
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 2
; RV32ZVE32F-NEXT: beqz a0, .LBB98_2
; RV32ZVE32F-NEXT: .LBB98_10: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 4
; RV32ZVE32F-NEXT: beqz a0, .LBB98_3
; RV32ZVE32F-NEXT: .LBB98_11: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 8
; RV32ZVE32F-NEXT: beqz a0, .LBB98_4
; RV32ZVE32F-NEXT: .LBB98_12: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 16
; RV32ZVE32F-NEXT: beqz a0, .LBB98_5
; RV32ZVE32F-NEXT: .LBB98_13: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 32
; RV32ZVE32F-NEXT: beqz a0, .LBB98_6
; RV32ZVE32F-NEXT: .LBB98_14: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 64
; RV32ZVE32F-NEXT: beqz a0, .LBB98_7
; RV32ZVE32F-NEXT: .LBB98_15: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB98_8
; RV32ZVE32F-NEXT: .LBB98_16: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fsd fa7, 0(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i32_v8f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB98_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa0, 0(a2)
; RV64ZVE32F-NEXT: .LBB98_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB98_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa1, 0(a2)
; RV64ZVE32F-NEXT: .LBB98_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB98_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB98_13
; RV64ZVE32F-NEXT: .LBB98_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB98_14
; RV64ZVE32F-NEXT: .LBB98_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB98_9
; RV64ZVE32F-NEXT: .LBB98_8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
; RV64ZVE32F-NEXT: .LBB98_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB98_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB98_16
; RV64ZVE32F-NEXT: .LBB98_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB98_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB98_6
; RV64ZVE32F-NEXT: .LBB98_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB98_7
; RV64ZVE32F-NEXT: .LBB98_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB98_8
; RV64ZVE32F-NEXT: j .LBB98_9
; RV64ZVE32F-NEXT: .LBB98_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB98_11
; RV64ZVE32F-NEXT: .LBB98_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 3
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: fsd fa7, 0(a0)
; RV64ZVE32F-NEXT: ret
%eidxs = sext <8 x i32> %idxs to <8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
ret void
}
define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m) {
; RV32V-LABEL: mscatter_baseidx_zext_v8i32_v8f64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vsll.vi v12, v12, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32V-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_zext_v8i32_v8f64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vzext.vf2 v16, v12
; RV64V-NEXT: vsll.vi v12, v16, 3
; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i32_v8f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v0
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
; RV32ZVE32F-NEXT: bnez a2, .LBB99_9
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, a1, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB99_10
; RV32ZVE32F-NEXT: .LBB99_2: # %else2
; RV32ZVE32F-NEXT: andi a0, a1, 4
; RV32ZVE32F-NEXT: bnez a0, .LBB99_11
; RV32ZVE32F-NEXT: .LBB99_3: # %else4
; RV32ZVE32F-NEXT: andi a0, a1, 8
; RV32ZVE32F-NEXT: bnez a0, .LBB99_12
; RV32ZVE32F-NEXT: .LBB99_4: # %else6
; RV32ZVE32F-NEXT: andi a0, a1, 16
; RV32ZVE32F-NEXT: bnez a0, .LBB99_13
; RV32ZVE32F-NEXT: .LBB99_5: # %else8
; RV32ZVE32F-NEXT: andi a0, a1, 32
; RV32ZVE32F-NEXT: bnez a0, .LBB99_14
; RV32ZVE32F-NEXT: .LBB99_6: # %else10
; RV32ZVE32F-NEXT: andi a0, a1, 64
; RV32ZVE32F-NEXT: bnez a0, .LBB99_15
; RV32ZVE32F-NEXT: .LBB99_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a1, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB99_16
; RV32ZVE32F-NEXT: .LBB99_8: # %else14
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB99_9: # %cond.store
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 2
; RV32ZVE32F-NEXT: beqz a0, .LBB99_2
; RV32ZVE32F-NEXT: .LBB99_10: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 4
; RV32ZVE32F-NEXT: beqz a0, .LBB99_3
; RV32ZVE32F-NEXT: .LBB99_11: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 8
; RV32ZVE32F-NEXT: beqz a0, .LBB99_4
; RV32ZVE32F-NEXT: .LBB99_12: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 16
; RV32ZVE32F-NEXT: beqz a0, .LBB99_5
; RV32ZVE32F-NEXT: .LBB99_13: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 32
; RV32ZVE32F-NEXT: beqz a0, .LBB99_6
; RV32ZVE32F-NEXT: .LBB99_14: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 64
; RV32ZVE32F-NEXT: beqz a0, .LBB99_7
; RV32ZVE32F-NEXT: .LBB99_15: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB99_8
; RV32ZVE32F-NEXT: .LBB99_16: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fsd fa7, 0(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i32_v8f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB99_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 32
; RV64ZVE32F-NEXT: srli a2, a2, 29
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa0, 0(a2)
; RV64ZVE32F-NEXT: .LBB99_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB99_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 32
; RV64ZVE32F-NEXT: srli a2, a2, 29
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa1, 0(a2)
; RV64ZVE32F-NEXT: .LBB99_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB99_12
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB99_13
; RV64ZVE32F-NEXT: .LBB99_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB99_14
; RV64ZVE32F-NEXT: .LBB99_7: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB99_9
; RV64ZVE32F-NEXT: .LBB99_8: # %cond.store9
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 32
; RV64ZVE32F-NEXT: srli a2, a2, 29
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
; RV64ZVE32F-NEXT: .LBB99_9: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB99_15
; RV64ZVE32F-NEXT: # %bb.10: # %else12
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB99_16
; RV64ZVE32F-NEXT: .LBB99_11: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB99_12: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 32
; RV64ZVE32F-NEXT: srli a2, a2, 29
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB99_6
; RV64ZVE32F-NEXT: .LBB99_13: # %cond.store5
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 32
; RV64ZVE32F-NEXT: srli a2, a2, 29
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB99_7
; RV64ZVE32F-NEXT: .LBB99_14: # %cond.store7
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 32
; RV64ZVE32F-NEXT: srli a2, a2, 29
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB99_8
; RV64ZVE32F-NEXT: j .LBB99_9
; RV64ZVE32F-NEXT: .LBB99_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 32
; RV64ZVE32F-NEXT: srli a2, a2, 29
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB99_11
; RV64ZVE32F-NEXT: .LBB99_16: # %cond.store13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 32
; RV64ZVE32F-NEXT: srli a1, a1, 29
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: fsd fa7, 0(a0)
; RV64ZVE32F-NEXT: ret
%eidxs = zext <8 x i32> %idxs to <8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
ret void
}
11976 define void @mscatter_baseidx_v8f64(<8 x double> %val, ptr %base, <8 x i64> %idxs, <8 x i1> %m) {
11977 ; RV32V-LABEL: mscatter_baseidx_v8f64:
11979 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
11980 ; RV32V-NEXT: vnsrl.wi v16, v12, 0
11981 ; RV32V-NEXT: vsll.vi v12, v16, 3
11982 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
11983 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
11986 ; RV64V-LABEL: mscatter_baseidx_v8f64:
11988 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
11989 ; RV64V-NEXT: vsll.vi v12, v12, 3
11990 ; RV64V-NEXT: vsoxei64.v v8, (a0), v12, v0.t
11993 ; RV32ZVE32F-LABEL: mscatter_baseidx_v8f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: lw a2, 32(a1)
; RV32ZVE32F-NEXT: lw a3, 40(a1)
; RV32ZVE32F-NEXT: lw a4, 48(a1)
; RV32ZVE32F-NEXT: lw a5, 56(a1)
; RV32ZVE32F-NEXT: lw a6, 0(a1)
; RV32ZVE32F-NEXT: lw a7, 8(a1)
; RV32ZVE32F-NEXT: lw t0, 16(a1)
; RV32ZVE32F-NEXT: lw t1, 24(a1)
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vmv.v.x v8, a6
; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v0
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a7
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t0
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t1
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a2
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a3
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a4
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a5
; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
; RV32ZVE32F-NEXT: bnez a2, .LBB100_9
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, a1, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB100_10
; RV32ZVE32F-NEXT: .LBB100_2: # %else2
; RV32ZVE32F-NEXT: andi a0, a1, 4
; RV32ZVE32F-NEXT: bnez a0, .LBB100_11
; RV32ZVE32F-NEXT: .LBB100_3: # %else4
; RV32ZVE32F-NEXT: andi a0, a1, 8
; RV32ZVE32F-NEXT: bnez a0, .LBB100_12
; RV32ZVE32F-NEXT: .LBB100_4: # %else6
; RV32ZVE32F-NEXT: andi a0, a1, 16
; RV32ZVE32F-NEXT: bnez a0, .LBB100_13
; RV32ZVE32F-NEXT: .LBB100_5: # %else8
; RV32ZVE32F-NEXT: andi a0, a1, 32
; RV32ZVE32F-NEXT: bnez a0, .LBB100_14
; RV32ZVE32F-NEXT: .LBB100_6: # %else10
; RV32ZVE32F-NEXT: andi a0, a1, 64
; RV32ZVE32F-NEXT: bnez a0, .LBB100_15
; RV32ZVE32F-NEXT: .LBB100_7: # %else12
; RV32ZVE32F-NEXT: andi a0, a1, -128
; RV32ZVE32F-NEXT: bnez a0, .LBB100_16
; RV32ZVE32F-NEXT: .LBB100_8: # %else14
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB100_9: # %cond.store
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 2
; RV32ZVE32F-NEXT: beqz a0, .LBB100_2
; RV32ZVE32F-NEXT: .LBB100_10: # %cond.store1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 4
; RV32ZVE32F-NEXT: beqz a0, .LBB100_3
; RV32ZVE32F-NEXT: .LBB100_11: # %cond.store3
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 8
; RV32ZVE32F-NEXT: beqz a0, .LBB100_4
; RV32ZVE32F-NEXT: .LBB100_12: # %cond.store5
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 16
; RV32ZVE32F-NEXT: beqz a0, .LBB100_5
; RV32ZVE32F-NEXT: .LBB100_13: # %cond.store7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 32
; RV32ZVE32F-NEXT: beqz a0, .LBB100_6
; RV32ZVE32F-NEXT: .LBB100_14: # %cond.store9
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, 64
; RV32ZVE32F-NEXT: beqz a0, .LBB100_7
; RV32ZVE32F-NEXT: .LBB100_15: # %cond.store11
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a0, v10
; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
; RV32ZVE32F-NEXT: andi a0, a1, -128
; RV32ZVE32F-NEXT: beqz a0, .LBB100_8
; RV32ZVE32F-NEXT: .LBB100_16: # %cond.store13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fsd fa7, 0(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v8f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld t1, 8(a1)
; RV64ZVE32F-NEXT: ld t0, 16(a1)
; RV64ZVE32F-NEXT: ld a7, 24(a1)
; RV64ZVE32F-NEXT: ld a6, 32(a1)
; RV64ZVE32F-NEXT: ld a5, 40(a1)
; RV64ZVE32F-NEXT: ld a4, 48(a1)
; RV64ZVE32F-NEXT: ld a2, 56(a1)
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v0
; RV64ZVE32F-NEXT: andi t2, a3, 1
; RV64ZVE32F-NEXT: bnez t2, .LBB100_9
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a1, a3, 2
; RV64ZVE32F-NEXT: bnez a1, .LBB100_10
; RV64ZVE32F-NEXT: .LBB100_2: # %else2
; RV64ZVE32F-NEXT: andi a1, a3, 4
; RV64ZVE32F-NEXT: bnez a1, .LBB100_11
; RV64ZVE32F-NEXT: .LBB100_3: # %else4
; RV64ZVE32F-NEXT: andi a1, a3, 8
; RV64ZVE32F-NEXT: bnez a1, .LBB100_12
; RV64ZVE32F-NEXT: .LBB100_4: # %else6
; RV64ZVE32F-NEXT: andi a1, a3, 16
; RV64ZVE32F-NEXT: bnez a1, .LBB100_13
; RV64ZVE32F-NEXT: .LBB100_5: # %else8
; RV64ZVE32F-NEXT: andi a1, a3, 32
; RV64ZVE32F-NEXT: bnez a1, .LBB100_14
; RV64ZVE32F-NEXT: .LBB100_6: # %else10
; RV64ZVE32F-NEXT: andi a1, a3, 64
; RV64ZVE32F-NEXT: bnez a1, .LBB100_15
; RV64ZVE32F-NEXT: .LBB100_7: # %else12
; RV64ZVE32F-NEXT: andi a1, a3, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB100_16
; RV64ZVE32F-NEXT: .LBB100_8: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB100_9: # %cond.store
; RV64ZVE32F-NEXT: ld a1, 0(a1)
; RV64ZVE32F-NEXT: slli a1, a1, 3
; RV64ZVE32F-NEXT: add a1, a0, a1
; RV64ZVE32F-NEXT: fsd fa0, 0(a1)
; RV64ZVE32F-NEXT: andi a1, a3, 2
; RV64ZVE32F-NEXT: beqz a1, .LBB100_2
; RV64ZVE32F-NEXT: .LBB100_10: # %cond.store1
; RV64ZVE32F-NEXT: slli t1, t1, 3
; RV64ZVE32F-NEXT: add t1, a0, t1
; RV64ZVE32F-NEXT: fsd fa1, 0(t1)
; RV64ZVE32F-NEXT: andi a1, a3, 4
; RV64ZVE32F-NEXT: beqz a1, .LBB100_3
; RV64ZVE32F-NEXT: .LBB100_11: # %cond.store3
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a0, t0
; RV64ZVE32F-NEXT: fsd fa2, 0(t0)
; RV64ZVE32F-NEXT: andi a1, a3, 8
; RV64ZVE32F-NEXT: beqz a1, .LBB100_4
; RV64ZVE32F-NEXT: .LBB100_12: # %cond.store5
; RV64ZVE32F-NEXT: slli a7, a7, 3
; RV64ZVE32F-NEXT: add a7, a0, a7
; RV64ZVE32F-NEXT: fsd fa3, 0(a7)
; RV64ZVE32F-NEXT: andi a1, a3, 16
; RV64ZVE32F-NEXT: beqz a1, .LBB100_5
; RV64ZVE32F-NEXT: .LBB100_13: # %cond.store7
; RV64ZVE32F-NEXT: slli a6, a6, 3
; RV64ZVE32F-NEXT: add a6, a0, a6
; RV64ZVE32F-NEXT: fsd fa4, 0(a6)
; RV64ZVE32F-NEXT: andi a1, a3, 32
; RV64ZVE32F-NEXT: beqz a1, .LBB100_6
; RV64ZVE32F-NEXT: .LBB100_14: # %cond.store9
; RV64ZVE32F-NEXT: slli a5, a5, 3
; RV64ZVE32F-NEXT: add a5, a0, a5
; RV64ZVE32F-NEXT: fsd fa5, 0(a5)
; RV64ZVE32F-NEXT: andi a1, a3, 64
; RV64ZVE32F-NEXT: beqz a1, .LBB100_7
; RV64ZVE32F-NEXT: .LBB100_15: # %cond.store11
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a0, a4
; RV64ZVE32F-NEXT: fsd fa6, 0(a4)
; RV64ZVE32F-NEXT: andi a1, a3, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB100_8
; RV64ZVE32F-NEXT: .LBB100_16: # %cond.store13
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a0, a0, a2
; RV64ZVE32F-NEXT: fsd fa7, 0(a0)
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %idxs
call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
ret void
}

declare void @llvm.masked.scatter.v16i8.v16p0(<16 x i8>, <16 x ptr>, i32, <16 x i1>)

define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs, <16 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_v16i8:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v12, v9
; RV32-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_v16i8:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64V-NEXT: vsext.vf8 v16, v9
; RV64V-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v16i8:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB101_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vse8.v v8, (a2)
; RV64ZVE32F-NEXT: .LBB101_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB101_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vse8.v v10, (a2)
; RV64ZVE32F-NEXT: .LBB101_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB101_25
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB101_26
; RV64ZVE32F-NEXT: .LBB101_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB101_8
; RV64ZVE32F-NEXT: .LBB101_7: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 4
; RV64ZVE32F-NEXT: vse8.v v11, (a2)
; RV64ZVE32F-NEXT: .LBB101_8: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB101_10
; RV64ZVE32F-NEXT: # %bb.9: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 5
; RV64ZVE32F-NEXT: vse8.v v11, (a2)
; RV64ZVE32F-NEXT: .LBB101_10: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB101_27
; RV64ZVE32F-NEXT: # %bb.11: # %else12
; RV64ZVE32F-NEXT: andi a2, a1, 128
; RV64ZVE32F-NEXT: bnez a2, .LBB101_28
; RV64ZVE32F-NEXT: .LBB101_12: # %else14
; RV64ZVE32F-NEXT: andi a2, a1, 256
; RV64ZVE32F-NEXT: bnez a2, .LBB101_29
; RV64ZVE32F-NEXT: .LBB101_13: # %else16
; RV64ZVE32F-NEXT: andi a2, a1, 512
; RV64ZVE32F-NEXT: beqz a2, .LBB101_15
; RV64ZVE32F-NEXT: .LBB101_14: # %cond.store17
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 9
; RV64ZVE32F-NEXT: vse8.v v10, (a2)
; RV64ZVE32F-NEXT: .LBB101_15: # %else18
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
; RV64ZVE32F-NEXT: andi a2, a1, 1024
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB101_30
; RV64ZVE32F-NEXT: # %bb.16: # %else20
; RV64ZVE32F-NEXT: slli a2, a1, 52
; RV64ZVE32F-NEXT: bltz a2, .LBB101_31
; RV64ZVE32F-NEXT: .LBB101_17: # %else22
; RV64ZVE32F-NEXT: slli a2, a1, 51
; RV64ZVE32F-NEXT: bltz a2, .LBB101_32
; RV64ZVE32F-NEXT: .LBB101_18: # %else24
; RV64ZVE32F-NEXT: slli a2, a1, 50
; RV64ZVE32F-NEXT: bgez a2, .LBB101_20
; RV64ZVE32F-NEXT: .LBB101_19: # %cond.store25
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 13
; RV64ZVE32F-NEXT: vse8.v v9, (a2)
; RV64ZVE32F-NEXT: .LBB101_20: # %else26
; RV64ZVE32F-NEXT: slli a2, a1, 49
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
; RV64ZVE32F-NEXT: bgez a2, .LBB101_22
; RV64ZVE32F-NEXT: # %bb.21: # %cond.store27
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 14
; RV64ZVE32F-NEXT: vse8.v v10, (a2)
; RV64ZVE32F-NEXT: .LBB101_22: # %else28
; RV64ZVE32F-NEXT: lui a2, 1048568
; RV64ZVE32F-NEXT: and a1, a1, a2
; RV64ZVE32F-NEXT: beqz a1, .LBB101_24
; RV64ZVE32F-NEXT: # %bb.23: # %cond.store29
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v9
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 15
; RV64ZVE32F-NEXT: vse8.v v8, (a0)
; RV64ZVE32F-NEXT: .LBB101_24: # %else30
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB101_25: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB101_6
; RV64ZVE32F-NEXT: .LBB101_26: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 3
; RV64ZVE32F-NEXT: vse8.v v11, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB101_7
; RV64ZVE32F-NEXT: j .LBB101_8
; RV64ZVE32F-NEXT: .LBB101_27: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 6
; RV64ZVE32F-NEXT: vse8.v v11, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 128
; RV64ZVE32F-NEXT: beqz a2, .LBB101_12
; RV64ZVE32F-NEXT: .LBB101_28: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 7
; RV64ZVE32F-NEXT: vse8.v v10, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 256
; RV64ZVE32F-NEXT: beqz a2, .LBB101_13
; RV64ZVE32F-NEXT: .LBB101_29: # %cond.store15
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 8
; RV64ZVE32F-NEXT: vse8.v v10, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 512
; RV64ZVE32F-NEXT: bnez a2, .LBB101_14
; RV64ZVE32F-NEXT: j .LBB101_15
; RV64ZVE32F-NEXT: .LBB101_30: # %cond.store19
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 10
; RV64ZVE32F-NEXT: vse8.v v11, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 52
; RV64ZVE32F-NEXT: bgez a2, .LBB101_17
; RV64ZVE32F-NEXT: .LBB101_31: # %cond.store21
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 11
; RV64ZVE32F-NEXT: vse8.v v9, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 51
; RV64ZVE32F-NEXT: bgez a2, .LBB101_18
; RV64ZVE32F-NEXT: .LBB101_32: # %cond.store23
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 12
; RV64ZVE32F-NEXT: vse8.v v9, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 50
; RV64ZVE32F-NEXT: bltz a2, .LBB101_19
; RV64ZVE32F-NEXT: j .LBB101_20
%ptrs = getelementptr inbounds i8, ptr %base, <16 x i8> %idxs
call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> %val, <16 x ptr> %ptrs, i32 1, <16 x i1> %m)
ret void
}

declare void @llvm.masked.scatter.v32i8.v32p0(<32 x i8>, <32 x ptr>, i32, <32 x i1>)

define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, <32 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_v32i8:
; RV32: # %bb.0:
; RV32-NEXT: li a1, 32
; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; RV32-NEXT: vsext.vf4 v16, v10
; RV32-NEXT: vsetvli zero, zero, e8, m2, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_baseidx_v32i8:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64V-NEXT: vsext.vf8 v16, v10
; RV64V-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64V-NEXT: vsetivli zero, 16, e8, m2, ta, ma
; RV64V-NEXT: vslidedown.vi v8, v8, 16
; RV64V-NEXT: vslidedown.vi v10, v10, 16
; RV64V-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64V-NEXT: vslidedown.vi v0, v0, 2
; RV64V-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64V-NEXT: vsext.vf8 v16, v10
; RV64V-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; RV64V-NEXT: vsoxei64.v v8, (a0), v16, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v32i8:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB102_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vse8.v v8, (a2)
; RV64ZVE32F-NEXT: .LBB102_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB102_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB102_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v13, v10, 4
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB102_49
; RV64ZVE32F-NEXT: # %bb.5: # %else4
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB102_50
; RV64ZVE32F-NEXT: .LBB102_6: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB102_8
; RV64ZVE32F-NEXT: .LBB102_7: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB102_8: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB102_10
; RV64ZVE32F-NEXT: # %bb.9: # %cond.store9
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v14, v13, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v14
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 5
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: .LBB102_10: # %else10
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB102_51
; RV64ZVE32F-NEXT: # %bb.11: # %else12
; RV64ZVE32F-NEXT: andi a2, a1, 128
; RV64ZVE32F-NEXT: bnez a2, .LBB102_52
; RV64ZVE32F-NEXT: .LBB102_12: # %else14
; RV64ZVE32F-NEXT: andi a2, a1, 256
; RV64ZVE32F-NEXT: bnez a2, .LBB102_53
; RV64ZVE32F-NEXT: .LBB102_13: # %else16
; RV64ZVE32F-NEXT: andi a2, a1, 512
; RV64ZVE32F-NEXT: beqz a2, .LBB102_15
; RV64ZVE32F-NEXT: .LBB102_14: # %cond.store17
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v13, v8, 9
; RV64ZVE32F-NEXT: vse8.v v13, (a2)
; RV64ZVE32F-NEXT: .LBB102_15: # %else18
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 4
; RV64ZVE32F-NEXT: andi a2, a1, 1024
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB102_17
; RV64ZVE32F-NEXT: # %bb.16: # %cond.store19
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 10
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: .LBB102_17: # %else20
; RV64ZVE32F-NEXT: slli a2, a1, 52
; RV64ZVE32F-NEXT: bgez a2, .LBB102_19
; RV64ZVE32F-NEXT: # %bb.18: # %cond.store21
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 11
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB102_19: # %else22
; RV64ZVE32F-NEXT: slli a2, a1, 51
; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 16
; RV64ZVE32F-NEXT: bgez a2, .LBB102_21
; RV64ZVE32F-NEXT: # %bb.20: # %cond.store23
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 12
; RV64ZVE32F-NEXT: vse8.v v11, (a2)
; RV64ZVE32F-NEXT: .LBB102_21: # %else24
; RV64ZVE32F-NEXT: slli a2, a1, 50
; RV64ZVE32F-NEXT: bgez a2, .LBB102_23
; RV64ZVE32F-NEXT: # %bb.22: # %cond.store25
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v13, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 13
; RV64ZVE32F-NEXT: vse8.v v11, (a2)
; RV64ZVE32F-NEXT: .LBB102_23: # %else26
; RV64ZVE32F-NEXT: slli a2, a1, 49
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v13, 2
; RV64ZVE32F-NEXT: bltz a2, .LBB102_54
; RV64ZVE32F-NEXT: # %bb.24: # %else28
; RV64ZVE32F-NEXT: slli a2, a1, 48
; RV64ZVE32F-NEXT: bltz a2, .LBB102_55
; RV64ZVE32F-NEXT: .LBB102_25: # %else30
; RV64ZVE32F-NEXT: slli a2, a1, 47
; RV64ZVE32F-NEXT: bltz a2, .LBB102_56
; RV64ZVE32F-NEXT: .LBB102_26: # %else32
; RV64ZVE32F-NEXT: slli a2, a1, 46
; RV64ZVE32F-NEXT: bgez a2, .LBB102_28
; RV64ZVE32F-NEXT: .LBB102_27: # %cond.store33
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 17
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB102_28: # %else34
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
; RV64ZVE32F-NEXT: slli a2, a1, 45
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 2
; RV64ZVE32F-NEXT: bltz a2, .LBB102_57
; RV64ZVE32F-NEXT: # %bb.29: # %else36
; RV64ZVE32F-NEXT: slli a2, a1, 44
; RV64ZVE32F-NEXT: bltz a2, .LBB102_58
; RV64ZVE32F-NEXT: .LBB102_30: # %else38
; RV64ZVE32F-NEXT: slli a2, a1, 43
; RV64ZVE32F-NEXT: bgez a2, .LBB102_32
; RV64ZVE32F-NEXT: .LBB102_31: # %cond.store39
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 20
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB102_32: # %else40
; RV64ZVE32F-NEXT: slli a2, a1, 42
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 8
; RV64ZVE32F-NEXT: bgez a2, .LBB102_34
; RV64ZVE32F-NEXT: # %bb.33: # %cond.store41
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 21
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB102_34: # %else42
; RV64ZVE32F-NEXT: slli a2, a1, 41
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 2
; RV64ZVE32F-NEXT: bltz a2, .LBB102_59
; RV64ZVE32F-NEXT: # %bb.35: # %else44
; RV64ZVE32F-NEXT: slli a2, a1, 40
; RV64ZVE32F-NEXT: bltz a2, .LBB102_60
; RV64ZVE32F-NEXT: .LBB102_36: # %else46
; RV64ZVE32F-NEXT: slli a2, a1, 39
; RV64ZVE32F-NEXT: bltz a2, .LBB102_61
; RV64ZVE32F-NEXT: .LBB102_37: # %else48
; RV64ZVE32F-NEXT: slli a2, a1, 38
; RV64ZVE32F-NEXT: bgez a2, .LBB102_39
; RV64ZVE32F-NEXT: .LBB102_38: # %cond.store49
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 25
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB102_39: # %else50
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
; RV64ZVE32F-NEXT: slli a2, a1, 37
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bltz a2, .LBB102_62
; RV64ZVE32F-NEXT: # %bb.40: # %else52
; RV64ZVE32F-NEXT: slli a2, a1, 36
; RV64ZVE32F-NEXT: bltz a2, .LBB102_63
; RV64ZVE32F-NEXT: .LBB102_41: # %else54
; RV64ZVE32F-NEXT: slli a2, a1, 35
; RV64ZVE32F-NEXT: bltz a2, .LBB102_64
; RV64ZVE32F-NEXT: .LBB102_42: # %else56
; RV64ZVE32F-NEXT: slli a2, a1, 34
; RV64ZVE32F-NEXT: bgez a2, .LBB102_44
; RV64ZVE32F-NEXT: .LBB102_43: # %cond.store57
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 29
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB102_44: # %else58
; RV64ZVE32F-NEXT: slli a2, a1, 33
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
; RV64ZVE32F-NEXT: bgez a2, .LBB102_46
; RV64ZVE32F-NEXT: # %bb.45: # %cond.store59
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 30
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB102_46: # %else60
; RV64ZVE32F-NEXT: lui a2, 524288
; RV64ZVE32F-NEXT: and a1, a1, a2
; RV64ZVE32F-NEXT: beqz a1, .LBB102_48
; RV64ZVE32F-NEXT: # %bb.47: # %cond.store61
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v10
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 31
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v8, (a0)
; RV64ZVE32F-NEXT: .LBB102_48: # %else62
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB102_49: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 2
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB102_6
; RV64ZVE32F-NEXT: .LBB102_50: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 3
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB102_7
; RV64ZVE32F-NEXT: j .LBB102_8
; RV64ZVE32F-NEXT: .LBB102_51: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 6
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 128
; RV64ZVE32F-NEXT: beqz a2, .LBB102_12
; RV64ZVE32F-NEXT: .LBB102_52: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v13, v8, 7
; RV64ZVE32F-NEXT: vse8.v v13, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 256
; RV64ZVE32F-NEXT: beqz a2, .LBB102_13
; RV64ZVE32F-NEXT: .LBB102_53: # %cond.store15
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vslidedown.vi v13, v8, 8
; RV64ZVE32F-NEXT: vse8.v v13, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 512
; RV64ZVE32F-NEXT: bnez a2, .LBB102_14
; RV64ZVE32F-NEXT: j .LBB102_15
; RV64ZVE32F-NEXT: .LBB102_54: # %cond.store27
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 14
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 48
; RV64ZVE32F-NEXT: bgez a2, .LBB102_25
; RV64ZVE32F-NEXT: .LBB102_55: # %cond.store29
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 15
; RV64ZVE32F-NEXT: vse8.v v11, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 47
; RV64ZVE32F-NEXT: bgez a2, .LBB102_26
; RV64ZVE32F-NEXT: .LBB102_56: # %cond.store31
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 16
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 46
; RV64ZVE32F-NEXT: bltz a2, .LBB102_27
; RV64ZVE32F-NEXT: j .LBB102_28
; RV64ZVE32F-NEXT: .LBB102_57: # %cond.store35
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 18
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 44
; RV64ZVE32F-NEXT: bgez a2, .LBB102_30
; RV64ZVE32F-NEXT: .LBB102_58: # %cond.store37
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 43
; RV64ZVE32F-NEXT: bltz a2, .LBB102_31
; RV64ZVE32F-NEXT: j .LBB102_32
; RV64ZVE32F-NEXT: .LBB102_59: # %cond.store43
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 22
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 40
; RV64ZVE32F-NEXT: bgez a2, .LBB102_36
; RV64ZVE32F-NEXT: .LBB102_60: # %cond.store45
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 23
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 39
; RV64ZVE32F-NEXT: bgez a2, .LBB102_37
; RV64ZVE32F-NEXT: .LBB102_61: # %cond.store47
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 24
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 38
; RV64ZVE32F-NEXT: bltz a2, .LBB102_38
; RV64ZVE32F-NEXT: j .LBB102_39
; RV64ZVE32F-NEXT: .LBB102_62: # %cond.store51
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 26
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 36
; RV64ZVE32F-NEXT: bgez a2, .LBB102_41
; RV64ZVE32F-NEXT: .LBB102_63: # %cond.store53
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 27
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 35
; RV64ZVE32F-NEXT: bgez a2, .LBB102_42
; RV64ZVE32F-NEXT: .LBB102_64: # %cond.store55
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 28
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 34
; RV64ZVE32F-NEXT: bltz a2, .LBB102_43
; RV64ZVE32F-NEXT: j .LBB102_44
%ptrs = getelementptr inbounds i8, ptr %base, <32 x i8> %idxs
call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> %val, <32 x ptr> %ptrs, i32 1, <32 x i1> %m)
ret void
}

define void @mscatter_unit_stride(<8 x i16> %val, ptr %base) {
; CHECK-LABEL: mscatter_unit_stride:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> splat (i1 true))
ret void
}

define void @mscatter_unit_stride_with_offset(<8 x i16> %val, ptr %base) {
; CHECK-LABEL: mscatter_unit_stride_with_offset:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a0, a0, 10
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12>
call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> splat (i1 true))
ret void
}

define void @mscatter_shuffle_reverse(<8 x i16> %val, ptr %base) {
; CHECK-LABEL: mscatter_shuffle_reverse:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a0, a0, 14
; CHECK-NEXT: li a1, -2
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vsse16.v v8, (a0), a1
; CHECK-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>
call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> splat (i1 true))
ret void
}

define void @mscatter_shuffle_rotate(<8 x i16> %val, ptr %base) {
; RV32-LABEL: mscatter_shuffle_rotate:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vslidedown.vi v9, v8, 4
; RV32-NEXT: vslideup.vi v9, v8, 4
; RV32-NEXT: vse16.v v9, (a0)
; RV32-NEXT: ret
;
; RV64V-LABEL: mscatter_shuffle_rotate:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64V-NEXT: vslidedown.vi v9, v8, 4
; RV64V-NEXT: vslideup.vi v9, v8, 4
; RV64V-NEXT: vse16.v v9, (a0)
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_shuffle_rotate:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: addi a1, a0, 6
; RV64ZVE32F-NEXT: addi a2, a0, 4
; RV64ZVE32F-NEXT: addi a3, a0, 2
; RV64ZVE32F-NEXT: addi a4, a0, 14
; RV64ZVE32F-NEXT: addi a5, a0, 12
; RV64ZVE32F-NEXT: addi a6, a0, 10
; RV64ZVE32F-NEXT: addi a7, a0, 8
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vse16.v v8, (a7)
; RV64ZVE32F-NEXT: vse16.v v9, (a6)
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-NEXT: vse16.v v9, (a5)
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-NEXT: vse16.v v9, (a4)
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vse16.v v9, (a0)
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
; RV64ZVE32F-NEXT: vse16.v v9, (a3)
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 6
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-NEXT: vse16.v v9, (a2)
; RV64ZVE32F-NEXT: vse16.v v8, (a1)
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 4, i64 5, i64 6, i64 7, i64 0, i64 1, i64 2, i64 3>
call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> splat (i1 true))
ret void
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; RV32V-ZVFH: {{.*}}
; RV32V-ZVFHMIN: {{.*}}
; RV32ZVE32F-ZVFH: {{.*}}
; RV32ZVE32F-ZVFHMIN: {{.*}}
; RV64: {{.*}}
; RV64V-ZVFH: {{.*}}
; RV64V-ZVFHMIN: {{.*}}