; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32V
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+zve32f,+zvl128b -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVE32F
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+zve32f,+zvl128b -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64ZVE32F
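
; This file checks @llvm.masked.scatter codegen for fixed-length vectors of
; small integer element types: plain masked scatters, truncating scatters,
; all-ones/all-zeroes masks, and scatters whose pointers are a base plus a
; vector of indices. Each test is checked against full-V (RV32V/RV64) and
; Zve32f+Zvl128b (RV32ZVE32F/RV64ZVE32F) configurations; output shared between
; configurations uses the common CHECK/RV32/RV64 prefixes from the RUN lines.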

declare void @llvm.masked.scatter.v1i8.v1p0(<1 x i8>, <1 x ptr>, i32, <1 x i1>)
define void @mscatter_v1i8(<1 x i8> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
; RV32V-LABEL: mscatter_v1i8:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT: ret
;
; RV64-LABEL: mscatter_v1i8:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT: ret
;
; RV32ZVE32F-LABEL: mscatter_v1i8:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_v1i8:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vfirst.m a1, v0
; RV64ZVE32F-NEXT: bnez a1, .LBB0_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vse8.v v8, (a0)
; RV64ZVE32F-NEXT: .LBB0_2: # %else
; RV64ZVE32F-NEXT: ret
  call void @llvm.masked.scatter.v1i8.v1p0(<1 x i8> %val, <1 x ptr> %ptrs, i32 1, <1 x i1> %m)
  ret void
}

46 declare void @llvm.masked.scatter.v2i8.v2p0(<2 x i8>, <2 x ptr>, i32, <2 x i1>)
48 define void @mscatter_v2i8(<2 x i8> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
49 ; RV32V-LABEL: mscatter_v2i8:
51 ; RV32V-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
52 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
55 ; RV64-LABEL: mscatter_v2i8:
57 ; RV64-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
58 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
61 ; RV32ZVE32F-LABEL: mscatter_v2i8:
62 ; RV32ZVE32F: # %bb.0:
63 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
64 ; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
65 ; RV32ZVE32F-NEXT: ret
67 ; RV64ZVE32F-LABEL: mscatter_v2i8:
68 ; RV64ZVE32F: # %bb.0:
69 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
70 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
71 ; RV64ZVE32F-NEXT: andi a3, a2, 1
72 ; RV64ZVE32F-NEXT: bnez a3, .LBB1_3
73 ; RV64ZVE32F-NEXT: # %bb.1: # %else
74 ; RV64ZVE32F-NEXT: andi a2, a2, 2
75 ; RV64ZVE32F-NEXT: bnez a2, .LBB1_4
76 ; RV64ZVE32F-NEXT: .LBB1_2: # %else2
77 ; RV64ZVE32F-NEXT: ret
78 ; RV64ZVE32F-NEXT: .LBB1_3: # %cond.store
79 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
80 ; RV64ZVE32F-NEXT: vse8.v v8, (a0)
81 ; RV64ZVE32F-NEXT: andi a2, a2, 2
82 ; RV64ZVE32F-NEXT: beqz a2, .LBB1_2
83 ; RV64ZVE32F-NEXT: .LBB1_4: # %cond.store1
84 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
85 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
86 ; RV64ZVE32F-NEXT: vse8.v v8, (a1)
87 ; RV64ZVE32F-NEXT: ret
  call void @llvm.masked.scatter.v2i8.v2p0(<2 x i8> %val, <2 x ptr> %ptrs, i32 1, <2 x i1> %m)
  ret void
}

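; The *_truncstore_* tests below scatter a value that the IR first truncates.
; When the source element type fits in a vector register the value is narrowed
; in-register with vnsrl.wi (one step per halving) before the indexed store;
; for the i64 sources the ZVE32F configurations instead rebuild the narrowed
; vector with vmv.s.x/vslideup.vi from scalar registers (RV64ZVE32F) or from
; values reloaded with lw (RV32ZVE32F).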
92 define void @mscatter_v2i16_truncstore_v2i8(<2 x i16> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
93 ; RV32V-LABEL: mscatter_v2i16_truncstore_v2i8:
95 ; RV32V-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
96 ; RV32V-NEXT: vnsrl.wi v8, v8, 0
97 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
100 ; RV64-LABEL: mscatter_v2i16_truncstore_v2i8:
102 ; RV64-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
103 ; RV64-NEXT: vnsrl.wi v8, v8, 0
104 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
107 ; RV32ZVE32F-LABEL: mscatter_v2i16_truncstore_v2i8:
108 ; RV32ZVE32F: # %bb.0:
109 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
110 ; RV32ZVE32F-NEXT: vnsrl.wi v8, v8, 0
111 ; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
112 ; RV32ZVE32F-NEXT: ret
114 ; RV64ZVE32F-LABEL: mscatter_v2i16_truncstore_v2i8:
115 ; RV64ZVE32F: # %bb.0:
116 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
117 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
118 ; RV64ZVE32F-NEXT: andi a3, a2, 1
119 ; RV64ZVE32F-NEXT: vnsrl.wi v8, v8, 0
120 ; RV64ZVE32F-NEXT: bnez a3, .LBB2_3
121 ; RV64ZVE32F-NEXT: # %bb.1: # %else
122 ; RV64ZVE32F-NEXT: andi a2, a2, 2
123 ; RV64ZVE32F-NEXT: bnez a2, .LBB2_4
124 ; RV64ZVE32F-NEXT: .LBB2_2: # %else2
125 ; RV64ZVE32F-NEXT: ret
126 ; RV64ZVE32F-NEXT: .LBB2_3: # %cond.store
127 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
128 ; RV64ZVE32F-NEXT: vse8.v v8, (a0)
129 ; RV64ZVE32F-NEXT: andi a2, a2, 2
130 ; RV64ZVE32F-NEXT: beqz a2, .LBB2_2
131 ; RV64ZVE32F-NEXT: .LBB2_4: # %cond.store1
132 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
133 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
134 ; RV64ZVE32F-NEXT: vse8.v v8, (a1)
135 ; RV64ZVE32F-NEXT: ret
  %tval = trunc <2 x i16> %val to <2 x i8>
  call void @llvm.masked.scatter.v2i8.v2p0(<2 x i8> %tval, <2 x ptr> %ptrs, i32 1, <2 x i1> %m)
  ret void
}

141 define void @mscatter_v2i32_truncstore_v2i8(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
142 ; RV32V-LABEL: mscatter_v2i32_truncstore_v2i8:
144 ; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
145 ; RV32V-NEXT: vnsrl.wi v8, v8, 0
146 ; RV32V-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
147 ; RV32V-NEXT: vnsrl.wi v8, v8, 0
148 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
151 ; RV64-LABEL: mscatter_v2i32_truncstore_v2i8:
153 ; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
154 ; RV64-NEXT: vnsrl.wi v8, v8, 0
155 ; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
156 ; RV64-NEXT: vnsrl.wi v8, v8, 0
157 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
160 ; RV32ZVE32F-LABEL: mscatter_v2i32_truncstore_v2i8:
161 ; RV32ZVE32F: # %bb.0:
162 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
163 ; RV32ZVE32F-NEXT: vnsrl.wi v8, v8, 0
164 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
165 ; RV32ZVE32F-NEXT: vnsrl.wi v8, v8, 0
166 ; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
167 ; RV32ZVE32F-NEXT: ret
169 ; RV64ZVE32F-LABEL: mscatter_v2i32_truncstore_v2i8:
170 ; RV64ZVE32F: # %bb.0:
171 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
172 ; RV64ZVE32F-NEXT: vnsrl.wi v8, v8, 0
173 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
174 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
175 ; RV64ZVE32F-NEXT: andi a3, a2, 1
176 ; RV64ZVE32F-NEXT: vnsrl.wi v8, v8, 0
177 ; RV64ZVE32F-NEXT: bnez a3, .LBB3_3
178 ; RV64ZVE32F-NEXT: # %bb.1: # %else
179 ; RV64ZVE32F-NEXT: andi a2, a2, 2
180 ; RV64ZVE32F-NEXT: bnez a2, .LBB3_4
181 ; RV64ZVE32F-NEXT: .LBB3_2: # %else2
182 ; RV64ZVE32F-NEXT: ret
183 ; RV64ZVE32F-NEXT: .LBB3_3: # %cond.store
184 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
185 ; RV64ZVE32F-NEXT: vse8.v v8, (a0)
186 ; RV64ZVE32F-NEXT: andi a2, a2, 2
187 ; RV64ZVE32F-NEXT: beqz a2, .LBB3_2
188 ; RV64ZVE32F-NEXT: .LBB3_4: # %cond.store1
189 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
190 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
191 ; RV64ZVE32F-NEXT: vse8.v v8, (a1)
192 ; RV64ZVE32F-NEXT: ret
  %tval = trunc <2 x i32> %val to <2 x i8>
  call void @llvm.masked.scatter.v2i8.v2p0(<2 x i8> %tval, <2 x ptr> %ptrs, i32 1, <2 x i1> %m)
  ret void
}

198 define void @mscatter_v2i64_truncstore_v2i8(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
199 ; RV32V-LABEL: mscatter_v2i64_truncstore_v2i8:
201 ; RV32V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
202 ; RV32V-NEXT: vnsrl.wi v8, v8, 0
203 ; RV32V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
204 ; RV32V-NEXT: vnsrl.wi v8, v8, 0
205 ; RV32V-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
206 ; RV32V-NEXT: vnsrl.wi v8, v8, 0
207 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
210 ; RV64-LABEL: mscatter_v2i64_truncstore_v2i8:
212 ; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
213 ; RV64-NEXT: vnsrl.wi v8, v8, 0
214 ; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
215 ; RV64-NEXT: vnsrl.wi v8, v8, 0
216 ; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
217 ; RV64-NEXT: vnsrl.wi v8, v8, 0
218 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
221 ; RV32ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i8:
222 ; RV32ZVE32F: # %bb.0:
223 ; RV32ZVE32F-NEXT: lw a1, 8(a0)
224 ; RV32ZVE32F-NEXT: lw a0, 0(a0)
225 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
226 ; RV32ZVE32F-NEXT: vmv.s.x v9, a1
227 ; RV32ZVE32F-NEXT: vmv.s.x v10, a0
228 ; RV32ZVE32F-NEXT: vslideup.vi v10, v9, 1
229 ; RV32ZVE32F-NEXT: vsoxei32.v v10, (zero), v8, v0.t
230 ; RV32ZVE32F-NEXT: ret
232 ; RV64ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i8:
233 ; RV64ZVE32F: # %bb.0:
234 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
235 ; RV64ZVE32F-NEXT: vmv.s.x v9, a1
236 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
237 ; RV64ZVE32F-NEXT: vmv.x.s a0, v0
238 ; RV64ZVE32F-NEXT: andi a1, a0, 1
239 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
240 ; RV64ZVE32F-NEXT: bnez a1, .LBB4_3
241 ; RV64ZVE32F-NEXT: # %bb.1: # %else
242 ; RV64ZVE32F-NEXT: andi a0, a0, 2
243 ; RV64ZVE32F-NEXT: bnez a0, .LBB4_4
244 ; RV64ZVE32F-NEXT: .LBB4_2: # %else2
245 ; RV64ZVE32F-NEXT: ret
246 ; RV64ZVE32F-NEXT: .LBB4_3: # %cond.store
247 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
248 ; RV64ZVE32F-NEXT: vse8.v v8, (a2)
249 ; RV64ZVE32F-NEXT: andi a0, a0, 2
250 ; RV64ZVE32F-NEXT: beqz a0, .LBB4_2
251 ; RV64ZVE32F-NEXT: .LBB4_4: # %cond.store1
252 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
253 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
254 ; RV64ZVE32F-NEXT: vse8.v v8, (a3)
255 ; RV64ZVE32F-NEXT: ret
  %tval = trunc <2 x i64> %val to <2 x i8>
  call void @llvm.masked.scatter.v2i8.v2p0(<2 x i8> %tval, <2 x ptr> %ptrs, i32 1, <2 x i1> %m)
  ret void
}

261 declare void @llvm.masked.scatter.v4i8.v4p0(<4 x i8>, <4 x ptr>, i32, <4 x i1>)
263 define void @mscatter_v4i8(<4 x i8> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
264 ; RV32-LABEL: mscatter_v4i8:
266 ; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
267 ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
270 ; RV64-LABEL: mscatter_v4i8:
272 ; RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
273 ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
276 ; RV64ZVE32F-LABEL: mscatter_v4i8:
277 ; RV64ZVE32F: # %bb.0:
278 ; RV64ZVE32F-NEXT: ld a1, 24(a0)
279 ; RV64ZVE32F-NEXT: ld a2, 16(a0)
280 ; RV64ZVE32F-NEXT: ld a4, 8(a0)
281 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
282 ; RV64ZVE32F-NEXT: vmv.x.s a3, v0
283 ; RV64ZVE32F-NEXT: andi a5, a3, 1
284 ; RV64ZVE32F-NEXT: bnez a5, .LBB5_5
285 ; RV64ZVE32F-NEXT: # %bb.1: # %else
286 ; RV64ZVE32F-NEXT: andi a0, a3, 2
287 ; RV64ZVE32F-NEXT: bnez a0, .LBB5_6
288 ; RV64ZVE32F-NEXT: .LBB5_2: # %else2
289 ; RV64ZVE32F-NEXT: andi a0, a3, 4
290 ; RV64ZVE32F-NEXT: bnez a0, .LBB5_7
291 ; RV64ZVE32F-NEXT: .LBB5_3: # %else4
292 ; RV64ZVE32F-NEXT: andi a3, a3, 8
293 ; RV64ZVE32F-NEXT: bnez a3, .LBB5_8
294 ; RV64ZVE32F-NEXT: .LBB5_4: # %else6
295 ; RV64ZVE32F-NEXT: ret
296 ; RV64ZVE32F-NEXT: .LBB5_5: # %cond.store
297 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
298 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
299 ; RV64ZVE32F-NEXT: vse8.v v8, (a0)
300 ; RV64ZVE32F-NEXT: andi a0, a3, 2
301 ; RV64ZVE32F-NEXT: beqz a0, .LBB5_2
302 ; RV64ZVE32F-NEXT: .LBB5_6: # %cond.store1
303 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
304 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
305 ; RV64ZVE32F-NEXT: vse8.v v9, (a4)
306 ; RV64ZVE32F-NEXT: andi a0, a3, 4
307 ; RV64ZVE32F-NEXT: beqz a0, .LBB5_3
308 ; RV64ZVE32F-NEXT: .LBB5_7: # %cond.store3
309 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
310 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
311 ; RV64ZVE32F-NEXT: vse8.v v9, (a2)
312 ; RV64ZVE32F-NEXT: andi a3, a3, 8
313 ; RV64ZVE32F-NEXT: beqz a3, .LBB5_4
314 ; RV64ZVE32F-NEXT: .LBB5_8: # %cond.store5
315 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
316 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
317 ; RV64ZVE32F-NEXT: vse8.v v8, (a1)
318 ; RV64ZVE32F-NEXT: ret
  call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> %m)
  ret void
}

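; The truemask/falsemask tests check the degenerate masks: an all-ones mask
; folds to an unmasked indexed store (vsoxei without v0.t), and an all-zeroes
; mask makes the scatter a no-op, leaving only a ret.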
323 define void @mscatter_truemask_v4i8(<4 x i8> %val, <4 x ptr> %ptrs) {
324 ; RV32-LABEL: mscatter_truemask_v4i8:
326 ; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
327 ; RV32-NEXT: vsoxei32.v v8, (zero), v9
330 ; RV64-LABEL: mscatter_truemask_v4i8:
332 ; RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
333 ; RV64-NEXT: vsoxei64.v v8, (zero), v10
336 ; RV64ZVE32F-LABEL: mscatter_truemask_v4i8:
337 ; RV64ZVE32F: # %bb.0:
338 ; RV64ZVE32F-NEXT: ld a1, 0(a0)
339 ; RV64ZVE32F-NEXT: ld a2, 24(a0)
340 ; RV64ZVE32F-NEXT: ld a3, 8(a0)
341 ; RV64ZVE32F-NEXT: ld a0, 16(a0)
342 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
343 ; RV64ZVE32F-NEXT: vse8.v v8, (a1)
344 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
345 ; RV64ZVE32F-NEXT: vse8.v v9, (a3)
346 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
347 ; RV64ZVE32F-NEXT: vse8.v v9, (a0)
348 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
349 ; RV64ZVE32F-NEXT: vse8.v v8, (a2)
350 ; RV64ZVE32F-NEXT: ret
  call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 1))
  ret void
}

define void @mscatter_falsemask_v4i8(<4 x i8> %val, <4 x ptr> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_v4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: ret
  call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> zeroinitializer)
  ret void
}

363 declare void @llvm.masked.scatter.v8i8.v8p0(<8 x i8>, <8 x ptr>, i32, <8 x i1>)
365 define void @mscatter_v8i8(<8 x i8> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
366 ; RV32-LABEL: mscatter_v8i8:
368 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
369 ; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
372 ; RV64-LABEL: mscatter_v8i8:
374 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
375 ; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
378 ; RV64ZVE32F-LABEL: mscatter_v8i8:
379 ; RV64ZVE32F: # %bb.0:
380 ; RV64ZVE32F-NEXT: ld a1, 56(a0)
381 ; RV64ZVE32F-NEXT: ld a2, 48(a0)
382 ; RV64ZVE32F-NEXT: ld a4, 40(a0)
383 ; RV64ZVE32F-NEXT: ld a5, 32(a0)
384 ; RV64ZVE32F-NEXT: ld a6, 24(a0)
385 ; RV64ZVE32F-NEXT: ld a7, 16(a0)
386 ; RV64ZVE32F-NEXT: ld t0, 8(a0)
387 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
388 ; RV64ZVE32F-NEXT: vmv.x.s a3, v0
389 ; RV64ZVE32F-NEXT: andi t1, a3, 1
390 ; RV64ZVE32F-NEXT: bnez t1, .LBB8_9
391 ; RV64ZVE32F-NEXT: # %bb.1: # %else
392 ; RV64ZVE32F-NEXT: andi a0, a3, 2
393 ; RV64ZVE32F-NEXT: bnez a0, .LBB8_10
394 ; RV64ZVE32F-NEXT: .LBB8_2: # %else2
395 ; RV64ZVE32F-NEXT: andi a0, a3, 4
396 ; RV64ZVE32F-NEXT: bnez a0, .LBB8_11
397 ; RV64ZVE32F-NEXT: .LBB8_3: # %else4
398 ; RV64ZVE32F-NEXT: andi a0, a3, 8
399 ; RV64ZVE32F-NEXT: bnez a0, .LBB8_12
400 ; RV64ZVE32F-NEXT: .LBB8_4: # %else6
401 ; RV64ZVE32F-NEXT: andi a0, a3, 16
402 ; RV64ZVE32F-NEXT: bnez a0, .LBB8_13
403 ; RV64ZVE32F-NEXT: .LBB8_5: # %else8
404 ; RV64ZVE32F-NEXT: andi a0, a3, 32
405 ; RV64ZVE32F-NEXT: bnez a0, .LBB8_14
406 ; RV64ZVE32F-NEXT: .LBB8_6: # %else10
407 ; RV64ZVE32F-NEXT: andi a0, a3, 64
408 ; RV64ZVE32F-NEXT: bnez a0, .LBB8_15
409 ; RV64ZVE32F-NEXT: .LBB8_7: # %else12
410 ; RV64ZVE32F-NEXT: andi a0, a3, -128
411 ; RV64ZVE32F-NEXT: bnez a0, .LBB8_16
412 ; RV64ZVE32F-NEXT: .LBB8_8: # %else14
413 ; RV64ZVE32F-NEXT: ret
414 ; RV64ZVE32F-NEXT: .LBB8_9: # %cond.store
415 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
416 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
417 ; RV64ZVE32F-NEXT: vse8.v v8, (a0)
418 ; RV64ZVE32F-NEXT: andi a0, a3, 2
419 ; RV64ZVE32F-NEXT: beqz a0, .LBB8_2
420 ; RV64ZVE32F-NEXT: .LBB8_10: # %cond.store1
421 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
422 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
423 ; RV64ZVE32F-NEXT: vse8.v v9, (t0)
424 ; RV64ZVE32F-NEXT: andi a0, a3, 4
425 ; RV64ZVE32F-NEXT: beqz a0, .LBB8_3
426 ; RV64ZVE32F-NEXT: .LBB8_11: # %cond.store3
427 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
428 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
429 ; RV64ZVE32F-NEXT: vse8.v v9, (a7)
430 ; RV64ZVE32F-NEXT: andi a0, a3, 8
431 ; RV64ZVE32F-NEXT: beqz a0, .LBB8_4
432 ; RV64ZVE32F-NEXT: .LBB8_12: # %cond.store5
433 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
434 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
435 ; RV64ZVE32F-NEXT: vse8.v v9, (a6)
436 ; RV64ZVE32F-NEXT: andi a0, a3, 16
437 ; RV64ZVE32F-NEXT: beqz a0, .LBB8_5
438 ; RV64ZVE32F-NEXT: .LBB8_13: # %cond.store7
439 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
440 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
441 ; RV64ZVE32F-NEXT: vse8.v v9, (a5)
442 ; RV64ZVE32F-NEXT: andi a0, a3, 32
443 ; RV64ZVE32F-NEXT: beqz a0, .LBB8_6
444 ; RV64ZVE32F-NEXT: .LBB8_14: # %cond.store9
445 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
446 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
447 ; RV64ZVE32F-NEXT: vse8.v v9, (a4)
448 ; RV64ZVE32F-NEXT: andi a0, a3, 64
449 ; RV64ZVE32F-NEXT: beqz a0, .LBB8_7
450 ; RV64ZVE32F-NEXT: .LBB8_15: # %cond.store11
451 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
452 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 6
453 ; RV64ZVE32F-NEXT: vse8.v v9, (a2)
454 ; RV64ZVE32F-NEXT: andi a0, a3, -128
455 ; RV64ZVE32F-NEXT: beqz a0, .LBB8_8
456 ; RV64ZVE32F-NEXT: .LBB8_16: # %cond.store13
457 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
458 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
459 ; RV64ZVE32F-NEXT: vse8.v v8, (a1)
460 ; RV64ZVE32F-NEXT: ret
  call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> %val, <8 x ptr> %ptrs, i32 1, <8 x i1> %m)
  ret void
}

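; The baseidx tests scatter through pointers formed as %base plus a vector of
; indices. RV32 sign-extends the i8 indices with vsext.vf4 and RV64 with
; vsext.vf8 to feed vsoxei32/vsoxei64, while RV64ZVE32F has no 64-bit indexed
; store and instead tests each mask bit, computes a scalar address from the
; extracted index, and stores the matching element with vse8/vse16.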
465 define void @mscatter_baseidx_v8i8(<8 x i8> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
466 ; RV32-LABEL: mscatter_baseidx_v8i8:
468 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
469 ; RV32-NEXT: vsext.vf4 v10, v9
470 ; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
471 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
474 ; RV64-LABEL: mscatter_baseidx_v8i8:
476 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
477 ; RV64-NEXT: vsext.vf8 v12, v9
478 ; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
479 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
482 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8:
483 ; RV64ZVE32F: # %bb.0:
484 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
485 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
486 ; RV64ZVE32F-NEXT: andi a2, a1, 1
487 ; RV64ZVE32F-NEXT: beqz a2, .LBB9_2
488 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
489 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
490 ; RV64ZVE32F-NEXT: add a2, a0, a2
491 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
492 ; RV64ZVE32F-NEXT: vse8.v v8, (a2)
493 ; RV64ZVE32F-NEXT: .LBB9_2: # %else
494 ; RV64ZVE32F-NEXT: andi a2, a1, 2
495 ; RV64ZVE32F-NEXT: beqz a2, .LBB9_4
496 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
497 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
498 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
499 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
500 ; RV64ZVE32F-NEXT: add a2, a0, a2
501 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
502 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
503 ; RV64ZVE32F-NEXT: vse8.v v10, (a2)
504 ; RV64ZVE32F-NEXT: .LBB9_4: # %else2
505 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
506 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
507 ; RV64ZVE32F-NEXT: andi a2, a1, 4
508 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
509 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
510 ; RV64ZVE32F-NEXT: bnez a2, .LBB9_12
511 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
512 ; RV64ZVE32F-NEXT: andi a2, a1, 8
513 ; RV64ZVE32F-NEXT: bnez a2, .LBB9_13
514 ; RV64ZVE32F-NEXT: .LBB9_6: # %else6
515 ; RV64ZVE32F-NEXT: andi a2, a1, 16
516 ; RV64ZVE32F-NEXT: bnez a2, .LBB9_14
517 ; RV64ZVE32F-NEXT: .LBB9_7: # %else8
518 ; RV64ZVE32F-NEXT: andi a2, a1, 32
519 ; RV64ZVE32F-NEXT: beqz a2, .LBB9_9
520 ; RV64ZVE32F-NEXT: .LBB9_8: # %cond.store9
521 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
522 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
523 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
524 ; RV64ZVE32F-NEXT: add a2, a0, a2
525 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
526 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
527 ; RV64ZVE32F-NEXT: vse8.v v9, (a2)
528 ; RV64ZVE32F-NEXT: .LBB9_9: # %else10
529 ; RV64ZVE32F-NEXT: andi a2, a1, 64
530 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
531 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
532 ; RV64ZVE32F-NEXT: bnez a2, .LBB9_15
533 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
534 ; RV64ZVE32F-NEXT: andi a1, a1, -128
535 ; RV64ZVE32F-NEXT: bnez a1, .LBB9_16
536 ; RV64ZVE32F-NEXT: .LBB9_11: # %else14
537 ; RV64ZVE32F-NEXT: ret
538 ; RV64ZVE32F-NEXT: .LBB9_12: # %cond.store3
539 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
540 ; RV64ZVE32F-NEXT: add a2, a0, a2
541 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
542 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
543 ; RV64ZVE32F-NEXT: vse8.v v11, (a2)
544 ; RV64ZVE32F-NEXT: andi a2, a1, 8
545 ; RV64ZVE32F-NEXT: beqz a2, .LBB9_6
546 ; RV64ZVE32F-NEXT: .LBB9_13: # %cond.store5
547 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
548 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
549 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
550 ; RV64ZVE32F-NEXT: add a2, a0, a2
551 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
552 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
553 ; RV64ZVE32F-NEXT: vse8.v v9, (a2)
554 ; RV64ZVE32F-NEXT: andi a2, a1, 16
555 ; RV64ZVE32F-NEXT: beqz a2, .LBB9_7
556 ; RV64ZVE32F-NEXT: .LBB9_14: # %cond.store7
557 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
558 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
559 ; RV64ZVE32F-NEXT: add a2, a0, a2
560 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
561 ; RV64ZVE32F-NEXT: vse8.v v9, (a2)
562 ; RV64ZVE32F-NEXT: andi a2, a1, 32
563 ; RV64ZVE32F-NEXT: bnez a2, .LBB9_8
564 ; RV64ZVE32F-NEXT: j .LBB9_9
565 ; RV64ZVE32F-NEXT: .LBB9_15: # %cond.store11
566 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
567 ; RV64ZVE32F-NEXT: add a2, a0, a2
568 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
569 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
570 ; RV64ZVE32F-NEXT: vse8.v v10, (a2)
571 ; RV64ZVE32F-NEXT: andi a1, a1, -128
572 ; RV64ZVE32F-NEXT: beqz a1, .LBB9_11
573 ; RV64ZVE32F-NEXT: .LBB9_16: # %cond.store13
574 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
575 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
576 ; RV64ZVE32F-NEXT: vmv.x.s a1, v9
577 ; RV64ZVE32F-NEXT: add a0, a0, a1
578 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
579 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
580 ; RV64ZVE32F-NEXT: vse8.v v8, (a0)
581 ; RV64ZVE32F-NEXT: ret
  %ptrs = getelementptr inbounds i8, ptr %base, <8 x i8> %idxs
  call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> %val, <8 x ptr> %ptrs, i32 1, <8 x i1> %m)
  ret void
}

587 declare void @llvm.masked.scatter.v1i16.v1p0(<1 x i16>, <1 x ptr>, i32, <1 x i1>)
589 define void @mscatter_v1i16(<1 x i16> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
590 ; RV32V-LABEL: mscatter_v1i16:
592 ; RV32V-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
593 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
596 ; RV64-LABEL: mscatter_v1i16:
598 ; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
599 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
602 ; RV32ZVE32F-LABEL: mscatter_v1i16:
603 ; RV32ZVE32F: # %bb.0:
604 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
605 ; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
606 ; RV32ZVE32F-NEXT: ret
608 ; RV64ZVE32F-LABEL: mscatter_v1i16:
609 ; RV64ZVE32F: # %bb.0:
610 ; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
611 ; RV64ZVE32F-NEXT: vfirst.m a1, v0
612 ; RV64ZVE32F-NEXT: bnez a1, .LBB10_2
613 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
614 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
615 ; RV64ZVE32F-NEXT: vse16.v v8, (a0)
616 ; RV64ZVE32F-NEXT: .LBB10_2: # %else
617 ; RV64ZVE32F-NEXT: ret
  call void @llvm.masked.scatter.v1i16.v1p0(<1 x i16> %val, <1 x ptr> %ptrs, i32 2, <1 x i1> %m)
  ret void
}

622 declare void @llvm.masked.scatter.v2i16.v2p0(<2 x i16>, <2 x ptr>, i32, <2 x i1>)
624 define void @mscatter_v2i16(<2 x i16> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
625 ; RV32V-LABEL: mscatter_v2i16:
627 ; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
628 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
631 ; RV64-LABEL: mscatter_v2i16:
633 ; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
634 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
637 ; RV32ZVE32F-LABEL: mscatter_v2i16:
638 ; RV32ZVE32F: # %bb.0:
639 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
640 ; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
641 ; RV32ZVE32F-NEXT: ret
643 ; RV64ZVE32F-LABEL: mscatter_v2i16:
644 ; RV64ZVE32F: # %bb.0:
645 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
646 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
647 ; RV64ZVE32F-NEXT: andi a3, a2, 1
648 ; RV64ZVE32F-NEXT: bnez a3, .LBB11_3
649 ; RV64ZVE32F-NEXT: # %bb.1: # %else
650 ; RV64ZVE32F-NEXT: andi a2, a2, 2
651 ; RV64ZVE32F-NEXT: bnez a2, .LBB11_4
652 ; RV64ZVE32F-NEXT: .LBB11_2: # %else2
653 ; RV64ZVE32F-NEXT: ret
654 ; RV64ZVE32F-NEXT: .LBB11_3: # %cond.store
655 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
656 ; RV64ZVE32F-NEXT: vse16.v v8, (a0)
657 ; RV64ZVE32F-NEXT: andi a2, a2, 2
658 ; RV64ZVE32F-NEXT: beqz a2, .LBB11_2
659 ; RV64ZVE32F-NEXT: .LBB11_4: # %cond.store1
660 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
661 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
662 ; RV64ZVE32F-NEXT: vse16.v v8, (a1)
663 ; RV64ZVE32F-NEXT: ret
  call void @llvm.masked.scatter.v2i16.v2p0(<2 x i16> %val, <2 x ptr> %ptrs, i32 2, <2 x i1> %m)
  ret void
}

668 define void @mscatter_v2i32_truncstore_v2i16(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
669 ; RV32V-LABEL: mscatter_v2i32_truncstore_v2i16:
671 ; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
672 ; RV32V-NEXT: vnsrl.wi v8, v8, 0
673 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
676 ; RV64-LABEL: mscatter_v2i32_truncstore_v2i16:
678 ; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
679 ; RV64-NEXT: vnsrl.wi v8, v8, 0
680 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
683 ; RV32ZVE32F-LABEL: mscatter_v2i32_truncstore_v2i16:
684 ; RV32ZVE32F: # %bb.0:
685 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
686 ; RV32ZVE32F-NEXT: vnsrl.wi v8, v8, 0
687 ; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
688 ; RV32ZVE32F-NEXT: ret
690 ; RV64ZVE32F-LABEL: mscatter_v2i32_truncstore_v2i16:
691 ; RV64ZVE32F: # %bb.0:
692 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
693 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
694 ; RV64ZVE32F-NEXT: andi a3, a2, 1
695 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
696 ; RV64ZVE32F-NEXT: vnsrl.wi v8, v8, 0
697 ; RV64ZVE32F-NEXT: bnez a3, .LBB12_3
698 ; RV64ZVE32F-NEXT: # %bb.1: # %else
699 ; RV64ZVE32F-NEXT: andi a2, a2, 2
700 ; RV64ZVE32F-NEXT: bnez a2, .LBB12_4
701 ; RV64ZVE32F-NEXT: .LBB12_2: # %else2
702 ; RV64ZVE32F-NEXT: ret
703 ; RV64ZVE32F-NEXT: .LBB12_3: # %cond.store
704 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
705 ; RV64ZVE32F-NEXT: vse16.v v8, (a0)
706 ; RV64ZVE32F-NEXT: andi a2, a2, 2
707 ; RV64ZVE32F-NEXT: beqz a2, .LBB12_2
708 ; RV64ZVE32F-NEXT: .LBB12_4: # %cond.store1
709 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
710 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
711 ; RV64ZVE32F-NEXT: vse16.v v8, (a1)
712 ; RV64ZVE32F-NEXT: ret
  %tval = trunc <2 x i32> %val to <2 x i16>
  call void @llvm.masked.scatter.v2i16.v2p0(<2 x i16> %tval, <2 x ptr> %ptrs, i32 2, <2 x i1> %m)
  ret void
}

718 define void @mscatter_v2i64_truncstore_v2i16(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
719 ; RV32V-LABEL: mscatter_v2i64_truncstore_v2i16:
721 ; RV32V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
722 ; RV32V-NEXT: vnsrl.wi v8, v8, 0
723 ; RV32V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
724 ; RV32V-NEXT: vnsrl.wi v8, v8, 0
725 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
728 ; RV64-LABEL: mscatter_v2i64_truncstore_v2i16:
730 ; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
731 ; RV64-NEXT: vnsrl.wi v8, v8, 0
732 ; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
733 ; RV64-NEXT: vnsrl.wi v8, v8, 0
734 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
737 ; RV32ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i16:
738 ; RV32ZVE32F: # %bb.0:
739 ; RV32ZVE32F-NEXT: lw a1, 8(a0)
740 ; RV32ZVE32F-NEXT: lw a0, 0(a0)
741 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
742 ; RV32ZVE32F-NEXT: vmv.s.x v9, a1
743 ; RV32ZVE32F-NEXT: vmv.s.x v10, a0
744 ; RV32ZVE32F-NEXT: vslideup.vi v10, v9, 1
745 ; RV32ZVE32F-NEXT: vsoxei32.v v10, (zero), v8, v0.t
746 ; RV32ZVE32F-NEXT: ret
748 ; RV64ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i16:
749 ; RV64ZVE32F: # %bb.0:
750 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
751 ; RV64ZVE32F-NEXT: vmv.s.x v9, a1
752 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
753 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
754 ; RV64ZVE32F-NEXT: vmv.x.s a0, v0
755 ; RV64ZVE32F-NEXT: andi a1, a0, 1
756 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
757 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
758 ; RV64ZVE32F-NEXT: bnez a1, .LBB13_3
759 ; RV64ZVE32F-NEXT: # %bb.1: # %else
760 ; RV64ZVE32F-NEXT: andi a0, a0, 2
761 ; RV64ZVE32F-NEXT: bnez a0, .LBB13_4
762 ; RV64ZVE32F-NEXT: .LBB13_2: # %else2
763 ; RV64ZVE32F-NEXT: ret
764 ; RV64ZVE32F-NEXT: .LBB13_3: # %cond.store
765 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
766 ; RV64ZVE32F-NEXT: vse16.v v8, (a2)
767 ; RV64ZVE32F-NEXT: andi a0, a0, 2
768 ; RV64ZVE32F-NEXT: beqz a0, .LBB13_2
769 ; RV64ZVE32F-NEXT: .LBB13_4: # %cond.store1
770 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
771 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
772 ; RV64ZVE32F-NEXT: vse16.v v8, (a3)
773 ; RV64ZVE32F-NEXT: ret
  %tval = trunc <2 x i64> %val to <2 x i16>
  call void @llvm.masked.scatter.v2i16.v2p0(<2 x i16> %tval, <2 x ptr> %ptrs, i32 2, <2 x i1> %m)
  ret void
}

779 declare void @llvm.masked.scatter.v4i16.v4p0(<4 x i16>, <4 x ptr>, i32, <4 x i1>)
781 define void @mscatter_v4i16(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
782 ; RV32-LABEL: mscatter_v4i16:
784 ; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
785 ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
788 ; RV64-LABEL: mscatter_v4i16:
790 ; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
791 ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
794 ; RV64ZVE32F-LABEL: mscatter_v4i16:
795 ; RV64ZVE32F: # %bb.0:
796 ; RV64ZVE32F-NEXT: ld a1, 24(a0)
797 ; RV64ZVE32F-NEXT: ld a2, 16(a0)
798 ; RV64ZVE32F-NEXT: ld a4, 8(a0)
799 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
800 ; RV64ZVE32F-NEXT: vmv.x.s a3, v0
801 ; RV64ZVE32F-NEXT: andi a5, a3, 1
802 ; RV64ZVE32F-NEXT: bnez a5, .LBB14_5
803 ; RV64ZVE32F-NEXT: # %bb.1: # %else
804 ; RV64ZVE32F-NEXT: andi a0, a3, 2
805 ; RV64ZVE32F-NEXT: bnez a0, .LBB14_6
806 ; RV64ZVE32F-NEXT: .LBB14_2: # %else2
807 ; RV64ZVE32F-NEXT: andi a0, a3, 4
808 ; RV64ZVE32F-NEXT: bnez a0, .LBB14_7
809 ; RV64ZVE32F-NEXT: .LBB14_3: # %else4
810 ; RV64ZVE32F-NEXT: andi a3, a3, 8
811 ; RV64ZVE32F-NEXT: bnez a3, .LBB14_8
812 ; RV64ZVE32F-NEXT: .LBB14_4: # %else6
813 ; RV64ZVE32F-NEXT: ret
814 ; RV64ZVE32F-NEXT: .LBB14_5: # %cond.store
815 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
816 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
817 ; RV64ZVE32F-NEXT: vse16.v v8, (a0)
818 ; RV64ZVE32F-NEXT: andi a0, a3, 2
819 ; RV64ZVE32F-NEXT: beqz a0, .LBB14_2
820 ; RV64ZVE32F-NEXT: .LBB14_6: # %cond.store1
821 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
822 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
823 ; RV64ZVE32F-NEXT: vse16.v v9, (a4)
824 ; RV64ZVE32F-NEXT: andi a0, a3, 4
825 ; RV64ZVE32F-NEXT: beqz a0, .LBB14_3
826 ; RV64ZVE32F-NEXT: .LBB14_7: # %cond.store3
827 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
828 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
829 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
830 ; RV64ZVE32F-NEXT: andi a3, a3, 8
831 ; RV64ZVE32F-NEXT: beqz a3, .LBB14_4
832 ; RV64ZVE32F-NEXT: .LBB14_8: # %cond.store5
833 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
834 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
835 ; RV64ZVE32F-NEXT: vse16.v v8, (a1)
836 ; RV64ZVE32F-NEXT: ret
  call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> %m)
  ret void
}

841 define void @mscatter_truemask_v4i16(<4 x i16> %val, <4 x ptr> %ptrs) {
842 ; RV32-LABEL: mscatter_truemask_v4i16:
844 ; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
845 ; RV32-NEXT: vsoxei32.v v8, (zero), v9
848 ; RV64-LABEL: mscatter_truemask_v4i16:
850 ; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
851 ; RV64-NEXT: vsoxei64.v v8, (zero), v10
854 ; RV64ZVE32F-LABEL: mscatter_truemask_v4i16:
855 ; RV64ZVE32F: # %bb.0:
856 ; RV64ZVE32F-NEXT: ld a1, 0(a0)
857 ; RV64ZVE32F-NEXT: ld a2, 24(a0)
858 ; RV64ZVE32F-NEXT: ld a3, 8(a0)
859 ; RV64ZVE32F-NEXT: ld a0, 16(a0)
860 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
861 ; RV64ZVE32F-NEXT: vse16.v v8, (a1)
862 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
863 ; RV64ZVE32F-NEXT: vse16.v v9, (a3)
864 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
865 ; RV64ZVE32F-NEXT: vse16.v v9, (a0)
866 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
867 ; RV64ZVE32F-NEXT: vse16.v v8, (a2)
868 ; RV64ZVE32F-NEXT: ret
  call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1))
  ret void
}

define void @mscatter_falsemask_v4i16(<4 x i16> %val, <4 x ptr> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_v4i16:
; CHECK: # %bb.0:
; CHECK-NEXT: ret
  call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> zeroinitializer)
  ret void
}

881 declare void @llvm.masked.scatter.v8i16.v8p0(<8 x i16>, <8 x ptr>, i32, <8 x i1>)
883 define void @mscatter_v8i16(<8 x i16> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
884 ; RV32-LABEL: mscatter_v8i16:
886 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
887 ; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
890 ; RV64-LABEL: mscatter_v8i16:
892 ; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
893 ; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
896 ; RV64ZVE32F-LABEL: mscatter_v8i16:
897 ; RV64ZVE32F: # %bb.0:
898 ; RV64ZVE32F-NEXT: ld a1, 56(a0)
899 ; RV64ZVE32F-NEXT: ld a2, 48(a0)
900 ; RV64ZVE32F-NEXT: ld a4, 40(a0)
901 ; RV64ZVE32F-NEXT: ld a5, 32(a0)
902 ; RV64ZVE32F-NEXT: ld a6, 24(a0)
903 ; RV64ZVE32F-NEXT: ld a7, 16(a0)
904 ; RV64ZVE32F-NEXT: ld t0, 8(a0)
905 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
906 ; RV64ZVE32F-NEXT: vmv.x.s a3, v0
907 ; RV64ZVE32F-NEXT: andi t1, a3, 1
908 ; RV64ZVE32F-NEXT: bnez t1, .LBB17_9
909 ; RV64ZVE32F-NEXT: # %bb.1: # %else
910 ; RV64ZVE32F-NEXT: andi a0, a3, 2
911 ; RV64ZVE32F-NEXT: bnez a0, .LBB17_10
912 ; RV64ZVE32F-NEXT: .LBB17_2: # %else2
913 ; RV64ZVE32F-NEXT: andi a0, a3, 4
914 ; RV64ZVE32F-NEXT: bnez a0, .LBB17_11
915 ; RV64ZVE32F-NEXT: .LBB17_3: # %else4
916 ; RV64ZVE32F-NEXT: andi a0, a3, 8
917 ; RV64ZVE32F-NEXT: bnez a0, .LBB17_12
918 ; RV64ZVE32F-NEXT: .LBB17_4: # %else6
919 ; RV64ZVE32F-NEXT: andi a0, a3, 16
920 ; RV64ZVE32F-NEXT: bnez a0, .LBB17_13
921 ; RV64ZVE32F-NEXT: .LBB17_5: # %else8
922 ; RV64ZVE32F-NEXT: andi a0, a3, 32
923 ; RV64ZVE32F-NEXT: bnez a0, .LBB17_14
924 ; RV64ZVE32F-NEXT: .LBB17_6: # %else10
925 ; RV64ZVE32F-NEXT: andi a0, a3, 64
926 ; RV64ZVE32F-NEXT: bnez a0, .LBB17_15
927 ; RV64ZVE32F-NEXT: .LBB17_7: # %else12
928 ; RV64ZVE32F-NEXT: andi a0, a3, -128
929 ; RV64ZVE32F-NEXT: bnez a0, .LBB17_16
930 ; RV64ZVE32F-NEXT: .LBB17_8: # %else14
931 ; RV64ZVE32F-NEXT: ret
932 ; RV64ZVE32F-NEXT: .LBB17_9: # %cond.store
933 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
934 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
935 ; RV64ZVE32F-NEXT: vse16.v v8, (a0)
936 ; RV64ZVE32F-NEXT: andi a0, a3, 2
937 ; RV64ZVE32F-NEXT: beqz a0, .LBB17_2
938 ; RV64ZVE32F-NEXT: .LBB17_10: # %cond.store1
939 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
940 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
941 ; RV64ZVE32F-NEXT: vse16.v v9, (t0)
942 ; RV64ZVE32F-NEXT: andi a0, a3, 4
943 ; RV64ZVE32F-NEXT: beqz a0, .LBB17_3
944 ; RV64ZVE32F-NEXT: .LBB17_11: # %cond.store3
945 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
946 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
947 ; RV64ZVE32F-NEXT: vse16.v v9, (a7)
948 ; RV64ZVE32F-NEXT: andi a0, a3, 8
949 ; RV64ZVE32F-NEXT: beqz a0, .LBB17_4
950 ; RV64ZVE32F-NEXT: .LBB17_12: # %cond.store5
951 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
952 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
953 ; RV64ZVE32F-NEXT: vse16.v v9, (a6)
954 ; RV64ZVE32F-NEXT: andi a0, a3, 16
955 ; RV64ZVE32F-NEXT: beqz a0, .LBB17_5
956 ; RV64ZVE32F-NEXT: .LBB17_13: # %cond.store7
957 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
958 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
959 ; RV64ZVE32F-NEXT: vse16.v v9, (a5)
960 ; RV64ZVE32F-NEXT: andi a0, a3, 32
961 ; RV64ZVE32F-NEXT: beqz a0, .LBB17_6
962 ; RV64ZVE32F-NEXT: .LBB17_14: # %cond.store9
963 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
964 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
965 ; RV64ZVE32F-NEXT: vse16.v v9, (a4)
966 ; RV64ZVE32F-NEXT: andi a0, a3, 64
967 ; RV64ZVE32F-NEXT: beqz a0, .LBB17_7
968 ; RV64ZVE32F-NEXT: .LBB17_15: # %cond.store11
969 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
970 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 6
971 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
972 ; RV64ZVE32F-NEXT: andi a0, a3, -128
973 ; RV64ZVE32F-NEXT: beqz a0, .LBB17_8
974 ; RV64ZVE32F-NEXT: .LBB17_16: # %cond.store13
975 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
976 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
977 ; RV64ZVE32F-NEXT: vse16.v v8, (a1)
978 ; RV64ZVE32F-NEXT: ret
  call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
  ret void
}

983 define void @mscatter_baseidx_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
984 ; RV32-LABEL: mscatter_baseidx_v8i8_v8i16:
986 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
987 ; RV32-NEXT: vsext.vf4 v10, v9
988 ; RV32-NEXT: vadd.vv v10, v10, v10
989 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
990 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
993 ; RV64-LABEL: mscatter_baseidx_v8i8_v8i16:
995 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
996 ; RV64-NEXT: vsext.vf8 v12, v9
997 ; RV64-NEXT: vadd.vv v12, v12, v12
998 ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma
999 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
1002 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8_v8i16:
1003 ; RV64ZVE32F: # %bb.0:
1004 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
1005 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
1006 ; RV64ZVE32F-NEXT: andi a2, a1, 1
1007 ; RV64ZVE32F-NEXT: beqz a2, .LBB18_2
1008 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
1009 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1010 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1011 ; RV64ZVE32F-NEXT: add a2, a0, a2
1012 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1013 ; RV64ZVE32F-NEXT: vse16.v v8, (a2)
1014 ; RV64ZVE32F-NEXT: .LBB18_2: # %else
1015 ; RV64ZVE32F-NEXT: andi a2, a1, 2
1016 ; RV64ZVE32F-NEXT: beqz a2, .LBB18_4
1017 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
1018 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1019 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
1020 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
1021 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1022 ; RV64ZVE32F-NEXT: add a2, a0, a2
1023 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1024 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
1025 ; RV64ZVE32F-NEXT: vse16.v v10, (a2)
1026 ; RV64ZVE32F-NEXT: .LBB18_4: # %else2
1027 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
1028 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
1029 ; RV64ZVE32F-NEXT: andi a2, a1, 4
1030 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
1031 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
1032 ; RV64ZVE32F-NEXT: bnez a2, .LBB18_12
1033 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
1034 ; RV64ZVE32F-NEXT: andi a2, a1, 8
1035 ; RV64ZVE32F-NEXT: bnez a2, .LBB18_13
1036 ; RV64ZVE32F-NEXT: .LBB18_6: # %else6
1037 ; RV64ZVE32F-NEXT: andi a2, a1, 16
1038 ; RV64ZVE32F-NEXT: bnez a2, .LBB18_14
1039 ; RV64ZVE32F-NEXT: .LBB18_7: # %else8
1040 ; RV64ZVE32F-NEXT: andi a2, a1, 32
1041 ; RV64ZVE32F-NEXT: beqz a2, .LBB18_9
1042 ; RV64ZVE32F-NEXT: .LBB18_8: # %cond.store9
1043 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1044 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
1045 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1046 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1047 ; RV64ZVE32F-NEXT: add a2, a0, a2
1048 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1049 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
1050 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
1051 ; RV64ZVE32F-NEXT: .LBB18_9: # %else10
1052 ; RV64ZVE32F-NEXT: andi a2, a1, 64
1053 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
1054 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
1055 ; RV64ZVE32F-NEXT: bnez a2, .LBB18_15
1056 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
1057 ; RV64ZVE32F-NEXT: andi a1, a1, -128
1058 ; RV64ZVE32F-NEXT: bnez a1, .LBB18_16
1059 ; RV64ZVE32F-NEXT: .LBB18_11: # %else14
1060 ; RV64ZVE32F-NEXT: ret
1061 ; RV64ZVE32F-NEXT: .LBB18_12: # %cond.store3
1062 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1063 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1064 ; RV64ZVE32F-NEXT: add a2, a0, a2
1065 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1066 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
1067 ; RV64ZVE32F-NEXT: vse16.v v11, (a2)
1068 ; RV64ZVE32F-NEXT: andi a2, a1, 8
1069 ; RV64ZVE32F-NEXT: beqz a2, .LBB18_6
1070 ; RV64ZVE32F-NEXT: .LBB18_13: # %cond.store5
1071 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1072 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
1073 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1074 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1075 ; RV64ZVE32F-NEXT: add a2, a0, a2
1076 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1077 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
1078 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
1079 ; RV64ZVE32F-NEXT: andi a2, a1, 16
1080 ; RV64ZVE32F-NEXT: beqz a2, .LBB18_7
1081 ; RV64ZVE32F-NEXT: .LBB18_14: # %cond.store7
1082 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
1083 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
1084 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1085 ; RV64ZVE32F-NEXT: add a2, a0, a2
1086 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1087 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
1088 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
1089 ; RV64ZVE32F-NEXT: andi a2, a1, 32
1090 ; RV64ZVE32F-NEXT: bnez a2, .LBB18_8
1091 ; RV64ZVE32F-NEXT: j .LBB18_9
1092 ; RV64ZVE32F-NEXT: .LBB18_15: # %cond.store11
1093 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1094 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1095 ; RV64ZVE32F-NEXT: add a2, a0, a2
1096 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1097 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
1098 ; RV64ZVE32F-NEXT: vse16.v v10, (a2)
1099 ; RV64ZVE32F-NEXT: andi a1, a1, -128
1100 ; RV64ZVE32F-NEXT: beqz a1, .LBB18_11
1101 ; RV64ZVE32F-NEXT: .LBB18_16: # %cond.store13
1102 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1103 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
1104 ; RV64ZVE32F-NEXT: vmv.x.s a1, v9
1105 ; RV64ZVE32F-NEXT: slli a1, a1, 1
1106 ; RV64ZVE32F-NEXT: add a0, a0, a1
1107 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1108 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
1109 ; RV64ZVE32F-NEXT: vse16.v v8, (a0)
1110 ; RV64ZVE32F-NEXT: ret
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i8> %idxs
  call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
  ret void
}

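; The sext variant of the i8-index i16 scatter lowers the same way as the plain
; i8-index case. The zext variant can instead use vwaddu.vv to produce the
; doubled, zero-extended offsets and a 16-bit indexed store (vsoxei16), and
; RV64ZVE32F clears the upper bits of each extracted index with andi ..., 255.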
1116 define void @mscatter_baseidx_sext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
1117 ; RV32-LABEL: mscatter_baseidx_sext_v8i8_v8i16:
1119 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1120 ; RV32-NEXT: vsext.vf4 v10, v9
1121 ; RV32-NEXT: vadd.vv v10, v10, v10
1122 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
1123 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
1126 ; RV64-LABEL: mscatter_baseidx_sext_v8i8_v8i16:
1128 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1129 ; RV64-NEXT: vsext.vf8 v12, v9
1130 ; RV64-NEXT: vadd.vv v12, v12, v12
1131 ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma
1132 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
1135 ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8i16:
1136 ; RV64ZVE32F: # %bb.0:
1137 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
1138 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
1139 ; RV64ZVE32F-NEXT: andi a2, a1, 1
1140 ; RV64ZVE32F-NEXT: beqz a2, .LBB19_2
1141 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
1142 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1143 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1144 ; RV64ZVE32F-NEXT: add a2, a0, a2
1145 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1146 ; RV64ZVE32F-NEXT: vse16.v v8, (a2)
1147 ; RV64ZVE32F-NEXT: .LBB19_2: # %else
1148 ; RV64ZVE32F-NEXT: andi a2, a1, 2
1149 ; RV64ZVE32F-NEXT: beqz a2, .LBB19_4
1150 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
1151 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1152 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
1153 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
1154 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1155 ; RV64ZVE32F-NEXT: add a2, a0, a2
1156 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1157 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
1158 ; RV64ZVE32F-NEXT: vse16.v v10, (a2)
1159 ; RV64ZVE32F-NEXT: .LBB19_4: # %else2
1160 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
1161 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
1162 ; RV64ZVE32F-NEXT: andi a2, a1, 4
1163 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
1164 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
1165 ; RV64ZVE32F-NEXT: bnez a2, .LBB19_12
1166 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
1167 ; RV64ZVE32F-NEXT: andi a2, a1, 8
1168 ; RV64ZVE32F-NEXT: bnez a2, .LBB19_13
1169 ; RV64ZVE32F-NEXT: .LBB19_6: # %else6
1170 ; RV64ZVE32F-NEXT: andi a2, a1, 16
1171 ; RV64ZVE32F-NEXT: bnez a2, .LBB19_14
1172 ; RV64ZVE32F-NEXT: .LBB19_7: # %else8
1173 ; RV64ZVE32F-NEXT: andi a2, a1, 32
1174 ; RV64ZVE32F-NEXT: beqz a2, .LBB19_9
1175 ; RV64ZVE32F-NEXT: .LBB19_8: # %cond.store9
1176 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1177 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
1178 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1179 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1180 ; RV64ZVE32F-NEXT: add a2, a0, a2
1181 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1182 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
1183 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
1184 ; RV64ZVE32F-NEXT: .LBB19_9: # %else10
1185 ; RV64ZVE32F-NEXT: andi a2, a1, 64
1186 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
1187 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
1188 ; RV64ZVE32F-NEXT: bnez a2, .LBB19_15
1189 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
1190 ; RV64ZVE32F-NEXT: andi a1, a1, -128
1191 ; RV64ZVE32F-NEXT: bnez a1, .LBB19_16
1192 ; RV64ZVE32F-NEXT: .LBB19_11: # %else14
1193 ; RV64ZVE32F-NEXT: ret
1194 ; RV64ZVE32F-NEXT: .LBB19_12: # %cond.store3
1195 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1196 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1197 ; RV64ZVE32F-NEXT: add a2, a0, a2
1198 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1199 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
1200 ; RV64ZVE32F-NEXT: vse16.v v11, (a2)
1201 ; RV64ZVE32F-NEXT: andi a2, a1, 8
1202 ; RV64ZVE32F-NEXT: beqz a2, .LBB19_6
1203 ; RV64ZVE32F-NEXT: .LBB19_13: # %cond.store5
1204 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1205 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
1206 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1207 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1208 ; RV64ZVE32F-NEXT: add a2, a0, a2
1209 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1210 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
1211 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
1212 ; RV64ZVE32F-NEXT: andi a2, a1, 16
1213 ; RV64ZVE32F-NEXT: beqz a2, .LBB19_7
1214 ; RV64ZVE32F-NEXT: .LBB19_14: # %cond.store7
1215 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
1216 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
1217 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1218 ; RV64ZVE32F-NEXT: add a2, a0, a2
1219 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1220 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
1221 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
1222 ; RV64ZVE32F-NEXT: andi a2, a1, 32
1223 ; RV64ZVE32F-NEXT: bnez a2, .LBB19_8
1224 ; RV64ZVE32F-NEXT: j .LBB19_9
1225 ; RV64ZVE32F-NEXT: .LBB19_15: # %cond.store11
1226 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1227 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1228 ; RV64ZVE32F-NEXT: add a2, a0, a2
1229 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1230 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
1231 ; RV64ZVE32F-NEXT: vse16.v v10, (a2)
1232 ; RV64ZVE32F-NEXT: andi a1, a1, -128
1233 ; RV64ZVE32F-NEXT: beqz a1, .LBB19_11
1234 ; RV64ZVE32F-NEXT: .LBB19_16: # %cond.store13
1235 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1236 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
1237 ; RV64ZVE32F-NEXT: vmv.x.s a1, v9
1238 ; RV64ZVE32F-NEXT: slli a1, a1, 1
1239 ; RV64ZVE32F-NEXT: add a0, a0, a1
1240 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1241 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
1242 ; RV64ZVE32F-NEXT: vse16.v v8, (a0)
1243 ; RV64ZVE32F-NEXT: ret
  %eidxs = sext <8 x i8> %idxs to <8 x i16>
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %eidxs
  call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
  ret void
}

1250 define void @mscatter_baseidx_zext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
1251 ; RV32-LABEL: mscatter_baseidx_zext_v8i8_v8i16:
1253 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
1254 ; RV32-NEXT: vwaddu.vv v10, v9, v9
1255 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
1256 ; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t
1259 ; RV64-LABEL: mscatter_baseidx_zext_v8i8_v8i16:
1261 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
1262 ; RV64-NEXT: vwaddu.vv v10, v9, v9
1263 ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma
1264 ; RV64-NEXT: vsoxei16.v v8, (a0), v10, v0.t
1267 ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8i16:
1268 ; RV64ZVE32F: # %bb.0:
1269 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
1270 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
1271 ; RV64ZVE32F-NEXT: andi a2, a1, 1
1272 ; RV64ZVE32F-NEXT: beqz a2, .LBB20_2
1273 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
1274 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1275 ; RV64ZVE32F-NEXT: andi a2, a2, 255
1276 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1277 ; RV64ZVE32F-NEXT: add a2, a0, a2
1278 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1279 ; RV64ZVE32F-NEXT: vse16.v v8, (a2)
1280 ; RV64ZVE32F-NEXT: .LBB20_2: # %else
1281 ; RV64ZVE32F-NEXT: andi a2, a1, 2
1282 ; RV64ZVE32F-NEXT: beqz a2, .LBB20_4
1283 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
1284 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1285 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
1286 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
1287 ; RV64ZVE32F-NEXT: andi a2, a2, 255
1288 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1289 ; RV64ZVE32F-NEXT: add a2, a0, a2
1290 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1291 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
1292 ; RV64ZVE32F-NEXT: vse16.v v10, (a2)
1293 ; RV64ZVE32F-NEXT: .LBB20_4: # %else2
1294 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
1295 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
1296 ; RV64ZVE32F-NEXT: andi a2, a1, 4
1297 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
1298 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
1299 ; RV64ZVE32F-NEXT: bnez a2, .LBB20_12
1300 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
1301 ; RV64ZVE32F-NEXT: andi a2, a1, 8
1302 ; RV64ZVE32F-NEXT: bnez a2, .LBB20_13
1303 ; RV64ZVE32F-NEXT: .LBB20_6: # %else6
1304 ; RV64ZVE32F-NEXT: andi a2, a1, 16
1305 ; RV64ZVE32F-NEXT: bnez a2, .LBB20_14
1306 ; RV64ZVE32F-NEXT: .LBB20_7: # %else8
1307 ; RV64ZVE32F-NEXT: andi a2, a1, 32
1308 ; RV64ZVE32F-NEXT: beqz a2, .LBB20_9
1309 ; RV64ZVE32F-NEXT: .LBB20_8: # %cond.store9
1310 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1311 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
1312 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1313 ; RV64ZVE32F-NEXT: andi a2, a2, 255
1314 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1315 ; RV64ZVE32F-NEXT: add a2, a0, a2
1316 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1317 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
1318 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
1319 ; RV64ZVE32F-NEXT: .LBB20_9: # %else10
1320 ; RV64ZVE32F-NEXT: andi a2, a1, 64
1321 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
1322 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
1323 ; RV64ZVE32F-NEXT: bnez a2, .LBB20_15
1324 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
1325 ; RV64ZVE32F-NEXT: andi a1, a1, -128
1326 ; RV64ZVE32F-NEXT: bnez a1, .LBB20_16
1327 ; RV64ZVE32F-NEXT: .LBB20_11: # %else14
1328 ; RV64ZVE32F-NEXT: ret
1329 ; RV64ZVE32F-NEXT: .LBB20_12: # %cond.store3
1330 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1331 ; RV64ZVE32F-NEXT: andi a2, a2, 255
1332 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1333 ; RV64ZVE32F-NEXT: add a2, a0, a2
1334 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1335 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
1336 ; RV64ZVE32F-NEXT: vse16.v v11, (a2)
1337 ; RV64ZVE32F-NEXT: andi a2, a1, 8
1338 ; RV64ZVE32F-NEXT: beqz a2, .LBB20_6
1339 ; RV64ZVE32F-NEXT: .LBB20_13: # %cond.store5
1340 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1341 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
1342 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1343 ; RV64ZVE32F-NEXT: andi a2, a2, 255
1344 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1345 ; RV64ZVE32F-NEXT: add a2, a0, a2
1346 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1347 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
1348 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
1349 ; RV64ZVE32F-NEXT: andi a2, a1, 16
1350 ; RV64ZVE32F-NEXT: beqz a2, .LBB20_7
1351 ; RV64ZVE32F-NEXT: .LBB20_14: # %cond.store7
1352 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
1353 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
1354 ; RV64ZVE32F-NEXT: andi a2, a2, 255
1355 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1356 ; RV64ZVE32F-NEXT: add a2, a0, a2
1357 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1358 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
1359 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
1360 ; RV64ZVE32F-NEXT: andi a2, a1, 32
1361 ; RV64ZVE32F-NEXT: bnez a2, .LBB20_8
1362 ; RV64ZVE32F-NEXT: j .LBB20_9
1363 ; RV64ZVE32F-NEXT: .LBB20_15: # %cond.store11
1364 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1365 ; RV64ZVE32F-NEXT: andi a2, a2, 255
1366 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1367 ; RV64ZVE32F-NEXT: add a2, a0, a2
1368 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1369 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
1370 ; RV64ZVE32F-NEXT: vse16.v v10, (a2)
1371 ; RV64ZVE32F-NEXT: andi a1, a1, -128
1372 ; RV64ZVE32F-NEXT: beqz a1, .LBB20_11
1373 ; RV64ZVE32F-NEXT: .LBB20_16: # %cond.store13
1374 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1375 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
1376 ; RV64ZVE32F-NEXT: vmv.x.s a1, v9
1377 ; RV64ZVE32F-NEXT: andi a1, a1, 255
1378 ; RV64ZVE32F-NEXT: slli a1, a1, 1
1379 ; RV64ZVE32F-NEXT: add a0, a0, a1
1380 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1381 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
1382 ; RV64ZVE32F-NEXT: vse16.v v8, (a0)
1383 ; RV64ZVE32F-NEXT: ret
  %eidxs = zext <8 x i8> %idxs to <8 x i16>
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %eidxs
  call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
  ret void
}

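; With native i16 indices RV32 widens and doubles them in a single vwadd.vv,
; while RV64 still sign-extends to i64 with vsext.vf4 before the indexed store.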
1390 define void @mscatter_baseidx_v8i16(<8 x i16> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
1391 ; RV32-LABEL: mscatter_baseidx_v8i16:
1393 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1394 ; RV32-NEXT: vwadd.vv v10, v9, v9
1395 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
1398 ; RV64-LABEL: mscatter_baseidx_v8i16:
1400 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1401 ; RV64-NEXT: vsext.vf4 v12, v9
1402 ; RV64-NEXT: vadd.vv v12, v12, v12
1403 ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma
1404 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
1407 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i16:
1408 ; RV64ZVE32F: # %bb.0:
1409 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
1410 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
1411 ; RV64ZVE32F-NEXT: andi a2, a1, 1
1412 ; RV64ZVE32F-NEXT: beqz a2, .LBB21_2
1413 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
1414 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1415 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1416 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1417 ; RV64ZVE32F-NEXT: add a2, a0, a2
1418 ; RV64ZVE32F-NEXT: vse16.v v8, (a2)
1419 ; RV64ZVE32F-NEXT: .LBB21_2: # %else
1420 ; RV64ZVE32F-NEXT: andi a2, a1, 2
1421 ; RV64ZVE32F-NEXT: beqz a2, .LBB21_4
1422 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
1423 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
1424 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
1425 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
1426 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1427 ; RV64ZVE32F-NEXT: add a2, a0, a2
1428 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1429 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
1430 ; RV64ZVE32F-NEXT: vse16.v v10, (a2)
1431 ; RV64ZVE32F-NEXT: .LBB21_4: # %else2
1432 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
1433 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
1434 ; RV64ZVE32F-NEXT: andi a2, a1, 4
1435 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
1436 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
1437 ; RV64ZVE32F-NEXT: bnez a2, .LBB21_12
1438 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
1439 ; RV64ZVE32F-NEXT: andi a2, a1, 8
1440 ; RV64ZVE32F-NEXT: bnez a2, .LBB21_13
1441 ; RV64ZVE32F-NEXT: .LBB21_6: # %else6
1442 ; RV64ZVE32F-NEXT: andi a2, a1, 16
1443 ; RV64ZVE32F-NEXT: bnez a2, .LBB21_14
1444 ; RV64ZVE32F-NEXT: .LBB21_7: # %else8
1445 ; RV64ZVE32F-NEXT: andi a2, a1, 32
1446 ; RV64ZVE32F-NEXT: beqz a2, .LBB21_9
1447 ; RV64ZVE32F-NEXT: .LBB21_8: # %cond.store9
1448 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
1449 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
1450 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1451 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1452 ; RV64ZVE32F-NEXT: add a2, a0, a2
1453 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1454 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
1455 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
1456 ; RV64ZVE32F-NEXT: .LBB21_9: # %else10
1457 ; RV64ZVE32F-NEXT: andi a2, a1, 64
1458 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
1459 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
1460 ; RV64ZVE32F-NEXT: bnez a2, .LBB21_15
1461 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
1462 ; RV64ZVE32F-NEXT: andi a1, a1, -128
1463 ; RV64ZVE32F-NEXT: bnez a1, .LBB21_16
1464 ; RV64ZVE32F-NEXT: .LBB21_11: # %else14
1465 ; RV64ZVE32F-NEXT: ret
1466 ; RV64ZVE32F-NEXT: .LBB21_12: # %cond.store3
1467 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1468 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1469 ; RV64ZVE32F-NEXT: add a2, a0, a2
1470 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1471 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
1472 ; RV64ZVE32F-NEXT: vse16.v v11, (a2)
1473 ; RV64ZVE32F-NEXT: andi a2, a1, 8
1474 ; RV64ZVE32F-NEXT: beqz a2, .LBB21_6
1475 ; RV64ZVE32F-NEXT: .LBB21_13: # %cond.store5
1476 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
1477 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
1478 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1479 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1480 ; RV64ZVE32F-NEXT: add a2, a0, a2
1481 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1482 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
1483 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
1484 ; RV64ZVE32F-NEXT: andi a2, a1, 16
1485 ; RV64ZVE32F-NEXT: beqz a2, .LBB21_7
1486 ; RV64ZVE32F-NEXT: .LBB21_14: # %cond.store7
1487 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1488 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
1489 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1490 ; RV64ZVE32F-NEXT: add a2, a0, a2
1491 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
1492 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
1493 ; RV64ZVE32F-NEXT: andi a2, a1, 32
1494 ; RV64ZVE32F-NEXT: bnez a2, .LBB21_8
1495 ; RV64ZVE32F-NEXT: j .LBB21_9
1496 ; RV64ZVE32F-NEXT: .LBB21_15: # %cond.store11
1497 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1498 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1499 ; RV64ZVE32F-NEXT: add a2, a0, a2
1500 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1501 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
1502 ; RV64ZVE32F-NEXT: vse16.v v10, (a2)
1503 ; RV64ZVE32F-NEXT: andi a1, a1, -128
1504 ; RV64ZVE32F-NEXT: beqz a1, .LBB21_11
1505 ; RV64ZVE32F-NEXT: .LBB21_16: # %cond.store13
1506 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
1507 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
1508 ; RV64ZVE32F-NEXT: vmv.x.s a1, v9
1509 ; RV64ZVE32F-NEXT: slli a1, a1, 1
1510 ; RV64ZVE32F-NEXT: add a0, a0, a1
1511 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1512 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
1513 ; RV64ZVE32F-NEXT: vse16.v v8, (a0)
1514 ; RV64ZVE32F-NEXT: ret
1515 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %idxs
1516 call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
  ret void
}
1520 declare void @llvm.masked.scatter.v1i32.v1p0(<1 x i32>, <1 x ptr>, i32, <1 x i1>)
1522 define void @mscatter_v1i32(<1 x i32> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
1523 ; RV32V-LABEL: mscatter_v1i32:
1525 ; RV32V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
1526 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
1529 ; RV64-LABEL: mscatter_v1i32:
1531 ; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
1532 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
1535 ; RV32ZVE32F-LABEL: mscatter_v1i32:
1536 ; RV32ZVE32F: # %bb.0:
1537 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1538 ; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
1539 ; RV32ZVE32F-NEXT: ret
1541 ; RV64ZVE32F-LABEL: mscatter_v1i32:
1542 ; RV64ZVE32F: # %bb.0:
1543 ; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
1544 ; RV64ZVE32F-NEXT: vfirst.m a1, v0
1545 ; RV64ZVE32F-NEXT: bnez a1, .LBB22_2
1546 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
1547 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1548 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
1549 ; RV64ZVE32F-NEXT: .LBB22_2: # %else
1550 ; RV64ZVE32F-NEXT: ret
1551 call void @llvm.masked.scatter.v1i32.v1p0(<1 x i32> %val, <1 x ptr> %ptrs, i32 4, <1 x i1> %m)
  ret void
}
1555 declare void @llvm.masked.scatter.v2i32.v2p0(<2 x i32>, <2 x ptr>, i32, <2 x i1>)
1557 define void @mscatter_v2i32(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
1558 ; RV32V-LABEL: mscatter_v2i32:
1560 ; RV32V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
1561 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
1564 ; RV64-LABEL: mscatter_v2i32:
1566 ; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
1567 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
1570 ; RV32ZVE32F-LABEL: mscatter_v2i32:
1571 ; RV32ZVE32F: # %bb.0:
1572 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
1573 ; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
1574 ; RV32ZVE32F-NEXT: ret
1576 ; RV64ZVE32F-LABEL: mscatter_v2i32:
1577 ; RV64ZVE32F: # %bb.0:
1578 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
1579 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
1580 ; RV64ZVE32F-NEXT: andi a3, a2, 1
1581 ; RV64ZVE32F-NEXT: bnez a3, .LBB23_3
1582 ; RV64ZVE32F-NEXT: # %bb.1: # %else
1583 ; RV64ZVE32F-NEXT: andi a2, a2, 2
1584 ; RV64ZVE32F-NEXT: bnez a2, .LBB23_4
1585 ; RV64ZVE32F-NEXT: .LBB23_2: # %else2
1586 ; RV64ZVE32F-NEXT: ret
1587 ; RV64ZVE32F-NEXT: .LBB23_3: # %cond.store
1588 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1589 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
1590 ; RV64ZVE32F-NEXT: andi a2, a2, 2
1591 ; RV64ZVE32F-NEXT: beqz a2, .LBB23_2
1592 ; RV64ZVE32F-NEXT: .LBB23_4: # %cond.store1
1593 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1594 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
1595 ; RV64ZVE32F-NEXT: vse32.v v8, (a1)
1596 ; RV64ZVE32F-NEXT: ret
1597 call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> %val, <2 x ptr> %ptrs, i32 4, <2 x i1> %m)
  ret void
}
1601 define void @mscatter_v2i64_truncstore_v2i32(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
1602 ; RV32V-LABEL: mscatter_v2i64_truncstore_v2i32:
1604 ; RV32V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
1605 ; RV32V-NEXT: vnsrl.wi v8, v8, 0
1606 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
1609 ; RV64-LABEL: mscatter_v2i64_truncstore_v2i32:
1611 ; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
1612 ; RV64-NEXT: vnsrl.wi v8, v8, 0
1613 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
1616 ; RV32ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i32:
1617 ; RV32ZVE32F: # %bb.0:
1618 ; RV32ZVE32F-NEXT: lw a1, 0(a0)
1619 ; RV32ZVE32F-NEXT: lw a0, 8(a0)
1620 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
1621 ; RV32ZVE32F-NEXT: vmv.v.x v9, a1
1622 ; RV32ZVE32F-NEXT: vslide1down.vx v9, v9, a0
1623 ; RV32ZVE32F-NEXT: vsoxei32.v v9, (zero), v8, v0.t
1624 ; RV32ZVE32F-NEXT: ret
1626 ; RV64ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i32:
1627 ; RV64ZVE32F: # %bb.0:
1628 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
1629 ; RV64ZVE32F-NEXT: vmv.v.x v8, a0
1630 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
1631 ; RV64ZVE32F-NEXT: vmv.x.s a0, v0
1632 ; RV64ZVE32F-NEXT: andi a4, a0, 1
1633 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
1634 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1
1635 ; RV64ZVE32F-NEXT: bnez a4, .LBB24_3
1636 ; RV64ZVE32F-NEXT: # %bb.1: # %else
1637 ; RV64ZVE32F-NEXT: andi a0, a0, 2
1638 ; RV64ZVE32F-NEXT: bnez a0, .LBB24_4
1639 ; RV64ZVE32F-NEXT: .LBB24_2: # %else2
1640 ; RV64ZVE32F-NEXT: ret
1641 ; RV64ZVE32F-NEXT: .LBB24_3: # %cond.store
1642 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1643 ; RV64ZVE32F-NEXT: vse32.v v8, (a2)
1644 ; RV64ZVE32F-NEXT: andi a0, a0, 2
1645 ; RV64ZVE32F-NEXT: beqz a0, .LBB24_2
1646 ; RV64ZVE32F-NEXT: .LBB24_4: # %cond.store1
1647 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1648 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
1649 ; RV64ZVE32F-NEXT: vse32.v v8, (a3)
1650 ; RV64ZVE32F-NEXT: ret
1651 %tval = trunc <2 x i64> %val to <2 x i32>
1652 call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> %tval, <2 x ptr> %ptrs, i32 4, <2 x i1> %m)
  ret void
}
1656 declare void @llvm.masked.scatter.v4i32.v4p0(<4 x i32>, <4 x ptr>, i32, <4 x i1>)
1658 define void @mscatter_v4i32(<4 x i32> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
1659 ; RV32-LABEL: mscatter_v4i32:
1661 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1662 ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
1665 ; RV64-LABEL: mscatter_v4i32:
1667 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1668 ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
1671 ; RV64ZVE32F-LABEL: mscatter_v4i32:
1672 ; RV64ZVE32F: # %bb.0:
1673 ; RV64ZVE32F-NEXT: ld a1, 24(a0)
1674 ; RV64ZVE32F-NEXT: ld a2, 16(a0)
1675 ; RV64ZVE32F-NEXT: ld a4, 8(a0)
1676 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
1677 ; RV64ZVE32F-NEXT: vmv.x.s a3, v0
1678 ; RV64ZVE32F-NEXT: andi a5, a3, 1
1679 ; RV64ZVE32F-NEXT: bnez a5, .LBB25_5
1680 ; RV64ZVE32F-NEXT: # %bb.1: # %else
1681 ; RV64ZVE32F-NEXT: andi a0, a3, 2
1682 ; RV64ZVE32F-NEXT: bnez a0, .LBB25_6
1683 ; RV64ZVE32F-NEXT: .LBB25_2: # %else2
1684 ; RV64ZVE32F-NEXT: andi a0, a3, 4
1685 ; RV64ZVE32F-NEXT: bnez a0, .LBB25_7
1686 ; RV64ZVE32F-NEXT: .LBB25_3: # %else4
1687 ; RV64ZVE32F-NEXT: andi a3, a3, 8
1688 ; RV64ZVE32F-NEXT: bnez a3, .LBB25_8
1689 ; RV64ZVE32F-NEXT: .LBB25_4: # %else6
1690 ; RV64ZVE32F-NEXT: ret
1691 ; RV64ZVE32F-NEXT: .LBB25_5: # %cond.store
1692 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
1693 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1694 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
1695 ; RV64ZVE32F-NEXT: andi a0, a3, 2
1696 ; RV64ZVE32F-NEXT: beqz a0, .LBB25_2
1697 ; RV64ZVE32F-NEXT: .LBB25_6: # %cond.store1
1698 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1699 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
1700 ; RV64ZVE32F-NEXT: vse32.v v9, (a4)
1701 ; RV64ZVE32F-NEXT: andi a0, a3, 4
1702 ; RV64ZVE32F-NEXT: beqz a0, .LBB25_3
1703 ; RV64ZVE32F-NEXT: .LBB25_7: # %cond.store3
1704 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1705 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
1706 ; RV64ZVE32F-NEXT: vse32.v v9, (a2)
1707 ; RV64ZVE32F-NEXT: andi a3, a3, 8
1708 ; RV64ZVE32F-NEXT: beqz a3, .LBB25_4
1709 ; RV64ZVE32F-NEXT: .LBB25_8: # %cond.store5
1710 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1711 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
1712 ; RV64ZVE32F-NEXT: vse32.v v8, (a1)
1713 ; RV64ZVE32F-NEXT: ret
1714 call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 4, <4 x i1> %m)
  ret void
}
1718 define void @mscatter_truemask_v4i32(<4 x i32> %val, <4 x ptr> %ptrs) {
1719 ; RV32-LABEL: mscatter_truemask_v4i32:
1721 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1722 ; RV32-NEXT: vsoxei32.v v8, (zero), v9
1725 ; RV64-LABEL: mscatter_truemask_v4i32:
1727 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1728 ; RV64-NEXT: vsoxei64.v v8, (zero), v10
1731 ; RV64ZVE32F-LABEL: mscatter_truemask_v4i32:
1732 ; RV64ZVE32F: # %bb.0:
1733 ; RV64ZVE32F-NEXT: ld a1, 0(a0)
1734 ; RV64ZVE32F-NEXT: ld a2, 24(a0)
1735 ; RV64ZVE32F-NEXT: ld a3, 8(a0)
1736 ; RV64ZVE32F-NEXT: ld a0, 16(a0)
1737 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1738 ; RV64ZVE32F-NEXT: vse32.v v8, (a1)
1739 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
1740 ; RV64ZVE32F-NEXT: vse32.v v9, (a3)
1741 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
1742 ; RV64ZVE32F-NEXT: vse32.v v9, (a0)
1743 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
1744 ; RV64ZVE32F-NEXT: vse32.v v8, (a2)
1745 ; RV64ZVE32F-NEXT: ret
1746 call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 1))
  ret void
}
1750 define void @mscatter_falsemask_v4i32(<4 x i32> %val, <4 x ptr> %ptrs) {
1751 ; CHECK-LABEL: mscatter_falsemask_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
1754 call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 4, <4 x i1> zeroinitializer)
  ret void
}
1758 declare void @llvm.masked.scatter.v8i32.v8p0(<8 x i32>, <8 x ptr>, i32, <8 x i1>)
1760 define void @mscatter_v8i32(<8 x i32> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
1761 ; RV32-LABEL: mscatter_v8i32:
1763 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1764 ; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
1767 ; RV64-LABEL: mscatter_v8i32:
1769 ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1770 ; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
1773 ; RV64ZVE32F-LABEL: mscatter_v8i32:
1774 ; RV64ZVE32F: # %bb.0:
1775 ; RV64ZVE32F-NEXT: ld a1, 56(a0)
1776 ; RV64ZVE32F-NEXT: ld a2, 48(a0)
1777 ; RV64ZVE32F-NEXT: ld a4, 40(a0)
1778 ; RV64ZVE32F-NEXT: ld a5, 32(a0)
1779 ; RV64ZVE32F-NEXT: ld a6, 24(a0)
1780 ; RV64ZVE32F-NEXT: ld a7, 16(a0)
1781 ; RV64ZVE32F-NEXT: ld t0, 8(a0)
1782 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
1783 ; RV64ZVE32F-NEXT: vmv.x.s a3, v0
1784 ; RV64ZVE32F-NEXT: andi t1, a3, 1
1785 ; RV64ZVE32F-NEXT: bnez t1, .LBB28_9
1786 ; RV64ZVE32F-NEXT: # %bb.1: # %else
1787 ; RV64ZVE32F-NEXT: andi a0, a3, 2
1788 ; RV64ZVE32F-NEXT: bnez a0, .LBB28_10
1789 ; RV64ZVE32F-NEXT: .LBB28_2: # %else2
1790 ; RV64ZVE32F-NEXT: andi a0, a3, 4
1791 ; RV64ZVE32F-NEXT: bnez a0, .LBB28_11
1792 ; RV64ZVE32F-NEXT: .LBB28_3: # %else4
1793 ; RV64ZVE32F-NEXT: andi a0, a3, 8
1794 ; RV64ZVE32F-NEXT: bnez a0, .LBB28_12
1795 ; RV64ZVE32F-NEXT: .LBB28_4: # %else6
1796 ; RV64ZVE32F-NEXT: andi a0, a3, 16
1797 ; RV64ZVE32F-NEXT: bnez a0, .LBB28_13
1798 ; RV64ZVE32F-NEXT: .LBB28_5: # %else8
1799 ; RV64ZVE32F-NEXT: andi a0, a3, 32
1800 ; RV64ZVE32F-NEXT: bnez a0, .LBB28_14
1801 ; RV64ZVE32F-NEXT: .LBB28_6: # %else10
1802 ; RV64ZVE32F-NEXT: andi a0, a3, 64
1803 ; RV64ZVE32F-NEXT: bnez a0, .LBB28_15
1804 ; RV64ZVE32F-NEXT: .LBB28_7: # %else12
1805 ; RV64ZVE32F-NEXT: andi a0, a3, -128
1806 ; RV64ZVE32F-NEXT: bnez a0, .LBB28_16
1807 ; RV64ZVE32F-NEXT: .LBB28_8: # %else14
1808 ; RV64ZVE32F-NEXT: ret
1809 ; RV64ZVE32F-NEXT: .LBB28_9: # %cond.store
1810 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
1811 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1812 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
1813 ; RV64ZVE32F-NEXT: andi a0, a3, 2
1814 ; RV64ZVE32F-NEXT: beqz a0, .LBB28_2
1815 ; RV64ZVE32F-NEXT: .LBB28_10: # %cond.store1
1816 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1817 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
1818 ; RV64ZVE32F-NEXT: vse32.v v10, (t0)
1819 ; RV64ZVE32F-NEXT: andi a0, a3, 4
1820 ; RV64ZVE32F-NEXT: beqz a0, .LBB28_3
1821 ; RV64ZVE32F-NEXT: .LBB28_11: # %cond.store3
1822 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1823 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2
1824 ; RV64ZVE32F-NEXT: vse32.v v10, (a7)
1825 ; RV64ZVE32F-NEXT: andi a0, a3, 8
1826 ; RV64ZVE32F-NEXT: beqz a0, .LBB28_4
1827 ; RV64ZVE32F-NEXT: .LBB28_12: # %cond.store5
1828 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1829 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
1830 ; RV64ZVE32F-NEXT: vse32.v v10, (a6)
1831 ; RV64ZVE32F-NEXT: andi a0, a3, 16
1832 ; RV64ZVE32F-NEXT: beqz a0, .LBB28_5
1833 ; RV64ZVE32F-NEXT: .LBB28_13: # %cond.store7
1834 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
1835 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
1836 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1837 ; RV64ZVE32F-NEXT: vse32.v v10, (a5)
1838 ; RV64ZVE32F-NEXT: andi a0, a3, 32
1839 ; RV64ZVE32F-NEXT: beqz a0, .LBB28_6
1840 ; RV64ZVE32F-NEXT: .LBB28_14: # %cond.store9
1841 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
1842 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5
1843 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1844 ; RV64ZVE32F-NEXT: vse32.v v10, (a4)
1845 ; RV64ZVE32F-NEXT: andi a0, a3, 64
1846 ; RV64ZVE32F-NEXT: beqz a0, .LBB28_7
1847 ; RV64ZVE32F-NEXT: .LBB28_15: # %cond.store11
1848 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
1849 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
1850 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1851 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
1852 ; RV64ZVE32F-NEXT: andi a0, a3, -128
1853 ; RV64ZVE32F-NEXT: beqz a0, .LBB28_8
1854 ; RV64ZVE32F-NEXT: .LBB28_16: # %cond.store13
1855 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
1856 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
1857 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1858 ; RV64ZVE32F-NEXT: vse32.v v8, (a1)
1859 ; RV64ZVE32F-NEXT: ret
1860 call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
  ret void
}
1864 define void @mscatter_baseidx_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
1865 ; RV32-LABEL: mscatter_baseidx_v8i8_v8i32:
1867 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1868 ; RV32-NEXT: vsext.vf4 v12, v10
1869 ; RV32-NEXT: vsll.vi v10, v12, 2
1870 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
1873 ; RV64-LABEL: mscatter_baseidx_v8i8_v8i32:
1875 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1876 ; RV64-NEXT: vsext.vf8 v12, v10
1877 ; RV64-NEXT: vsll.vi v12, v12, 2
1878 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
1879 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
1882 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8_v8i32:
1883 ; RV64ZVE32F: # %bb.0:
1884 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
1885 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
1886 ; RV64ZVE32F-NEXT: andi a2, a1, 1
1887 ; RV64ZVE32F-NEXT: beqz a2, .LBB29_2
1888 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
1889 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
1890 ; RV64ZVE32F-NEXT: slli a2, a2, 2
1891 ; RV64ZVE32F-NEXT: add a2, a0, a2
1892 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1893 ; RV64ZVE32F-NEXT: vse32.v v8, (a2)
1894 ; RV64ZVE32F-NEXT: .LBB29_2: # %else
1895 ; RV64ZVE32F-NEXT: andi a2, a1, 2
1896 ; RV64ZVE32F-NEXT: beqz a2, .LBB29_4
1897 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
1898 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1899 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
1900 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
1901 ; RV64ZVE32F-NEXT: slli a2, a2, 2
1902 ; RV64ZVE32F-NEXT: add a2, a0, a2
1903 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
1904 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
1905 ; RV64ZVE32F-NEXT: vse32.v v11, (a2)
1906 ; RV64ZVE32F-NEXT: .LBB29_4: # %else2
1907 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
1908 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
1909 ; RV64ZVE32F-NEXT: andi a2, a1, 4
1910 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
1911 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
1912 ; RV64ZVE32F-NEXT: bnez a2, .LBB29_12
1913 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
1914 ; RV64ZVE32F-NEXT: andi a2, a1, 8
1915 ; RV64ZVE32F-NEXT: bnez a2, .LBB29_13
1916 ; RV64ZVE32F-NEXT: .LBB29_6: # %else6
1917 ; RV64ZVE32F-NEXT: andi a2, a1, 16
1918 ; RV64ZVE32F-NEXT: bnez a2, .LBB29_14
1919 ; RV64ZVE32F-NEXT: .LBB29_7: # %else8
1920 ; RV64ZVE32F-NEXT: andi a2, a1, 32
1921 ; RV64ZVE32F-NEXT: beqz a2, .LBB29_9
1922 ; RV64ZVE32F-NEXT: .LBB29_8: # %cond.store9
1923 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1924 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
1925 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
1926 ; RV64ZVE32F-NEXT: slli a2, a2, 2
1927 ; RV64ZVE32F-NEXT: add a2, a0, a2
1928 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
1929 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
1930 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1931 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
1932 ; RV64ZVE32F-NEXT: .LBB29_9: # %else10
1933 ; RV64ZVE32F-NEXT: andi a2, a1, 64
1934 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
1935 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
1936 ; RV64ZVE32F-NEXT: bnez a2, .LBB29_15
1937 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
1938 ; RV64ZVE32F-NEXT: andi a1, a1, -128
1939 ; RV64ZVE32F-NEXT: bnez a1, .LBB29_16
1940 ; RV64ZVE32F-NEXT: .LBB29_11: # %else14
1941 ; RV64ZVE32F-NEXT: ret
1942 ; RV64ZVE32F-NEXT: .LBB29_12: # %cond.store3
1943 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
1944 ; RV64ZVE32F-NEXT: slli a2, a2, 2
1945 ; RV64ZVE32F-NEXT: add a2, a0, a2
1946 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
1947 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
1948 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1949 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
1950 ; RV64ZVE32F-NEXT: andi a2, a1, 8
1951 ; RV64ZVE32F-NEXT: beqz a2, .LBB29_6
1952 ; RV64ZVE32F-NEXT: .LBB29_13: # %cond.store5
1953 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1954 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
1955 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
1956 ; RV64ZVE32F-NEXT: slli a2, a2, 2
1957 ; RV64ZVE32F-NEXT: add a2, a0, a2
1958 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
1959 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
1960 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
1961 ; RV64ZVE32F-NEXT: andi a2, a1, 16
1962 ; RV64ZVE32F-NEXT: beqz a2, .LBB29_7
1963 ; RV64ZVE32F-NEXT: .LBB29_14: # %cond.store7
1964 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
1965 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
1966 ; RV64ZVE32F-NEXT: slli a2, a2, 2
1967 ; RV64ZVE32F-NEXT: add a2, a0, a2
1968 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
1969 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
1970 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1971 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
1972 ; RV64ZVE32F-NEXT: andi a2, a1, 32
1973 ; RV64ZVE32F-NEXT: bnez a2, .LBB29_8
1974 ; RV64ZVE32F-NEXT: j .LBB29_9
1975 ; RV64ZVE32F-NEXT: .LBB29_15: # %cond.store11
1976 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
1977 ; RV64ZVE32F-NEXT: slli a2, a2, 2
1978 ; RV64ZVE32F-NEXT: add a2, a0, a2
1979 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
1980 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
1981 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1982 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
1983 ; RV64ZVE32F-NEXT: andi a1, a1, -128
1984 ; RV64ZVE32F-NEXT: beqz a1, .LBB29_11
1985 ; RV64ZVE32F-NEXT: .LBB29_16: # %cond.store13
1986 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1987 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
1988 ; RV64ZVE32F-NEXT: vmv.x.s a1, v10
1989 ; RV64ZVE32F-NEXT: slli a1, a1, 2
1990 ; RV64ZVE32F-NEXT: add a0, a0, a1
1991 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
1992 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
1993 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1994 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
1995 ; RV64ZVE32F-NEXT: ret
1996 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i8> %idxs
1997 call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
  ret void
}
2001 define void @mscatter_baseidx_sext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
2002 ; RV32-LABEL: mscatter_baseidx_sext_v8i8_v8i32:
2004 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
2005 ; RV32-NEXT: vsext.vf4 v12, v10
2006 ; RV32-NEXT: vsll.vi v10, v12, 2
2007 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
2010 ; RV64-LABEL: mscatter_baseidx_sext_v8i8_v8i32:
2012 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
2013 ; RV64-NEXT: vsext.vf8 v12, v10
2014 ; RV64-NEXT: vsll.vi v12, v12, 2
2015 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2016 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
2019 ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8i32:
2020 ; RV64ZVE32F: # %bb.0:
2021 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
2022 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
2023 ; RV64ZVE32F-NEXT: andi a2, a1, 1
2024 ; RV64ZVE32F-NEXT: beqz a2, .LBB30_2
2025 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
2026 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2027 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2028 ; RV64ZVE32F-NEXT: add a2, a0, a2
2029 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2030 ; RV64ZVE32F-NEXT: vse32.v v8, (a2)
2031 ; RV64ZVE32F-NEXT: .LBB30_2: # %else
2032 ; RV64ZVE32F-NEXT: andi a2, a1, 2
2033 ; RV64ZVE32F-NEXT: beqz a2, .LBB30_4
2034 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
2035 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2036 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
2037 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
2038 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2039 ; RV64ZVE32F-NEXT: add a2, a0, a2
2040 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2041 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
2042 ; RV64ZVE32F-NEXT: vse32.v v11, (a2)
2043 ; RV64ZVE32F-NEXT: .LBB30_4: # %else2
2044 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
2045 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
2046 ; RV64ZVE32F-NEXT: andi a2, a1, 4
2047 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
2048 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
2049 ; RV64ZVE32F-NEXT: bnez a2, .LBB30_12
2050 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
2051 ; RV64ZVE32F-NEXT: andi a2, a1, 8
2052 ; RV64ZVE32F-NEXT: bnez a2, .LBB30_13
2053 ; RV64ZVE32F-NEXT: .LBB30_6: # %else6
2054 ; RV64ZVE32F-NEXT: andi a2, a1, 16
2055 ; RV64ZVE32F-NEXT: bnez a2, .LBB30_14
2056 ; RV64ZVE32F-NEXT: .LBB30_7: # %else8
2057 ; RV64ZVE32F-NEXT: andi a2, a1, 32
2058 ; RV64ZVE32F-NEXT: beqz a2, .LBB30_9
2059 ; RV64ZVE32F-NEXT: .LBB30_8: # %cond.store9
2060 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2061 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
2062 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2063 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2064 ; RV64ZVE32F-NEXT: add a2, a0, a2
2065 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2066 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
2067 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2068 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2069 ; RV64ZVE32F-NEXT: .LBB30_9: # %else10
2070 ; RV64ZVE32F-NEXT: andi a2, a1, 64
2071 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
2072 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
2073 ; RV64ZVE32F-NEXT: bnez a2, .LBB30_15
2074 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
2075 ; RV64ZVE32F-NEXT: andi a1, a1, -128
2076 ; RV64ZVE32F-NEXT: bnez a1, .LBB30_16
2077 ; RV64ZVE32F-NEXT: .LBB30_11: # %else14
2078 ; RV64ZVE32F-NEXT: ret
2079 ; RV64ZVE32F-NEXT: .LBB30_12: # %cond.store3
2080 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2081 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2082 ; RV64ZVE32F-NEXT: add a2, a0, a2
2083 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2084 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
2085 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2086 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2087 ; RV64ZVE32F-NEXT: andi a2, a1, 8
2088 ; RV64ZVE32F-NEXT: beqz a2, .LBB30_6
2089 ; RV64ZVE32F-NEXT: .LBB30_13: # %cond.store5
2090 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2091 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
2092 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2093 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2094 ; RV64ZVE32F-NEXT: add a2, a0, a2
2095 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2096 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
2097 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
2098 ; RV64ZVE32F-NEXT: andi a2, a1, 16
2099 ; RV64ZVE32F-NEXT: beqz a2, .LBB30_7
2100 ; RV64ZVE32F-NEXT: .LBB30_14: # %cond.store7
2101 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
2102 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
2103 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2104 ; RV64ZVE32F-NEXT: add a2, a0, a2
2105 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2106 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
2107 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2108 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2109 ; RV64ZVE32F-NEXT: andi a2, a1, 32
2110 ; RV64ZVE32F-NEXT: bnez a2, .LBB30_8
2111 ; RV64ZVE32F-NEXT: j .LBB30_9
2112 ; RV64ZVE32F-NEXT: .LBB30_15: # %cond.store11
2113 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2114 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2115 ; RV64ZVE32F-NEXT: add a2, a0, a2
2116 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2117 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
2118 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2119 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2120 ; RV64ZVE32F-NEXT: andi a1, a1, -128
2121 ; RV64ZVE32F-NEXT: beqz a1, .LBB30_11
2122 ; RV64ZVE32F-NEXT: .LBB30_16: # %cond.store13
2123 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2124 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
2125 ; RV64ZVE32F-NEXT: vmv.x.s a1, v10
2126 ; RV64ZVE32F-NEXT: slli a1, a1, 2
2127 ; RV64ZVE32F-NEXT: add a0, a0, a1
2128 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2129 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
2130 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2131 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
2132 ; RV64ZVE32F-NEXT: ret
2133 %eidxs = sext <8 x i8> %idxs to <8 x i32>
2134 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
2135 call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
  ret void
}
2139 define void @mscatter_baseidx_zext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
2140 ; RV32-LABEL: mscatter_baseidx_zext_v8i8_v8i32:
2142 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
2143 ; RV32-NEXT: vzext.vf2 v11, v10
2144 ; RV32-NEXT: vsll.vi v10, v11, 2
2145 ; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2146 ; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t
2149 ; RV64-LABEL: mscatter_baseidx_zext_v8i8_v8i32:
2151 ; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
2152 ; RV64-NEXT: vzext.vf2 v11, v10
2153 ; RV64-NEXT: vsll.vi v10, v11, 2
2154 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2155 ; RV64-NEXT: vsoxei16.v v8, (a0), v10, v0.t
2158 ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8i32:
2159 ; RV64ZVE32F: # %bb.0:
2160 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
2161 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
2162 ; RV64ZVE32F-NEXT: andi a2, a1, 1
2163 ; RV64ZVE32F-NEXT: beqz a2, .LBB31_2
2164 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
2165 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2166 ; RV64ZVE32F-NEXT: andi a2, a2, 255
2167 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2168 ; RV64ZVE32F-NEXT: add a2, a0, a2
2169 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2170 ; RV64ZVE32F-NEXT: vse32.v v8, (a2)
2171 ; RV64ZVE32F-NEXT: .LBB31_2: # %else
2172 ; RV64ZVE32F-NEXT: andi a2, a1, 2
2173 ; RV64ZVE32F-NEXT: beqz a2, .LBB31_4
2174 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
2175 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2176 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
2177 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
2178 ; RV64ZVE32F-NEXT: andi a2, a2, 255
2179 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2180 ; RV64ZVE32F-NEXT: add a2, a0, a2
2181 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2182 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
2183 ; RV64ZVE32F-NEXT: vse32.v v11, (a2)
2184 ; RV64ZVE32F-NEXT: .LBB31_4: # %else2
2185 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
2186 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
2187 ; RV64ZVE32F-NEXT: andi a2, a1, 4
2188 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
2189 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
2190 ; RV64ZVE32F-NEXT: bnez a2, .LBB31_12
2191 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
2192 ; RV64ZVE32F-NEXT: andi a2, a1, 8
2193 ; RV64ZVE32F-NEXT: bnez a2, .LBB31_13
2194 ; RV64ZVE32F-NEXT: .LBB31_6: # %else6
2195 ; RV64ZVE32F-NEXT: andi a2, a1, 16
2196 ; RV64ZVE32F-NEXT: bnez a2, .LBB31_14
2197 ; RV64ZVE32F-NEXT: .LBB31_7: # %else8
2198 ; RV64ZVE32F-NEXT: andi a2, a1, 32
2199 ; RV64ZVE32F-NEXT: beqz a2, .LBB31_9
2200 ; RV64ZVE32F-NEXT: .LBB31_8: # %cond.store9
2201 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2202 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
2203 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2204 ; RV64ZVE32F-NEXT: andi a2, a2, 255
2205 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2206 ; RV64ZVE32F-NEXT: add a2, a0, a2
2207 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2208 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
2209 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2210 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2211 ; RV64ZVE32F-NEXT: .LBB31_9: # %else10
2212 ; RV64ZVE32F-NEXT: andi a2, a1, 64
2213 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
2214 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
2215 ; RV64ZVE32F-NEXT: bnez a2, .LBB31_15
2216 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
2217 ; RV64ZVE32F-NEXT: andi a1, a1, -128
2218 ; RV64ZVE32F-NEXT: bnez a1, .LBB31_16
2219 ; RV64ZVE32F-NEXT: .LBB31_11: # %else14
2220 ; RV64ZVE32F-NEXT: ret
2221 ; RV64ZVE32F-NEXT: .LBB31_12: # %cond.store3
2222 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2223 ; RV64ZVE32F-NEXT: andi a2, a2, 255
2224 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2225 ; RV64ZVE32F-NEXT: add a2, a0, a2
2226 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2227 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
2228 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2229 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2230 ; RV64ZVE32F-NEXT: andi a2, a1, 8
2231 ; RV64ZVE32F-NEXT: beqz a2, .LBB31_6
2232 ; RV64ZVE32F-NEXT: .LBB31_13: # %cond.store5
2233 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2234 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
2235 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2236 ; RV64ZVE32F-NEXT: andi a2, a2, 255
2237 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2238 ; RV64ZVE32F-NEXT: add a2, a0, a2
2239 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2240 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
2241 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
2242 ; RV64ZVE32F-NEXT: andi a2, a1, 16
2243 ; RV64ZVE32F-NEXT: beqz a2, .LBB31_7
2244 ; RV64ZVE32F-NEXT: .LBB31_14: # %cond.store7
2245 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
2246 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
2247 ; RV64ZVE32F-NEXT: andi a2, a2, 255
2248 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2249 ; RV64ZVE32F-NEXT: add a2, a0, a2
2250 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2251 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
2252 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2253 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2254 ; RV64ZVE32F-NEXT: andi a2, a1, 32
2255 ; RV64ZVE32F-NEXT: bnez a2, .LBB31_8
2256 ; RV64ZVE32F-NEXT: j .LBB31_9
2257 ; RV64ZVE32F-NEXT: .LBB31_15: # %cond.store11
2258 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2259 ; RV64ZVE32F-NEXT: andi a2, a2, 255
2260 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2261 ; RV64ZVE32F-NEXT: add a2, a0, a2
2262 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2263 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
2264 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2265 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2266 ; RV64ZVE32F-NEXT: andi a1, a1, -128
2267 ; RV64ZVE32F-NEXT: beqz a1, .LBB31_11
2268 ; RV64ZVE32F-NEXT: .LBB31_16: # %cond.store13
2269 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2270 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
2271 ; RV64ZVE32F-NEXT: vmv.x.s a1, v10
2272 ; RV64ZVE32F-NEXT: andi a1, a1, 255
2273 ; RV64ZVE32F-NEXT: slli a1, a1, 2
2274 ; RV64ZVE32F-NEXT: add a0, a0, a1
2275 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2276 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
2277 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2278 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
2279 ; RV64ZVE32F-NEXT: ret
2280 %eidxs = zext <8 x i8> %idxs to <8 x i32>
2281 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
2282 call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
  ret void
}
2286 define void @mscatter_baseidx_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
2287 ; RV32-LABEL: mscatter_baseidx_v8i16_v8i32:
2289 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
2290 ; RV32-NEXT: vsext.vf2 v12, v10
2291 ; RV32-NEXT: vsll.vi v10, v12, 2
2292 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
2295 ; RV64-LABEL: mscatter_baseidx_v8i16_v8i32:
2297 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
2298 ; RV64-NEXT: vsext.vf4 v12, v10
2299 ; RV64-NEXT: vsll.vi v12, v12, 2
2300 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2301 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
2304 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i16_v8i32:
2305 ; RV64ZVE32F: # %bb.0:
2306 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
2307 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
2308 ; RV64ZVE32F-NEXT: andi a2, a1, 1
2309 ; RV64ZVE32F-NEXT: beqz a2, .LBB32_2
2310 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
2311 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
2312 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2313 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2314 ; RV64ZVE32F-NEXT: add a2, a0, a2
2315 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2316 ; RV64ZVE32F-NEXT: vse32.v v8, (a2)
2317 ; RV64ZVE32F-NEXT: .LBB32_2: # %else
2318 ; RV64ZVE32F-NEXT: andi a2, a1, 2
2319 ; RV64ZVE32F-NEXT: beqz a2, .LBB32_4
2320 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
2321 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
2322 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
2323 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
2324 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2325 ; RV64ZVE32F-NEXT: add a2, a0, a2
2326 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2327 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
2328 ; RV64ZVE32F-NEXT: vse32.v v11, (a2)
2329 ; RV64ZVE32F-NEXT: .LBB32_4: # %else2
2330 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
2331 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
2332 ; RV64ZVE32F-NEXT: andi a2, a1, 4
2333 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
2334 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
2335 ; RV64ZVE32F-NEXT: bnez a2, .LBB32_12
2336 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
2337 ; RV64ZVE32F-NEXT: andi a2, a1, 8
2338 ; RV64ZVE32F-NEXT: bnez a2, .LBB32_13
2339 ; RV64ZVE32F-NEXT: .LBB32_6: # %else6
2340 ; RV64ZVE32F-NEXT: andi a2, a1, 16
2341 ; RV64ZVE32F-NEXT: bnez a2, .LBB32_14
2342 ; RV64ZVE32F-NEXT: .LBB32_7: # %else8
2343 ; RV64ZVE32F-NEXT: andi a2, a1, 32
2344 ; RV64ZVE32F-NEXT: beqz a2, .LBB32_9
2345 ; RV64ZVE32F-NEXT: .LBB32_8: # %cond.store9
2346 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
2347 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
2348 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2349 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2350 ; RV64ZVE32F-NEXT: add a2, a0, a2
2351 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2352 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
2353 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2354 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2355 ; RV64ZVE32F-NEXT: .LBB32_9: # %else10
2356 ; RV64ZVE32F-NEXT: andi a2, a1, 64
2357 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
2358 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
2359 ; RV64ZVE32F-NEXT: bnez a2, .LBB32_15
2360 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
2361 ; RV64ZVE32F-NEXT: andi a1, a1, -128
2362 ; RV64ZVE32F-NEXT: bnez a1, .LBB32_16
2363 ; RV64ZVE32F-NEXT: .LBB32_11: # %else14
2364 ; RV64ZVE32F-NEXT: ret
2365 ; RV64ZVE32F-NEXT: .LBB32_12: # %cond.store3
2366 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2367 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2368 ; RV64ZVE32F-NEXT: add a2, a0, a2
2369 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2370 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
2371 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2372 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2373 ; RV64ZVE32F-NEXT: andi a2, a1, 8
2374 ; RV64ZVE32F-NEXT: beqz a2, .LBB32_6
2375 ; RV64ZVE32F-NEXT: .LBB32_13: # %cond.store5
2376 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
2377 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
2378 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2379 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2380 ; RV64ZVE32F-NEXT: add a2, a0, a2
2381 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2382 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
2383 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
2384 ; RV64ZVE32F-NEXT: andi a2, a1, 16
2385 ; RV64ZVE32F-NEXT: beqz a2, .LBB32_7
2386 ; RV64ZVE32F-NEXT: .LBB32_14: # %cond.store7
2387 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
2388 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
2389 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2390 ; RV64ZVE32F-NEXT: add a2, a0, a2
2391 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2392 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
2393 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2394 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2395 ; RV64ZVE32F-NEXT: andi a2, a1, 32
2396 ; RV64ZVE32F-NEXT: bnez a2, .LBB32_8
2397 ; RV64ZVE32F-NEXT: j .LBB32_9
2398 ; RV64ZVE32F-NEXT: .LBB32_15: # %cond.store11
2399 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2400 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2401 ; RV64ZVE32F-NEXT: add a2, a0, a2
2402 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2403 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
2404 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2405 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2406 ; RV64ZVE32F-NEXT: andi a1, a1, -128
2407 ; RV64ZVE32F-NEXT: beqz a1, .LBB32_11
2408 ; RV64ZVE32F-NEXT: .LBB32_16: # %cond.store13
2409 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
2410 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
2411 ; RV64ZVE32F-NEXT: vmv.x.s a1, v10
2412 ; RV64ZVE32F-NEXT: slli a1, a1, 2
2413 ; RV64ZVE32F-NEXT: add a0, a0, a1
2414 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2415 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
2416 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2417 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
2418 ; RV64ZVE32F-NEXT: ret
2419 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i16> %idxs
2420 call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
  ret void
}
2424 define void @mscatter_baseidx_sext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
2425 ; RV32-LABEL: mscatter_baseidx_sext_v8i16_v8i32:
2427 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
2428 ; RV32-NEXT: vsext.vf2 v12, v10
2429 ; RV32-NEXT: vsll.vi v10, v12, 2
2430 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
2433 ; RV64-LABEL: mscatter_baseidx_sext_v8i16_v8i32:
2435 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
2436 ; RV64-NEXT: vsext.vf4 v12, v10
2437 ; RV64-NEXT: vsll.vi v12, v12, 2
2438 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2439 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
2442 ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i16_v8i32:
2443 ; RV64ZVE32F: # %bb.0:
2444 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
2445 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
2446 ; RV64ZVE32F-NEXT: andi a2, a1, 1
2447 ; RV64ZVE32F-NEXT: beqz a2, .LBB33_2
2448 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
2449 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
2450 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2451 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2452 ; RV64ZVE32F-NEXT: add a2, a0, a2
2453 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2454 ; RV64ZVE32F-NEXT: vse32.v v8, (a2)
2455 ; RV64ZVE32F-NEXT: .LBB33_2: # %else
2456 ; RV64ZVE32F-NEXT: andi a2, a1, 2
2457 ; RV64ZVE32F-NEXT: beqz a2, .LBB33_4
2458 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
2459 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
2460 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
2461 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
2462 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2463 ; RV64ZVE32F-NEXT: add a2, a0, a2
2464 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2465 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
2466 ; RV64ZVE32F-NEXT: vse32.v v11, (a2)
2467 ; RV64ZVE32F-NEXT: .LBB33_4: # %else2
2468 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
2469 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
2470 ; RV64ZVE32F-NEXT: andi a2, a1, 4
2471 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
2472 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
2473 ; RV64ZVE32F-NEXT: bnez a2, .LBB33_12
2474 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
2475 ; RV64ZVE32F-NEXT: andi a2, a1, 8
2476 ; RV64ZVE32F-NEXT: bnez a2, .LBB33_13
2477 ; RV64ZVE32F-NEXT: .LBB33_6: # %else6
2478 ; RV64ZVE32F-NEXT: andi a2, a1, 16
2479 ; RV64ZVE32F-NEXT: bnez a2, .LBB33_14
2480 ; RV64ZVE32F-NEXT: .LBB33_7: # %else8
2481 ; RV64ZVE32F-NEXT: andi a2, a1, 32
2482 ; RV64ZVE32F-NEXT: beqz a2, .LBB33_9
2483 ; RV64ZVE32F-NEXT: .LBB33_8: # %cond.store9
2484 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
2485 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
2486 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2487 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2488 ; RV64ZVE32F-NEXT: add a2, a0, a2
2489 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2490 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
2491 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2492 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2493 ; RV64ZVE32F-NEXT: .LBB33_9: # %else10
2494 ; RV64ZVE32F-NEXT: andi a2, a1, 64
2495 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
2496 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
2497 ; RV64ZVE32F-NEXT: bnez a2, .LBB33_15
2498 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
2499 ; RV64ZVE32F-NEXT: andi a1, a1, -128
2500 ; RV64ZVE32F-NEXT: bnez a1, .LBB33_16
2501 ; RV64ZVE32F-NEXT: .LBB33_11: # %else14
2502 ; RV64ZVE32F-NEXT: ret
2503 ; RV64ZVE32F-NEXT: .LBB33_12: # %cond.store3
2504 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2505 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2506 ; RV64ZVE32F-NEXT: add a2, a0, a2
2507 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2508 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
2509 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2510 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2511 ; RV64ZVE32F-NEXT: andi a2, a1, 8
2512 ; RV64ZVE32F-NEXT: beqz a2, .LBB33_6
2513 ; RV64ZVE32F-NEXT: .LBB33_13: # %cond.store5
2514 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
2515 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
2516 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2517 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2518 ; RV64ZVE32F-NEXT: add a2, a0, a2
2519 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2520 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
2521 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
2522 ; RV64ZVE32F-NEXT: andi a2, a1, 16
2523 ; RV64ZVE32F-NEXT: beqz a2, .LBB33_7
2524 ; RV64ZVE32F-NEXT: .LBB33_14: # %cond.store7
2525 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
2526 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
2527 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2528 ; RV64ZVE32F-NEXT: add a2, a0, a2
2529 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2530 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
2531 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2532 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2533 ; RV64ZVE32F-NEXT: andi a2, a1, 32
2534 ; RV64ZVE32F-NEXT: bnez a2, .LBB33_8
2535 ; RV64ZVE32F-NEXT: j .LBB33_9
2536 ; RV64ZVE32F-NEXT: .LBB33_15: # %cond.store11
2537 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2538 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2539 ; RV64ZVE32F-NEXT: add a2, a0, a2
2540 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2541 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
2542 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2543 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2544 ; RV64ZVE32F-NEXT: andi a1, a1, -128
2545 ; RV64ZVE32F-NEXT: beqz a1, .LBB33_11
2546 ; RV64ZVE32F-NEXT: .LBB33_16: # %cond.store13
2547 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
2548 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
2549 ; RV64ZVE32F-NEXT: vmv.x.s a1, v10
2550 ; RV64ZVE32F-NEXT: slli a1, a1, 2
2551 ; RV64ZVE32F-NEXT: add a0, a0, a1
2552 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2553 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
2554 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2555 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
2556 ; RV64ZVE32F-NEXT: ret
2557 %eidxs = sext <8 x i16> %idxs to <8 x i32>
2558 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
2559 call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
  ret void
}
2563 define void @mscatter_baseidx_zext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
2564 ; RV32-LABEL: mscatter_baseidx_zext_v8i16_v8i32:
2566 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
2567 ; RV32-NEXT: vzext.vf2 v12, v10
2568 ; RV32-NEXT: vsll.vi v10, v12, 2
2569 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
2572 ; RV64-LABEL: mscatter_baseidx_zext_v8i16_v8i32:
2574 ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
2575 ; RV64-NEXT: vzext.vf2 v12, v10
2576 ; RV64-NEXT: vsll.vi v10, v12, 2
2577 ; RV64-NEXT: vsoxei32.v v8, (a0), v10, v0.t
2580 ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8i32:
2581 ; RV64ZVE32F: # %bb.0:
2582 ; RV64ZVE32F-NEXT: lui a1, 16
2583 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
2584 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
2585 ; RV64ZVE32F-NEXT: andi a3, a2, 1
2586 ; RV64ZVE32F-NEXT: addiw a1, a1, -1
2587 ; RV64ZVE32F-NEXT: beqz a3, .LBB34_2
2588 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
2589 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
2590 ; RV64ZVE32F-NEXT: vmv.x.s a3, v10
2591 ; RV64ZVE32F-NEXT: and a3, a3, a1
2592 ; RV64ZVE32F-NEXT: slli a3, a3, 2
2593 ; RV64ZVE32F-NEXT: add a3, a0, a3
2594 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2595 ; RV64ZVE32F-NEXT: vse32.v v8, (a3)
2596 ; RV64ZVE32F-NEXT: .LBB34_2: # %else
2597 ; RV64ZVE32F-NEXT: andi a3, a2, 2
2598 ; RV64ZVE32F-NEXT: beqz a3, .LBB34_4
2599 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
2600 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
2601 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
2602 ; RV64ZVE32F-NEXT: vmv.x.s a3, v11
2603 ; RV64ZVE32F-NEXT: and a3, a3, a1
2604 ; RV64ZVE32F-NEXT: slli a3, a3, 2
2605 ; RV64ZVE32F-NEXT: add a3, a0, a3
2606 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2607 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
2608 ; RV64ZVE32F-NEXT: vse32.v v11, (a3)
2609 ; RV64ZVE32F-NEXT: .LBB34_4: # %else2
2610 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
2611 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
2612 ; RV64ZVE32F-NEXT: andi a3, a2, 4
2613 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
2614 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
2615 ; RV64ZVE32F-NEXT: bnez a3, .LBB34_12
2616 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
2617 ; RV64ZVE32F-NEXT: andi a3, a2, 8
2618 ; RV64ZVE32F-NEXT: bnez a3, .LBB34_13
2619 ; RV64ZVE32F-NEXT: .LBB34_6: # %else6
2620 ; RV64ZVE32F-NEXT: andi a3, a2, 16
2621 ; RV64ZVE32F-NEXT: bnez a3, .LBB34_14
2622 ; RV64ZVE32F-NEXT: .LBB34_7: # %else8
2623 ; RV64ZVE32F-NEXT: andi a3, a2, 32
2624 ; RV64ZVE32F-NEXT: beqz a3, .LBB34_9
2625 ; RV64ZVE32F-NEXT: .LBB34_8: # %cond.store9
2626 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
2627 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
2628 ; RV64ZVE32F-NEXT: vmv.x.s a3, v10
2629 ; RV64ZVE32F-NEXT: and a3, a3, a1
2630 ; RV64ZVE32F-NEXT: slli a3, a3, 2
2631 ; RV64ZVE32F-NEXT: add a3, a0, a3
2632 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2633 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
2634 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2635 ; RV64ZVE32F-NEXT: vse32.v v12, (a3)
2636 ; RV64ZVE32F-NEXT: .LBB34_9: # %else10
2637 ; RV64ZVE32F-NEXT: andi a3, a2, 64
2638 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
2639 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
2640 ; RV64ZVE32F-NEXT: bnez a3, .LBB34_15
2641 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
2642 ; RV64ZVE32F-NEXT: andi a2, a2, -128
2643 ; RV64ZVE32F-NEXT: bnez a2, .LBB34_16
2644 ; RV64ZVE32F-NEXT: .LBB34_11: # %else14
2645 ; RV64ZVE32F-NEXT: ret
2646 ; RV64ZVE32F-NEXT: .LBB34_12: # %cond.store3
2647 ; RV64ZVE32F-NEXT: vmv.x.s a3, v10
2648 ; RV64ZVE32F-NEXT: and a3, a3, a1
2649 ; RV64ZVE32F-NEXT: slli a3, a3, 2
2650 ; RV64ZVE32F-NEXT: add a3, a0, a3
2651 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2652 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
2653 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2654 ; RV64ZVE32F-NEXT: vse32.v v12, (a3)
2655 ; RV64ZVE32F-NEXT: andi a3, a2, 8
2656 ; RV64ZVE32F-NEXT: beqz a3, .LBB34_6
2657 ; RV64ZVE32F-NEXT: .LBB34_13: # %cond.store5
2658 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
2659 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
2660 ; RV64ZVE32F-NEXT: vmv.x.s a3, v10
2661 ; RV64ZVE32F-NEXT: and a3, a3, a1
2662 ; RV64ZVE32F-NEXT: slli a3, a3, 2
2663 ; RV64ZVE32F-NEXT: add a3, a0, a3
2664 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2665 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
2666 ; RV64ZVE32F-NEXT: vse32.v v10, (a3)
2667 ; RV64ZVE32F-NEXT: andi a3, a2, 16
2668 ; RV64ZVE32F-NEXT: beqz a3, .LBB34_7
2669 ; RV64ZVE32F-NEXT: .LBB34_14: # %cond.store7
2670 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
2671 ; RV64ZVE32F-NEXT: vmv.x.s a3, v11
2672 ; RV64ZVE32F-NEXT: and a3, a3, a1
2673 ; RV64ZVE32F-NEXT: slli a3, a3, 2
2674 ; RV64ZVE32F-NEXT: add a3, a0, a3
2675 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2676 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
2677 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2678 ; RV64ZVE32F-NEXT: vse32.v v12, (a3)
2679 ; RV64ZVE32F-NEXT: andi a3, a2, 32
2680 ; RV64ZVE32F-NEXT: bnez a3, .LBB34_8
2681 ; RV64ZVE32F-NEXT: j .LBB34_9
2682 ; RV64ZVE32F-NEXT: .LBB34_15: # %cond.store11
2683 ; RV64ZVE32F-NEXT: vmv.x.s a3, v10
2684 ; RV64ZVE32F-NEXT: and a3, a3, a1
2685 ; RV64ZVE32F-NEXT: slli a3, a3, 2
2686 ; RV64ZVE32F-NEXT: add a3, a0, a3
2687 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2688 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
2689 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2690 ; RV64ZVE32F-NEXT: vse32.v v12, (a3)
2691 ; RV64ZVE32F-NEXT: andi a2, a2, -128
2692 ; RV64ZVE32F-NEXT: beqz a2, .LBB34_11
2693 ; RV64ZVE32F-NEXT: .LBB34_16: # %cond.store13
2694 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
2695 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
2696 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2697 ; RV64ZVE32F-NEXT: and a1, a2, a1
2698 ; RV64ZVE32F-NEXT: slli a1, a1, 2
2699 ; RV64ZVE32F-NEXT: add a0, a0, a1
2700 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2701 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
2702 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2703 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
2704 ; RV64ZVE32F-NEXT: ret
2705 %eidxs = zext <8 x i16> %idxs to <8 x i32>
2706 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
2707 call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
  ret void
}
2711 define void @mscatter_baseidx_v8i32(<8 x i32> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m) {
2712 ; RV32-LABEL: mscatter_baseidx_v8i32:
2714 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
2715 ; RV32-NEXT: vsll.vi v10, v10, 2
2716 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
2719 ; RV64-LABEL: mscatter_baseidx_v8i32:
2721 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
2722 ; RV64-NEXT: vsext.vf2 v12, v10
2723 ; RV64-NEXT: vsll.vi v12, v12, 2
2724 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2725 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
2728 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i32:
2729 ; RV64ZVE32F: # %bb.0:
2730 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
2731 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
2732 ; RV64ZVE32F-NEXT: andi a2, a1, 1
2733 ; RV64ZVE32F-NEXT: beqz a2, .LBB35_2
2734 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
2735 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2736 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2737 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2738 ; RV64ZVE32F-NEXT: add a2, a0, a2
2739 ; RV64ZVE32F-NEXT: vse32.v v8, (a2)
2740 ; RV64ZVE32F-NEXT: .LBB35_2: # %else
2741 ; RV64ZVE32F-NEXT: andi a2, a1, 2
2742 ; RV64ZVE32F-NEXT: beqz a2, .LBB35_4
2743 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
2744 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2745 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 1
2746 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
2747 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2748 ; RV64ZVE32F-NEXT: add a2, a0, a2
2749 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1
2750 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2751 ; RV64ZVE32F-NEXT: .LBB35_4: # %else2
2752 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
2753 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 4
2754 ; RV64ZVE32F-NEXT: andi a2, a1, 4
2755 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
2756 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
2757 ; RV64ZVE32F-NEXT: bnez a2, .LBB35_12
2758 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
2759 ; RV64ZVE32F-NEXT: andi a2, a1, 8
2760 ; RV64ZVE32F-NEXT: bnez a2, .LBB35_13
2761 ; RV64ZVE32F-NEXT: .LBB35_6: # %else6
2762 ; RV64ZVE32F-NEXT: andi a2, a1, 16
2763 ; RV64ZVE32F-NEXT: bnez a2, .LBB35_14
2764 ; RV64ZVE32F-NEXT: .LBB35_7: # %else8
2765 ; RV64ZVE32F-NEXT: andi a2, a1, 32
2766 ; RV64ZVE32F-NEXT: beqz a2, .LBB35_9
2767 ; RV64ZVE32F-NEXT: .LBB35_8: # %cond.store9
2768 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2769 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v12, 1
2770 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2771 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2772 ; RV64ZVE32F-NEXT: add a2, a0, a2
2773 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2774 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5
2775 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2776 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
2777 ; RV64ZVE32F-NEXT: .LBB35_9: # %else10
2778 ; RV64ZVE32F-NEXT: andi a2, a1, 64
2779 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
2780 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v12, 2
2781 ; RV64ZVE32F-NEXT: bnez a2, .LBB35_15
2782 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
2783 ; RV64ZVE32F-NEXT: andi a1, a1, -128
2784 ; RV64ZVE32F-NEXT: bnez a1, .LBB35_16
2785 ; RV64ZVE32F-NEXT: .LBB35_11: # %else14
2786 ; RV64ZVE32F-NEXT: ret
2787 ; RV64ZVE32F-NEXT: .LBB35_12: # %cond.store3
2788 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2789 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2790 ; RV64ZVE32F-NEXT: add a2, a0, a2
2791 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
2792 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2793 ; RV64ZVE32F-NEXT: vse32.v v11, (a2)
2794 ; RV64ZVE32F-NEXT: andi a2, a1, 8
2795 ; RV64ZVE32F-NEXT: beqz a2, .LBB35_6
2796 ; RV64ZVE32F-NEXT: .LBB35_13: # %cond.store5
2797 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2798 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
2799 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2800 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2801 ; RV64ZVE32F-NEXT: add a2, a0, a2
2802 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
2803 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
2804 ; RV64ZVE32F-NEXT: andi a2, a1, 16
2805 ; RV64ZVE32F-NEXT: beqz a2, .LBB35_7
2806 ; RV64ZVE32F-NEXT: .LBB35_14: # %cond.store7
2807 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2808 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
2809 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2810 ; RV64ZVE32F-NEXT: add a2, a0, a2
2811 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
2812 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2813 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
2814 ; RV64ZVE32F-NEXT: andi a2, a1, 32
2815 ; RV64ZVE32F-NEXT: bnez a2, .LBB35_8
2816 ; RV64ZVE32F-NEXT: j .LBB35_9
2817 ; RV64ZVE32F-NEXT: .LBB35_15: # %cond.store11
2818 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2819 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2820 ; RV64ZVE32F-NEXT: add a2, a0, a2
2821 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2822 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
2823 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2824 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2825 ; RV64ZVE32F-NEXT: andi a1, a1, -128
2826 ; RV64ZVE32F-NEXT: beqz a1, .LBB35_11
2827 ; RV64ZVE32F-NEXT: .LBB35_16: # %cond.store13
2828 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2829 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
2830 ; RV64ZVE32F-NEXT: vmv.x.s a1, v10
2831 ; RV64ZVE32F-NEXT: slli a1, a1, 2
2832 ; RV64ZVE32F-NEXT: add a0, a0, a1
2833 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2834 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
2835 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2836 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
2837 ; RV64ZVE32F-NEXT: ret
2838 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %idxs
2839 call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
2840 ret void
2841 }
2843 declare void @llvm.masked.scatter.v1i64.v1p0(<1 x i64>, <1 x ptr>, i32, <1 x i1>)
2845 define void @mscatter_v1i64(<1 x i64> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
2846 ; RV32V-LABEL: mscatter_v1i64:
2847 ; RV32V: # %bb.0:
2848 ; RV32V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
2849 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
2850 ; RV32V-NEXT: ret
2852 ; RV64-LABEL: mscatter_v1i64:
2853 ; RV64: # %bb.0:
2854 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
2855 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
2856 ; RV64-NEXT: ret
2858 ; RV32ZVE32F-LABEL: mscatter_v1i64:
2859 ; RV32ZVE32F: # %bb.0:
2860 ; RV32ZVE32F-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
2861 ; RV32ZVE32F-NEXT: vfirst.m a2, v0
2862 ; RV32ZVE32F-NEXT: bnez a2, .LBB36_2
2863 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.store
2864 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2865 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8
2866 ; RV32ZVE32F-NEXT: sw a1, 4(a2)
2867 ; RV32ZVE32F-NEXT: sw a0, 0(a2)
2868 ; RV32ZVE32F-NEXT: .LBB36_2: # %else
2869 ; RV32ZVE32F-NEXT: ret
2871 ; RV64ZVE32F-LABEL: mscatter_v1i64:
2872 ; RV64ZVE32F: # %bb.0:
2873 ; RV64ZVE32F-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
2874 ; RV64ZVE32F-NEXT: vfirst.m a2, v0
2875 ; RV64ZVE32F-NEXT: bnez a2, .LBB36_2
2876 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
2877 ; RV64ZVE32F-NEXT: sd a0, 0(a1)
2878 ; RV64ZVE32F-NEXT: .LBB36_2: # %else
2879 ; RV64ZVE32F-NEXT: ret
2880 call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> %val, <1 x ptr> %ptrs, i32 8, <1 x i1> %m)
2881 ret void
2882 }
2884 declare void @llvm.masked.scatter.v2i64.v2p0(<2 x i64>, <2 x ptr>, i32, <2 x i1>)
2886 define void @mscatter_v2i64(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
2887 ; RV32V-LABEL: mscatter_v2i64:
2888 ; RV32V: # %bb.0:
2889 ; RV32V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
2890 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
2891 ; RV32V-NEXT: ret
2893 ; RV64-LABEL: mscatter_v2i64:
2894 ; RV64: # %bb.0:
2895 ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
2896 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
2897 ; RV64-NEXT: ret
2899 ; RV32ZVE32F-LABEL: mscatter_v2i64:
2900 ; RV32ZVE32F: # %bb.0:
2901 ; RV32ZVE32F-NEXT: lw a2, 12(a0)
2902 ; RV32ZVE32F-NEXT: lw a1, 8(a0)
2903 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
2904 ; RV32ZVE32F-NEXT: vmv.x.s a3, v0
2905 ; RV32ZVE32F-NEXT: andi a4, a3, 1
2906 ; RV32ZVE32F-NEXT: bnez a4, .LBB37_3
2907 ; RV32ZVE32F-NEXT: # %bb.1: # %else
2908 ; RV32ZVE32F-NEXT: andi a3, a3, 2
2909 ; RV32ZVE32F-NEXT: bnez a3, .LBB37_4
2910 ; RV32ZVE32F-NEXT: .LBB37_2: # %else2
2911 ; RV32ZVE32F-NEXT: ret
2912 ; RV32ZVE32F-NEXT: .LBB37_3: # %cond.store
2913 ; RV32ZVE32F-NEXT: lw a4, 4(a0)
2914 ; RV32ZVE32F-NEXT: lw a0, 0(a0)
2915 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
2916 ; RV32ZVE32F-NEXT: vmv.x.s a5, v8
2917 ; RV32ZVE32F-NEXT: sw a4, 4(a5)
2918 ; RV32ZVE32F-NEXT: sw a0, 0(a5)
2919 ; RV32ZVE32F-NEXT: andi a3, a3, 2
2920 ; RV32ZVE32F-NEXT: beqz a3, .LBB37_2
2921 ; RV32ZVE32F-NEXT: .LBB37_4: # %cond.store1
2922 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2923 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 1
2924 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
2925 ; RV32ZVE32F-NEXT: sw a2, 4(a0)
2926 ; RV32ZVE32F-NEXT: sw a1, 0(a0)
2927 ; RV32ZVE32F-NEXT: ret
2929 ; RV64ZVE32F-LABEL: mscatter_v2i64:
2930 ; RV64ZVE32F: # %bb.0:
2931 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
2932 ; RV64ZVE32F-NEXT: vmv.x.s a4, v0
2933 ; RV64ZVE32F-NEXT: andi a5, a4, 1
2934 ; RV64ZVE32F-NEXT: bnez a5, .LBB37_3
2935 ; RV64ZVE32F-NEXT: # %bb.1: # %else
2936 ; RV64ZVE32F-NEXT: andi a4, a4, 2
2937 ; RV64ZVE32F-NEXT: bnez a4, .LBB37_4
2938 ; RV64ZVE32F-NEXT: .LBB37_2: # %else2
2939 ; RV64ZVE32F-NEXT: ret
2940 ; RV64ZVE32F-NEXT: .LBB37_3: # %cond.store
2941 ; RV64ZVE32F-NEXT: sd a0, 0(a2)
2942 ; RV64ZVE32F-NEXT: andi a4, a4, 2
2943 ; RV64ZVE32F-NEXT: beqz a4, .LBB37_2
2944 ; RV64ZVE32F-NEXT: .LBB37_4: # %cond.store1
2945 ; RV64ZVE32F-NEXT: sd a1, 0(a3)
2946 ; RV64ZVE32F-NEXT: ret
2947 call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> %val, <2 x ptr> %ptrs, i32 8, <2 x i1> %m)
2948 ret void
2949 }
2951 declare void @llvm.masked.scatter.v4i64.v4p0(<4 x i64>, <4 x ptr>, i32, <4 x i1>)
2953 define void @mscatter_v4i64(<4 x i64> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
2954 ; RV32V-LABEL: mscatter_v4i64:
2955 ; RV32V: # %bb.0:
2956 ; RV32V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
2957 ; RV32V-NEXT: vsoxei32.v v8, (zero), v10, v0.t
2958 ; RV32V-NEXT: ret
2960 ; RV64-LABEL: mscatter_v4i64:
2961 ; RV64: # %bb.0:
2962 ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
2963 ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
2964 ; RV64-NEXT: ret
2966 ; RV32ZVE32F-LABEL: mscatter_v4i64:
2967 ; RV32ZVE32F: # %bb.0:
2968 ; RV32ZVE32F-NEXT: lw a1, 28(a0)
2969 ; RV32ZVE32F-NEXT: lw a2, 24(a0)
2970 ; RV32ZVE32F-NEXT: lw a3, 20(a0)
2971 ; RV32ZVE32F-NEXT: lw a4, 16(a0)
2972 ; RV32ZVE32F-NEXT: lw a7, 12(a0)
2973 ; RV32ZVE32F-NEXT: lw a6, 8(a0)
2974 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
2975 ; RV32ZVE32F-NEXT: vmv.x.s a5, v0
2976 ; RV32ZVE32F-NEXT: andi t0, a5, 1
2977 ; RV32ZVE32F-NEXT: bnez t0, .LBB38_5
2978 ; RV32ZVE32F-NEXT: # %bb.1: # %else
2979 ; RV32ZVE32F-NEXT: andi a0, a5, 2
2980 ; RV32ZVE32F-NEXT: bnez a0, .LBB38_6
2981 ; RV32ZVE32F-NEXT: .LBB38_2: # %else2
2982 ; RV32ZVE32F-NEXT: andi a0, a5, 4
2983 ; RV32ZVE32F-NEXT: bnez a0, .LBB38_7
2984 ; RV32ZVE32F-NEXT: .LBB38_3: # %else4
2985 ; RV32ZVE32F-NEXT: andi a5, a5, 8
2986 ; RV32ZVE32F-NEXT: bnez a5, .LBB38_8
2987 ; RV32ZVE32F-NEXT: .LBB38_4: # %else6
2988 ; RV32ZVE32F-NEXT: ret
2989 ; RV32ZVE32F-NEXT: .LBB38_5: # %cond.store
2990 ; RV32ZVE32F-NEXT: lw t0, 4(a0)
2991 ; RV32ZVE32F-NEXT: lw a0, 0(a0)
2992 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
2993 ; RV32ZVE32F-NEXT: vmv.x.s t1, v8
2994 ; RV32ZVE32F-NEXT: sw t0, 4(t1)
2995 ; RV32ZVE32F-NEXT: sw a0, 0(t1)
2996 ; RV32ZVE32F-NEXT: andi a0, a5, 2
2997 ; RV32ZVE32F-NEXT: beqz a0, .LBB38_2
2998 ; RV32ZVE32F-NEXT: .LBB38_6: # %cond.store1
2999 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3000 ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1
3001 ; RV32ZVE32F-NEXT: vmv.x.s a0, v9
3002 ; RV32ZVE32F-NEXT: sw a7, 4(a0)
3003 ; RV32ZVE32F-NEXT: sw a6, 0(a0)
3004 ; RV32ZVE32F-NEXT: andi a0, a5, 4
3005 ; RV32ZVE32F-NEXT: beqz a0, .LBB38_3
3006 ; RV32ZVE32F-NEXT: .LBB38_7: # %cond.store3
3007 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3008 ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2
3009 ; RV32ZVE32F-NEXT: vmv.x.s a0, v9
3010 ; RV32ZVE32F-NEXT: sw a4, 0(a0)
3011 ; RV32ZVE32F-NEXT: sw a3, 4(a0)
3012 ; RV32ZVE32F-NEXT: andi a5, a5, 8
3013 ; RV32ZVE32F-NEXT: beqz a5, .LBB38_4
3014 ; RV32ZVE32F-NEXT: .LBB38_8: # %cond.store5
3015 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3016 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3
3017 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
3018 ; RV32ZVE32F-NEXT: sw a2, 0(a0)
3019 ; RV32ZVE32F-NEXT: sw a1, 4(a0)
3020 ; RV32ZVE32F-NEXT: ret
3022 ; RV64ZVE32F-LABEL: mscatter_v4i64:
3023 ; RV64ZVE32F: # %bb.0:
3024 ; RV64ZVE32F-NEXT: ld a2, 24(a1)
3025 ; RV64ZVE32F-NEXT: ld a4, 16(a1)
3026 ; RV64ZVE32F-NEXT: ld a7, 8(a1)
3027 ; RV64ZVE32F-NEXT: ld a3, 24(a0)
3028 ; RV64ZVE32F-NEXT: ld a5, 16(a0)
3029 ; RV64ZVE32F-NEXT: ld t0, 8(a0)
3030 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
3031 ; RV64ZVE32F-NEXT: vmv.x.s a6, v0
3032 ; RV64ZVE32F-NEXT: andi t1, a6, 1
3033 ; RV64ZVE32F-NEXT: bnez t1, .LBB38_5
3034 ; RV64ZVE32F-NEXT: # %bb.1: # %else
3035 ; RV64ZVE32F-NEXT: andi a0, a6, 2
3036 ; RV64ZVE32F-NEXT: bnez a0, .LBB38_6
3037 ; RV64ZVE32F-NEXT: .LBB38_2: # %else2
3038 ; RV64ZVE32F-NEXT: andi a0, a6, 4
3039 ; RV64ZVE32F-NEXT: bnez a0, .LBB38_7
3040 ; RV64ZVE32F-NEXT: .LBB38_3: # %else4
3041 ; RV64ZVE32F-NEXT: andi a0, a6, 8
3042 ; RV64ZVE32F-NEXT: bnez a0, .LBB38_8
3043 ; RV64ZVE32F-NEXT: .LBB38_4: # %else6
3044 ; RV64ZVE32F-NEXT: ret
3045 ; RV64ZVE32F-NEXT: .LBB38_5: # %cond.store
3046 ; RV64ZVE32F-NEXT: ld a1, 0(a1)
3047 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
3048 ; RV64ZVE32F-NEXT: sd a0, 0(a1)
3049 ; RV64ZVE32F-NEXT: andi a0, a6, 2
3050 ; RV64ZVE32F-NEXT: beqz a0, .LBB38_2
3051 ; RV64ZVE32F-NEXT: .LBB38_6: # %cond.store1
3052 ; RV64ZVE32F-NEXT: sd t0, 0(a7)
3053 ; RV64ZVE32F-NEXT: andi a0, a6, 4
3054 ; RV64ZVE32F-NEXT: beqz a0, .LBB38_3
3055 ; RV64ZVE32F-NEXT: .LBB38_7: # %cond.store3
3056 ; RV64ZVE32F-NEXT: sd a5, 0(a4)
3057 ; RV64ZVE32F-NEXT: andi a0, a6, 8
3058 ; RV64ZVE32F-NEXT: beqz a0, .LBB38_4
3059 ; RV64ZVE32F-NEXT: .LBB38_8: # %cond.store5
3060 ; RV64ZVE32F-NEXT: sd a3, 0(a2)
3061 ; RV64ZVE32F-NEXT: ret
3062 call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> %val, <4 x ptr> %ptrs, i32 8, <4 x i1> %m)
3063 ret void
3064 }
3066 define void @mscatter_truemask_v4i64(<4 x i64> %val, <4 x ptr> %ptrs) {
3067 ; RV32V-LABEL: mscatter_truemask_v4i64:
3068 ; RV32V: # %bb.0:
3069 ; RV32V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
3070 ; RV32V-NEXT: vsoxei32.v v8, (zero), v10
3071 ; RV32V-NEXT: ret
3073 ; RV64-LABEL: mscatter_truemask_v4i64:
3074 ; RV64: # %bb.0:
3075 ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
3076 ; RV64-NEXT: vsoxei64.v v8, (zero), v10
3077 ; RV64-NEXT: ret
3079 ; RV32ZVE32F-LABEL: mscatter_truemask_v4i64:
3080 ; RV32ZVE32F: # %bb.0:
3081 ; RV32ZVE32F-NEXT: lw a1, 28(a0)
3082 ; RV32ZVE32F-NEXT: lw a2, 24(a0)
3083 ; RV32ZVE32F-NEXT: lw a3, 20(a0)
3084 ; RV32ZVE32F-NEXT: lw a4, 16(a0)
3085 ; RV32ZVE32F-NEXT: lw a5, 12(a0)
3086 ; RV32ZVE32F-NEXT: lw a6, 0(a0)
3087 ; RV32ZVE32F-NEXT: lw a7, 4(a0)
3088 ; RV32ZVE32F-NEXT: lw a0, 8(a0)
3089 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3090 ; RV32ZVE32F-NEXT: vmv.x.s t0, v8
3091 ; RV32ZVE32F-NEXT: sw a6, 0(t0)
3092 ; RV32ZVE32F-NEXT: sw a7, 4(t0)
3093 ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1
3094 ; RV32ZVE32F-NEXT: vmv.x.s a6, v9
3095 ; RV32ZVE32F-NEXT: sw a0, 0(a6)
3096 ; RV32ZVE32F-NEXT: sw a5, 4(a6)
3097 ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2
3098 ; RV32ZVE32F-NEXT: vmv.x.s a0, v9
3099 ; RV32ZVE32F-NEXT: sw a4, 0(a0)
3100 ; RV32ZVE32F-NEXT: sw a3, 4(a0)
3101 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3
3102 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
3103 ; RV32ZVE32F-NEXT: sw a2, 0(a0)
3104 ; RV32ZVE32F-NEXT: sw a1, 4(a0)
3105 ; RV32ZVE32F-NEXT: ret
3107 ; RV64ZVE32F-LABEL: mscatter_truemask_v4i64:
3108 ; RV64ZVE32F: # %bb.0:
3109 ; RV64ZVE32F-NEXT: ld a2, 24(a1)
3110 ; RV64ZVE32F-NEXT: ld a3, 16(a1)
3111 ; RV64ZVE32F-NEXT: ld a4, 8(a1)
3112 ; RV64ZVE32F-NEXT: ld a1, 0(a1)
3113 ; RV64ZVE32F-NEXT: ld a5, 0(a0)
3114 ; RV64ZVE32F-NEXT: ld a6, 8(a0)
3115 ; RV64ZVE32F-NEXT: ld a7, 16(a0)
3116 ; RV64ZVE32F-NEXT: ld a0, 24(a0)
3117 ; RV64ZVE32F-NEXT: sd a5, 0(a1)
3118 ; RV64ZVE32F-NEXT: sd a6, 0(a4)
3119 ; RV64ZVE32F-NEXT: sd a7, 0(a3)
3120 ; RV64ZVE32F-NEXT: sd a0, 0(a2)
3121 ; RV64ZVE32F-NEXT: ret
3122 call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> %val, <4 x ptr> %ptrs, i32 8, <4 x i1> splat (i1 1))
3123 ret void
3124 }
3126 define void @mscatter_falsemask_v4i64(<4 x i64> %val, <4 x ptr> %ptrs) {
3127 ; CHECK-LABEL: mscatter_falsemask_v4i64:
3128 ; CHECK: # %bb.0:
3129 ; CHECK-NEXT: ret
3130 call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> %val, <4 x ptr> %ptrs, i32 8, <4 x i1> zeroinitializer)
3131 ret void
3132 }
3134 declare void @llvm.masked.scatter.v8i64.v8p0(<8 x i64>, <8 x ptr>, i32, <8 x i1>)
3136 define void @mscatter_v8i64(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
3137 ; RV32V-LABEL: mscatter_v8i64:
3138 ; RV32V: # %bb.0:
3139 ; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
3140 ; RV32V-NEXT: vsoxei32.v v8, (zero), v12, v0.t
3141 ; RV32V-NEXT: ret
3143 ; RV64-LABEL: mscatter_v8i64:
3144 ; RV64: # %bb.0:
3145 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
3146 ; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
3147 ; RV64-NEXT: ret
3149 ; RV32ZVE32F-LABEL: mscatter_v8i64:
3150 ; RV32ZVE32F: # %bb.0:
3151 ; RV32ZVE32F-NEXT: addi sp, sp, -16
3152 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
3153 ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
3154 ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
3155 ; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
3156 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
3157 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
3158 ; RV32ZVE32F-NEXT: .cfi_offset s2, -12
3159 ; RV32ZVE32F-NEXT: lw a1, 60(a0)
3160 ; RV32ZVE32F-NEXT: lw a2, 56(a0)
3161 ; RV32ZVE32F-NEXT: lw a3, 52(a0)
3162 ; RV32ZVE32F-NEXT: lw a4, 48(a0)
3163 ; RV32ZVE32F-NEXT: lw a5, 44(a0)
3164 ; RV32ZVE32F-NEXT: lw a7, 40(a0)
3165 ; RV32ZVE32F-NEXT: lw t0, 36(a0)
3166 ; RV32ZVE32F-NEXT: lw t1, 32(a0)
3167 ; RV32ZVE32F-NEXT: lw t2, 28(a0)
3168 ; RV32ZVE32F-NEXT: lw t3, 24(a0)
3169 ; RV32ZVE32F-NEXT: lw t4, 20(a0)
3170 ; RV32ZVE32F-NEXT: lw t5, 16(a0)
3171 ; RV32ZVE32F-NEXT: lw s0, 12(a0)
3172 ; RV32ZVE32F-NEXT: lw t6, 8(a0)
3173 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
3174 ; RV32ZVE32F-NEXT: vmv.x.s a6, v0
3175 ; RV32ZVE32F-NEXT: andi s1, a6, 1
3176 ; RV32ZVE32F-NEXT: bnez s1, .LBB41_10
3177 ; RV32ZVE32F-NEXT: # %bb.1: # %else
3178 ; RV32ZVE32F-NEXT: andi a0, a6, 2
3179 ; RV32ZVE32F-NEXT: bnez a0, .LBB41_11
3180 ; RV32ZVE32F-NEXT: .LBB41_2: # %else2
3181 ; RV32ZVE32F-NEXT: andi a0, a6, 4
3182 ; RV32ZVE32F-NEXT: bnez a0, .LBB41_12
3183 ; RV32ZVE32F-NEXT: .LBB41_3: # %else4
3184 ; RV32ZVE32F-NEXT: andi a0, a6, 8
3185 ; RV32ZVE32F-NEXT: bnez a0, .LBB41_13
3186 ; RV32ZVE32F-NEXT: .LBB41_4: # %else6
3187 ; RV32ZVE32F-NEXT: andi a0, a6, 16
3188 ; RV32ZVE32F-NEXT: bnez a0, .LBB41_14
3189 ; RV32ZVE32F-NEXT: .LBB41_5: # %else8
3190 ; RV32ZVE32F-NEXT: andi a0, a6, 32
3191 ; RV32ZVE32F-NEXT: bnez a0, .LBB41_15
3192 ; RV32ZVE32F-NEXT: .LBB41_6: # %else10
3193 ; RV32ZVE32F-NEXT: andi a0, a6, 64
3194 ; RV32ZVE32F-NEXT: bnez a0, .LBB41_16
3195 ; RV32ZVE32F-NEXT: .LBB41_7: # %else12
3196 ; RV32ZVE32F-NEXT: andi a0, a6, -128
3197 ; RV32ZVE32F-NEXT: beqz a0, .LBB41_9
3198 ; RV32ZVE32F-NEXT: .LBB41_8: # %cond.store13
3199 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
3200 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
3201 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
3202 ; RV32ZVE32F-NEXT: sw a2, 0(a0)
3203 ; RV32ZVE32F-NEXT: sw a1, 4(a0)
3204 ; RV32ZVE32F-NEXT: .LBB41_9: # %else14
3205 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
3206 ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
3207 ; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
3208 ; RV32ZVE32F-NEXT: addi sp, sp, 16
3209 ; RV32ZVE32F-NEXT: ret
3210 ; RV32ZVE32F-NEXT: .LBB41_10: # %cond.store
3211 ; RV32ZVE32F-NEXT: lw s1, 4(a0)
3212 ; RV32ZVE32F-NEXT: lw a0, 0(a0)
3213 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
3214 ; RV32ZVE32F-NEXT: vmv.x.s s2, v8
3215 ; RV32ZVE32F-NEXT: sw s1, 4(s2)
3216 ; RV32ZVE32F-NEXT: sw a0, 0(s2)
3217 ; RV32ZVE32F-NEXT: andi a0, a6, 2
3218 ; RV32ZVE32F-NEXT: beqz a0, .LBB41_2
3219 ; RV32ZVE32F-NEXT: .LBB41_11: # %cond.store1
3220 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3221 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
3222 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
3223 ; RV32ZVE32F-NEXT: sw s0, 4(a0)
3224 ; RV32ZVE32F-NEXT: sw t6, 0(a0)
3225 ; RV32ZVE32F-NEXT: andi a0, a6, 4
3226 ; RV32ZVE32F-NEXT: beqz a0, .LBB41_3
3227 ; RV32ZVE32F-NEXT: .LBB41_12: # %cond.store3
3228 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3229 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
3230 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
3231 ; RV32ZVE32F-NEXT: sw t5, 0(a0)
3232 ; RV32ZVE32F-NEXT: sw t4, 4(a0)
3233 ; RV32ZVE32F-NEXT: andi a0, a6, 8
3234 ; RV32ZVE32F-NEXT: beqz a0, .LBB41_4
3235 ; RV32ZVE32F-NEXT: .LBB41_13: # %cond.store5
3236 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3237 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
3238 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
3239 ; RV32ZVE32F-NEXT: sw t3, 0(a0)
3240 ; RV32ZVE32F-NEXT: sw t2, 4(a0)
3241 ; RV32ZVE32F-NEXT: andi a0, a6, 16
3242 ; RV32ZVE32F-NEXT: beqz a0, .LBB41_5
3243 ; RV32ZVE32F-NEXT: .LBB41_14: # %cond.store7
3244 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
3245 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
3246 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
3247 ; RV32ZVE32F-NEXT: sw t1, 0(a0)
3248 ; RV32ZVE32F-NEXT: sw t0, 4(a0)
3249 ; RV32ZVE32F-NEXT: andi a0, a6, 32
3250 ; RV32ZVE32F-NEXT: beqz a0, .LBB41_6
3251 ; RV32ZVE32F-NEXT: .LBB41_15: # %cond.store9
3252 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
3253 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
3254 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
3255 ; RV32ZVE32F-NEXT: sw a7, 0(a0)
3256 ; RV32ZVE32F-NEXT: sw a5, 4(a0)
3257 ; RV32ZVE32F-NEXT: andi a0, a6, 64
3258 ; RV32ZVE32F-NEXT: beqz a0, .LBB41_7
3259 ; RV32ZVE32F-NEXT: .LBB41_16: # %cond.store11
3260 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
3261 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
3262 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
3263 ; RV32ZVE32F-NEXT: sw a4, 0(a0)
3264 ; RV32ZVE32F-NEXT: sw a3, 4(a0)
3265 ; RV32ZVE32F-NEXT: andi a0, a6, -128
3266 ; RV32ZVE32F-NEXT: bnez a0, .LBB41_8
3267 ; RV32ZVE32F-NEXT: j .LBB41_9
3269 ; RV64ZVE32F-LABEL: mscatter_v8i64:
3270 ; RV64ZVE32F: # %bb.0:
3271 ; RV64ZVE32F-NEXT: addi sp, sp, -32
3272 ; RV64ZVE32F-NEXT: .cfi_def_cfa_offset 32
3273 ; RV64ZVE32F-NEXT: sd s0, 24(sp) # 8-byte Folded Spill
3274 ; RV64ZVE32F-NEXT: sd s1, 16(sp) # 8-byte Folded Spill
3275 ; RV64ZVE32F-NEXT: sd s2, 8(sp) # 8-byte Folded Spill
3276 ; RV64ZVE32F-NEXT: .cfi_offset s0, -8
3277 ; RV64ZVE32F-NEXT: .cfi_offset s1, -16
3278 ; RV64ZVE32F-NEXT: .cfi_offset s2, -24
3279 ; RV64ZVE32F-NEXT: ld a2, 56(a1)
3280 ; RV64ZVE32F-NEXT: ld a4, 48(a1)
3281 ; RV64ZVE32F-NEXT: ld a6, 40(a1)
3282 ; RV64ZVE32F-NEXT: ld t1, 32(a1)
3283 ; RV64ZVE32F-NEXT: ld t3, 24(a1)
3284 ; RV64ZVE32F-NEXT: ld t5, 16(a1)
3285 ; RV64ZVE32F-NEXT: ld s0, 8(a1)
3286 ; RV64ZVE32F-NEXT: ld a3, 56(a0)
3287 ; RV64ZVE32F-NEXT: ld a5, 48(a0)
3288 ; RV64ZVE32F-NEXT: ld t0, 40(a0)
3289 ; RV64ZVE32F-NEXT: ld t2, 32(a0)
3290 ; RV64ZVE32F-NEXT: ld t4, 24(a0)
3291 ; RV64ZVE32F-NEXT: ld t6, 16(a0)
3292 ; RV64ZVE32F-NEXT: ld s1, 8(a0)
3293 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
3294 ; RV64ZVE32F-NEXT: vmv.x.s a7, v0
3295 ; RV64ZVE32F-NEXT: andi s2, a7, 1
3296 ; RV64ZVE32F-NEXT: bnez s2, .LBB41_10
3297 ; RV64ZVE32F-NEXT: # %bb.1: # %else
3298 ; RV64ZVE32F-NEXT: andi a0, a7, 2
3299 ; RV64ZVE32F-NEXT: bnez a0, .LBB41_11
3300 ; RV64ZVE32F-NEXT: .LBB41_2: # %else2
3301 ; RV64ZVE32F-NEXT: andi a0, a7, 4
3302 ; RV64ZVE32F-NEXT: bnez a0, .LBB41_12
3303 ; RV64ZVE32F-NEXT: .LBB41_3: # %else4
3304 ; RV64ZVE32F-NEXT: andi a0, a7, 8
3305 ; RV64ZVE32F-NEXT: bnez a0, .LBB41_13
3306 ; RV64ZVE32F-NEXT: .LBB41_4: # %else6
3307 ; RV64ZVE32F-NEXT: andi a0, a7, 16
3308 ; RV64ZVE32F-NEXT: bnez a0, .LBB41_14
3309 ; RV64ZVE32F-NEXT: .LBB41_5: # %else8
3310 ; RV64ZVE32F-NEXT: andi a0, a7, 32
3311 ; RV64ZVE32F-NEXT: bnez a0, .LBB41_15
3312 ; RV64ZVE32F-NEXT: .LBB41_6: # %else10
3313 ; RV64ZVE32F-NEXT: andi a0, a7, 64
3314 ; RV64ZVE32F-NEXT: bnez a0, .LBB41_16
3315 ; RV64ZVE32F-NEXT: .LBB41_7: # %else12
3316 ; RV64ZVE32F-NEXT: andi a0, a7, -128
3317 ; RV64ZVE32F-NEXT: beqz a0, .LBB41_9
3318 ; RV64ZVE32F-NEXT: .LBB41_8: # %cond.store13
3319 ; RV64ZVE32F-NEXT: sd a3, 0(a2)
3320 ; RV64ZVE32F-NEXT: .LBB41_9: # %else14
3321 ; RV64ZVE32F-NEXT: ld s0, 24(sp) # 8-byte Folded Reload
3322 ; RV64ZVE32F-NEXT: ld s1, 16(sp) # 8-byte Folded Reload
3323 ; RV64ZVE32F-NEXT: ld s2, 8(sp) # 8-byte Folded Reload
3324 ; RV64ZVE32F-NEXT: addi sp, sp, 32
3325 ; RV64ZVE32F-NEXT: ret
3326 ; RV64ZVE32F-NEXT: .LBB41_10: # %cond.store
3327 ; RV64ZVE32F-NEXT: ld a1, 0(a1)
3328 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
3329 ; RV64ZVE32F-NEXT: sd a0, 0(a1)
3330 ; RV64ZVE32F-NEXT: andi a0, a7, 2
3331 ; RV64ZVE32F-NEXT: beqz a0, .LBB41_2
3332 ; RV64ZVE32F-NEXT: .LBB41_11: # %cond.store1
3333 ; RV64ZVE32F-NEXT: sd s1, 0(s0)
3334 ; RV64ZVE32F-NEXT: andi a0, a7, 4
3335 ; RV64ZVE32F-NEXT: beqz a0, .LBB41_3
3336 ; RV64ZVE32F-NEXT: .LBB41_12: # %cond.store3
3337 ; RV64ZVE32F-NEXT: sd t6, 0(t5)
3338 ; RV64ZVE32F-NEXT: andi a0, a7, 8
3339 ; RV64ZVE32F-NEXT: beqz a0, .LBB41_4
3340 ; RV64ZVE32F-NEXT: .LBB41_13: # %cond.store5
3341 ; RV64ZVE32F-NEXT: sd t4, 0(t3)
3342 ; RV64ZVE32F-NEXT: andi a0, a7, 16
3343 ; RV64ZVE32F-NEXT: beqz a0, .LBB41_5
3344 ; RV64ZVE32F-NEXT: .LBB41_14: # %cond.store7
3345 ; RV64ZVE32F-NEXT: sd t2, 0(t1)
3346 ; RV64ZVE32F-NEXT: andi a0, a7, 32
3347 ; RV64ZVE32F-NEXT: beqz a0, .LBB41_6
3348 ; RV64ZVE32F-NEXT: .LBB41_15: # %cond.store9
3349 ; RV64ZVE32F-NEXT: sd t0, 0(a6)
3350 ; RV64ZVE32F-NEXT: andi a0, a7, 64
3351 ; RV64ZVE32F-NEXT: beqz a0, .LBB41_7
3352 ; RV64ZVE32F-NEXT: .LBB41_16: # %cond.store11
3353 ; RV64ZVE32F-NEXT: sd a5, 0(a4)
3354 ; RV64ZVE32F-NEXT: andi a0, a7, -128
3355 ; RV64ZVE32F-NEXT: bnez a0, .LBB41_8
3356 ; RV64ZVE32F-NEXT: j .LBB41_9
3357 call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
3358 ret void
3359 }
3361 define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
3362 ; RV32V-LABEL: mscatter_baseidx_v8i8_v8i64:
3363 ; RV32V: # %bb.0:
3364 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
3365 ; RV32V-NEXT: vsext.vf4 v14, v12
3366 ; RV32V-NEXT: vsll.vi v12, v14, 3
3367 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
3368 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
3369 ; RV32V-NEXT: ret
3371 ; RV64-LABEL: mscatter_baseidx_v8i8_v8i64:
3372 ; RV64: # %bb.0:
3373 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
3374 ; RV64-NEXT: vsext.vf8 v16, v12
3375 ; RV64-NEXT: vsll.vi v12, v16, 3
3376 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
3377 ; RV64-NEXT: ret
3379 ; RV32ZVE32F-LABEL: mscatter_baseidx_v8i8_v8i64:
3380 ; RV32ZVE32F: # %bb.0:
3381 ; RV32ZVE32F-NEXT: addi sp, sp, -16
3382 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
3383 ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
3384 ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
3385 ; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
3386 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
3387 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
3388 ; RV32ZVE32F-NEXT: .cfi_offset s2, -12
3389 ; RV32ZVE32F-NEXT: lw a2, 60(a0)
3390 ; RV32ZVE32F-NEXT: lw a3, 56(a0)
3391 ; RV32ZVE32F-NEXT: lw a4, 52(a0)
3392 ; RV32ZVE32F-NEXT: lw a5, 48(a0)
3393 ; RV32ZVE32F-NEXT: lw a6, 44(a0)
3394 ; RV32ZVE32F-NEXT: lw a7, 40(a0)
3395 ; RV32ZVE32F-NEXT: lw t1, 36(a0)
3396 ; RV32ZVE32F-NEXT: lw t2, 32(a0)
3397 ; RV32ZVE32F-NEXT: lw t3, 28(a0)
3398 ; RV32ZVE32F-NEXT: lw t4, 24(a0)
3399 ; RV32ZVE32F-NEXT: lw t5, 20(a0)
3400 ; RV32ZVE32F-NEXT: lw t6, 16(a0)
3401 ; RV32ZVE32F-NEXT: lw s1, 12(a0)
3402 ; RV32ZVE32F-NEXT: lw s0, 8(a0)
3403 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
3404 ; RV32ZVE32F-NEXT: vsext.vf4 v10, v8
3405 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
3406 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
3407 ; RV32ZVE32F-NEXT: vmv.x.s t0, v0
3408 ; RV32ZVE32F-NEXT: andi s2, t0, 1
3409 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
3410 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
3411 ; RV32ZVE32F-NEXT: bnez s2, .LBB42_10
3412 ; RV32ZVE32F-NEXT: # %bb.1: # %else
3413 ; RV32ZVE32F-NEXT: andi a0, t0, 2
3414 ; RV32ZVE32F-NEXT: bnez a0, .LBB42_11
3415 ; RV32ZVE32F-NEXT: .LBB42_2: # %else2
3416 ; RV32ZVE32F-NEXT: andi a0, t0, 4
3417 ; RV32ZVE32F-NEXT: bnez a0, .LBB42_12
3418 ; RV32ZVE32F-NEXT: .LBB42_3: # %else4
3419 ; RV32ZVE32F-NEXT: andi a0, t0, 8
3420 ; RV32ZVE32F-NEXT: bnez a0, .LBB42_13
3421 ; RV32ZVE32F-NEXT: .LBB42_4: # %else6
3422 ; RV32ZVE32F-NEXT: andi a0, t0, 16
3423 ; RV32ZVE32F-NEXT: bnez a0, .LBB42_14
3424 ; RV32ZVE32F-NEXT: .LBB42_5: # %else8
3425 ; RV32ZVE32F-NEXT: andi a0, t0, 32
3426 ; RV32ZVE32F-NEXT: bnez a0, .LBB42_15
3427 ; RV32ZVE32F-NEXT: .LBB42_6: # %else10
3428 ; RV32ZVE32F-NEXT: andi a0, t0, 64
3429 ; RV32ZVE32F-NEXT: bnez a0, .LBB42_16
3430 ; RV32ZVE32F-NEXT: .LBB42_7: # %else12
3431 ; RV32ZVE32F-NEXT: andi a0, t0, -128
3432 ; RV32ZVE32F-NEXT: beqz a0, .LBB42_9
3433 ; RV32ZVE32F-NEXT: .LBB42_8: # %cond.store13
3434 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
3435 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
3436 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
3437 ; RV32ZVE32F-NEXT: sw a3, 0(a0)
3438 ; RV32ZVE32F-NEXT: sw a2, 4(a0)
3439 ; RV32ZVE32F-NEXT: .LBB42_9: # %else14
3440 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
3441 ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
3442 ; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
3443 ; RV32ZVE32F-NEXT: addi sp, sp, 16
3444 ; RV32ZVE32F-NEXT: ret
3445 ; RV32ZVE32F-NEXT: .LBB42_10: # %cond.store
3446 ; RV32ZVE32F-NEXT: lw a1, 4(a0)
3447 ; RV32ZVE32F-NEXT: lw a0, 0(a0)
3448 ; RV32ZVE32F-NEXT: vmv.x.s s2, v8
3449 ; RV32ZVE32F-NEXT: sw a1, 4(s2)
3450 ; RV32ZVE32F-NEXT: sw a0, 0(s2)
3451 ; RV32ZVE32F-NEXT: andi a0, t0, 2
3452 ; RV32ZVE32F-NEXT: beqz a0, .LBB42_2
3453 ; RV32ZVE32F-NEXT: .LBB42_11: # %cond.store1
3454 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3455 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
3456 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
3457 ; RV32ZVE32F-NEXT: sw s1, 4(a0)
3458 ; RV32ZVE32F-NEXT: sw s0, 0(a0)
3459 ; RV32ZVE32F-NEXT: andi a0, t0, 4
3460 ; RV32ZVE32F-NEXT: beqz a0, .LBB42_3
3461 ; RV32ZVE32F-NEXT: .LBB42_12: # %cond.store3
3462 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3463 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
3464 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
3465 ; RV32ZVE32F-NEXT: sw t6, 0(a0)
3466 ; RV32ZVE32F-NEXT: sw t5, 4(a0)
3467 ; RV32ZVE32F-NEXT: andi a0, t0, 8
3468 ; RV32ZVE32F-NEXT: beqz a0, .LBB42_4
3469 ; RV32ZVE32F-NEXT: .LBB42_13: # %cond.store5
3470 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3471 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
3472 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
3473 ; RV32ZVE32F-NEXT: sw t4, 0(a0)
3474 ; RV32ZVE32F-NEXT: sw t3, 4(a0)
3475 ; RV32ZVE32F-NEXT: andi a0, t0, 16
3476 ; RV32ZVE32F-NEXT: beqz a0, .LBB42_5
3477 ; RV32ZVE32F-NEXT: .LBB42_14: # %cond.store7
3478 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
3479 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
3480 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
3481 ; RV32ZVE32F-NEXT: sw t2, 0(a0)
3482 ; RV32ZVE32F-NEXT: sw t1, 4(a0)
3483 ; RV32ZVE32F-NEXT: andi a0, t0, 32
3484 ; RV32ZVE32F-NEXT: beqz a0, .LBB42_6
3485 ; RV32ZVE32F-NEXT: .LBB42_15: # %cond.store9
3486 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
3487 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
3488 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
3489 ; RV32ZVE32F-NEXT: sw a7, 0(a0)
3490 ; RV32ZVE32F-NEXT: sw a6, 4(a0)
3491 ; RV32ZVE32F-NEXT: andi a0, t0, 64
3492 ; RV32ZVE32F-NEXT: beqz a0, .LBB42_7
3493 ; RV32ZVE32F-NEXT: .LBB42_16: # %cond.store11
3494 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
3495 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
3496 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
3497 ; RV32ZVE32F-NEXT: sw a5, 0(a0)
3498 ; RV32ZVE32F-NEXT: sw a4, 4(a0)
3499 ; RV32ZVE32F-NEXT: andi a0, t0, -128
3500 ; RV32ZVE32F-NEXT: bnez a0, .LBB42_8
3501 ; RV32ZVE32F-NEXT: j .LBB42_9
3503 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8_v8i64:
3504 ; RV64ZVE32F: # %bb.0:
3505 ; RV64ZVE32F-NEXT: ld a2, 56(a0)
3506 ; RV64ZVE32F-NEXT: ld a3, 48(a0)
3507 ; RV64ZVE32F-NEXT: ld a5, 40(a0)
3508 ; RV64ZVE32F-NEXT: ld a6, 32(a0)
3509 ; RV64ZVE32F-NEXT: ld a7, 24(a0)
3510 ; RV64ZVE32F-NEXT: ld t0, 16(a0)
3511 ; RV64ZVE32F-NEXT: ld t1, 8(a0)
3512 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
3513 ; RV64ZVE32F-NEXT: vmv.x.s a4, v0
3514 ; RV64ZVE32F-NEXT: andi t2, a4, 1
3515 ; RV64ZVE32F-NEXT: beqz t2, .LBB42_2
3516 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
3517 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
3518 ; RV64ZVE32F-NEXT: vmv.x.s t2, v8
3519 ; RV64ZVE32F-NEXT: slli t2, t2, 3
3520 ; RV64ZVE32F-NEXT: add t2, a1, t2
3521 ; RV64ZVE32F-NEXT: sd a0, 0(t2)
3522 ; RV64ZVE32F-NEXT: .LBB42_2: # %else
3523 ; RV64ZVE32F-NEXT: andi a0, a4, 2
3524 ; RV64ZVE32F-NEXT: beqz a0, .LBB42_4
3525 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
3526 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
3527 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
3528 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9
3529 ; RV64ZVE32F-NEXT: slli a0, a0, 3
3530 ; RV64ZVE32F-NEXT: add a0, a1, a0
3531 ; RV64ZVE32F-NEXT: sd t1, 0(a0)
3532 ; RV64ZVE32F-NEXT: .LBB42_4: # %else2
3533 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
3534 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
3535 ; RV64ZVE32F-NEXT: andi a0, a4, 4
3536 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
3537 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
3538 ; RV64ZVE32F-NEXT: bnez a0, .LBB42_12
3539 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
3540 ; RV64ZVE32F-NEXT: andi a0, a4, 8
3541 ; RV64ZVE32F-NEXT: bnez a0, .LBB42_13
3542 ; RV64ZVE32F-NEXT: .LBB42_6: # %else6
3543 ; RV64ZVE32F-NEXT: andi a0, a4, 16
3544 ; RV64ZVE32F-NEXT: bnez a0, .LBB42_14
3545 ; RV64ZVE32F-NEXT: .LBB42_7: # %else8
3546 ; RV64ZVE32F-NEXT: andi a0, a4, 32
3547 ; RV64ZVE32F-NEXT: beqz a0, .LBB42_9
3548 ; RV64ZVE32F-NEXT: .LBB42_8: # %cond.store9
3549 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
3550 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
3551 ; RV64ZVE32F-NEXT: slli a0, a0, 3
3552 ; RV64ZVE32F-NEXT: add a0, a1, a0
3553 ; RV64ZVE32F-NEXT: sd a5, 0(a0)
3554 ; RV64ZVE32F-NEXT: .LBB42_9: # %else10
3555 ; RV64ZVE32F-NEXT: andi a0, a4, 64
3556 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
3557 ; RV64ZVE32F-NEXT: bnez a0, .LBB42_15
3558 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
3559 ; RV64ZVE32F-NEXT: andi a0, a4, -128
3560 ; RV64ZVE32F-NEXT: bnez a0, .LBB42_16
3561 ; RV64ZVE32F-NEXT: .LBB42_11: # %else14
3562 ; RV64ZVE32F-NEXT: ret
3563 ; RV64ZVE32F-NEXT: .LBB42_12: # %cond.store3
3564 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
3565 ; RV64ZVE32F-NEXT: slli a0, a0, 3
3566 ; RV64ZVE32F-NEXT: add a0, a1, a0
3567 ; RV64ZVE32F-NEXT: sd t0, 0(a0)
3568 ; RV64ZVE32F-NEXT: andi a0, a4, 8
3569 ; RV64ZVE32F-NEXT: beqz a0, .LBB42_6
3570 ; RV64ZVE32F-NEXT: .LBB42_13: # %cond.store5
3571 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
3572 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
3573 ; RV64ZVE32F-NEXT: slli a0, a0, 3
3574 ; RV64ZVE32F-NEXT: add a0, a1, a0
3575 ; RV64ZVE32F-NEXT: sd a7, 0(a0)
3576 ; RV64ZVE32F-NEXT: andi a0, a4, 16
3577 ; RV64ZVE32F-NEXT: beqz a0, .LBB42_7
3578 ; RV64ZVE32F-NEXT: .LBB42_14: # %cond.store7
3579 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9
3580 ; RV64ZVE32F-NEXT: slli a0, a0, 3
3581 ; RV64ZVE32F-NEXT: add a0, a1, a0
3582 ; RV64ZVE32F-NEXT: sd a6, 0(a0)
3583 ; RV64ZVE32F-NEXT: andi a0, a4, 32
3584 ; RV64ZVE32F-NEXT: bnez a0, .LBB42_8
3585 ; RV64ZVE32F-NEXT: j .LBB42_9
3586 ; RV64ZVE32F-NEXT: .LBB42_15: # %cond.store11
3587 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
3588 ; RV64ZVE32F-NEXT: slli a0, a0, 3
3589 ; RV64ZVE32F-NEXT: add a0, a1, a0
3590 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
3591 ; RV64ZVE32F-NEXT: andi a0, a4, -128
3592 ; RV64ZVE32F-NEXT: beqz a0, .LBB42_11
3593 ; RV64ZVE32F-NEXT: .LBB42_16: # %cond.store13
3594 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
3595 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
3596 ; RV64ZVE32F-NEXT: slli a0, a0, 3
3597 ; RV64ZVE32F-NEXT: add a0, a1, a0
3598 ; RV64ZVE32F-NEXT: sd a2, 0(a0)
3599 ; RV64ZVE32F-NEXT: ret
3600 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i8> %idxs
3601 call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
3602 ret void
3603 }
3605 define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
3606 ; RV32V-LABEL: mscatter_baseidx_sext_v8i8_v8i64:
3607 ; RV32V: # %bb.0:
3608 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
3609 ; RV32V-NEXT: vsext.vf4 v14, v12
3610 ; RV32V-NEXT: vsll.vi v12, v14, 3
3611 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
3612 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
3613 ; RV32V-NEXT: ret
3615 ; RV64-LABEL: mscatter_baseidx_sext_v8i8_v8i64:
3616 ; RV64: # %bb.0:
3617 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
3618 ; RV64-NEXT: vsext.vf8 v16, v12
3619 ; RV64-NEXT: vsll.vi v12, v16, 3
3620 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
3621 ; RV64-NEXT: ret
3623 ; RV32ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8i64:
3624 ; RV32ZVE32F: # %bb.0:
3625 ; RV32ZVE32F-NEXT: addi sp, sp, -16
3626 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
3627 ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
3628 ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
3629 ; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
3630 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
3631 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
3632 ; RV32ZVE32F-NEXT: .cfi_offset s2, -12
3633 ; RV32ZVE32F-NEXT: lw a2, 60(a0)
3634 ; RV32ZVE32F-NEXT: lw a3, 56(a0)
3635 ; RV32ZVE32F-NEXT: lw a4, 52(a0)
3636 ; RV32ZVE32F-NEXT: lw a5, 48(a0)
3637 ; RV32ZVE32F-NEXT: lw a6, 44(a0)
3638 ; RV32ZVE32F-NEXT: lw a7, 40(a0)
3639 ; RV32ZVE32F-NEXT: lw t1, 36(a0)
3640 ; RV32ZVE32F-NEXT: lw t2, 32(a0)
3641 ; RV32ZVE32F-NEXT: lw t3, 28(a0)
3642 ; RV32ZVE32F-NEXT: lw t4, 24(a0)
3643 ; RV32ZVE32F-NEXT: lw t5, 20(a0)
3644 ; RV32ZVE32F-NEXT: lw t6, 16(a0)
3645 ; RV32ZVE32F-NEXT: lw s1, 12(a0)
3646 ; RV32ZVE32F-NEXT: lw s0, 8(a0)
3647 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
3648 ; RV32ZVE32F-NEXT: vsext.vf4 v10, v8
3649 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
3650 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
3651 ; RV32ZVE32F-NEXT: vmv.x.s t0, v0
3652 ; RV32ZVE32F-NEXT: andi s2, t0, 1
3653 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
3654 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
3655 ; RV32ZVE32F-NEXT: bnez s2, .LBB43_10
3656 ; RV32ZVE32F-NEXT: # %bb.1: # %else
3657 ; RV32ZVE32F-NEXT: andi a0, t0, 2
3658 ; RV32ZVE32F-NEXT: bnez a0, .LBB43_11
3659 ; RV32ZVE32F-NEXT: .LBB43_2: # %else2
3660 ; RV32ZVE32F-NEXT: andi a0, t0, 4
3661 ; RV32ZVE32F-NEXT: bnez a0, .LBB43_12
3662 ; RV32ZVE32F-NEXT: .LBB43_3: # %else4
3663 ; RV32ZVE32F-NEXT: andi a0, t0, 8
3664 ; RV32ZVE32F-NEXT: bnez a0, .LBB43_13
3665 ; RV32ZVE32F-NEXT: .LBB43_4: # %else6
3666 ; RV32ZVE32F-NEXT: andi a0, t0, 16
3667 ; RV32ZVE32F-NEXT: bnez a0, .LBB43_14
3668 ; RV32ZVE32F-NEXT: .LBB43_5: # %else8
3669 ; RV32ZVE32F-NEXT: andi a0, t0, 32
3670 ; RV32ZVE32F-NEXT: bnez a0, .LBB43_15
3671 ; RV32ZVE32F-NEXT: .LBB43_6: # %else10
3672 ; RV32ZVE32F-NEXT: andi a0, t0, 64
3673 ; RV32ZVE32F-NEXT: bnez a0, .LBB43_16
3674 ; RV32ZVE32F-NEXT: .LBB43_7: # %else12
3675 ; RV32ZVE32F-NEXT: andi a0, t0, -128
3676 ; RV32ZVE32F-NEXT: beqz a0, .LBB43_9
3677 ; RV32ZVE32F-NEXT: .LBB43_8: # %cond.store13
3678 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
3679 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
3680 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
3681 ; RV32ZVE32F-NEXT: sw a3, 0(a0)
3682 ; RV32ZVE32F-NEXT: sw a2, 4(a0)
3683 ; RV32ZVE32F-NEXT: .LBB43_9: # %else14
3684 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
3685 ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
3686 ; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
3687 ; RV32ZVE32F-NEXT: addi sp, sp, 16
3688 ; RV32ZVE32F-NEXT: ret
3689 ; RV32ZVE32F-NEXT: .LBB43_10: # %cond.store
3690 ; RV32ZVE32F-NEXT: lw a1, 4(a0)
3691 ; RV32ZVE32F-NEXT: lw a0, 0(a0)
3692 ; RV32ZVE32F-NEXT: vmv.x.s s2, v8
3693 ; RV32ZVE32F-NEXT: sw a1, 4(s2)
3694 ; RV32ZVE32F-NEXT: sw a0, 0(s2)
3695 ; RV32ZVE32F-NEXT: andi a0, t0, 2
3696 ; RV32ZVE32F-NEXT: beqz a0, .LBB43_2
3697 ; RV32ZVE32F-NEXT: .LBB43_11: # %cond.store1
3698 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3699 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
3700 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
3701 ; RV32ZVE32F-NEXT: sw s1, 4(a0)
3702 ; RV32ZVE32F-NEXT: sw s0, 0(a0)
3703 ; RV32ZVE32F-NEXT: andi a0, t0, 4
3704 ; RV32ZVE32F-NEXT: beqz a0, .LBB43_3
3705 ; RV32ZVE32F-NEXT: .LBB43_12: # %cond.store3
3706 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3707 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
3708 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
3709 ; RV32ZVE32F-NEXT: sw t6, 0(a0)
3710 ; RV32ZVE32F-NEXT: sw t5, 4(a0)
3711 ; RV32ZVE32F-NEXT: andi a0, t0, 8
3712 ; RV32ZVE32F-NEXT: beqz a0, .LBB43_4
3713 ; RV32ZVE32F-NEXT: .LBB43_13: # %cond.store5
3714 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3715 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
3716 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
3717 ; RV32ZVE32F-NEXT: sw t4, 0(a0)
3718 ; RV32ZVE32F-NEXT: sw t3, 4(a0)
3719 ; RV32ZVE32F-NEXT: andi a0, t0, 16
3720 ; RV32ZVE32F-NEXT: beqz a0, .LBB43_5
3721 ; RV32ZVE32F-NEXT: .LBB43_14: # %cond.store7
3722 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
3723 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
3724 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
3725 ; RV32ZVE32F-NEXT: sw t2, 0(a0)
3726 ; RV32ZVE32F-NEXT: sw t1, 4(a0)
3727 ; RV32ZVE32F-NEXT: andi a0, t0, 32
3728 ; RV32ZVE32F-NEXT: beqz a0, .LBB43_6
3729 ; RV32ZVE32F-NEXT: .LBB43_15: # %cond.store9
3730 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
3731 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
3732 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
3733 ; RV32ZVE32F-NEXT: sw a7, 0(a0)
3734 ; RV32ZVE32F-NEXT: sw a6, 4(a0)
3735 ; RV32ZVE32F-NEXT: andi a0, t0, 64
3736 ; RV32ZVE32F-NEXT: beqz a0, .LBB43_7
3737 ; RV32ZVE32F-NEXT: .LBB43_16: # %cond.store11
3738 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
3739 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
3740 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
3741 ; RV32ZVE32F-NEXT: sw a5, 0(a0)
3742 ; RV32ZVE32F-NEXT: sw a4, 4(a0)
3743 ; RV32ZVE32F-NEXT: andi a0, t0, -128
3744 ; RV32ZVE32F-NEXT: bnez a0, .LBB43_8
3745 ; RV32ZVE32F-NEXT: j .LBB43_9
3747 ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8i64:
3748 ; RV64ZVE32F: # %bb.0:
3749 ; RV64ZVE32F-NEXT: ld a2, 56(a0)
3750 ; RV64ZVE32F-NEXT: ld a3, 48(a0)
3751 ; RV64ZVE32F-NEXT: ld a5, 40(a0)
3752 ; RV64ZVE32F-NEXT: ld a6, 32(a0)
3753 ; RV64ZVE32F-NEXT: ld a7, 24(a0)
3754 ; RV64ZVE32F-NEXT: ld t0, 16(a0)
3755 ; RV64ZVE32F-NEXT: ld t1, 8(a0)
3756 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
3757 ; RV64ZVE32F-NEXT: vmv.x.s a4, v0
3758 ; RV64ZVE32F-NEXT: andi t2, a4, 1
3759 ; RV64ZVE32F-NEXT: beqz t2, .LBB43_2
3760 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
3761 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
3762 ; RV64ZVE32F-NEXT: vmv.x.s t2, v8
3763 ; RV64ZVE32F-NEXT: slli t2, t2, 3
3764 ; RV64ZVE32F-NEXT: add t2, a1, t2
3765 ; RV64ZVE32F-NEXT: sd a0, 0(t2)
3766 ; RV64ZVE32F-NEXT: .LBB43_2: # %else
3767 ; RV64ZVE32F-NEXT: andi a0, a4, 2
3768 ; RV64ZVE32F-NEXT: beqz a0, .LBB43_4
3769 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
3770 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
3771 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
3772 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9
3773 ; RV64ZVE32F-NEXT: slli a0, a0, 3
3774 ; RV64ZVE32F-NEXT: add a0, a1, a0
3775 ; RV64ZVE32F-NEXT: sd t1, 0(a0)
3776 ; RV64ZVE32F-NEXT: .LBB43_4: # %else2
3777 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
3778 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
3779 ; RV64ZVE32F-NEXT: andi a0, a4, 4
3780 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
3781 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
3782 ; RV64ZVE32F-NEXT: bnez a0, .LBB43_12
3783 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
3784 ; RV64ZVE32F-NEXT: andi a0, a4, 8
3785 ; RV64ZVE32F-NEXT: bnez a0, .LBB43_13
3786 ; RV64ZVE32F-NEXT: .LBB43_6: # %else6
3787 ; RV64ZVE32F-NEXT: andi a0, a4, 16
3788 ; RV64ZVE32F-NEXT: bnez a0, .LBB43_14
3789 ; RV64ZVE32F-NEXT: .LBB43_7: # %else8
3790 ; RV64ZVE32F-NEXT: andi a0, a4, 32
3791 ; RV64ZVE32F-NEXT: beqz a0, .LBB43_9
3792 ; RV64ZVE32F-NEXT: .LBB43_8: # %cond.store9
3793 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
3794 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
3795 ; RV64ZVE32F-NEXT: slli a0, a0, 3
3796 ; RV64ZVE32F-NEXT: add a0, a1, a0
3797 ; RV64ZVE32F-NEXT: sd a5, 0(a0)
3798 ; RV64ZVE32F-NEXT: .LBB43_9: # %else10
3799 ; RV64ZVE32F-NEXT: andi a0, a4, 64
3800 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
3801 ; RV64ZVE32F-NEXT: bnez a0, .LBB43_15
3802 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
3803 ; RV64ZVE32F-NEXT: andi a0, a4, -128
3804 ; RV64ZVE32F-NEXT: bnez a0, .LBB43_16
3805 ; RV64ZVE32F-NEXT: .LBB43_11: # %else14
3806 ; RV64ZVE32F-NEXT: ret
3807 ; RV64ZVE32F-NEXT: .LBB43_12: # %cond.store3
3808 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
3809 ; RV64ZVE32F-NEXT: slli a0, a0, 3
3810 ; RV64ZVE32F-NEXT: add a0, a1, a0
3811 ; RV64ZVE32F-NEXT: sd t0, 0(a0)
3812 ; RV64ZVE32F-NEXT: andi a0, a4, 8
3813 ; RV64ZVE32F-NEXT: beqz a0, .LBB43_6
3814 ; RV64ZVE32F-NEXT: .LBB43_13: # %cond.store5
3815 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
3816 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
3817 ; RV64ZVE32F-NEXT: slli a0, a0, 3
3818 ; RV64ZVE32F-NEXT: add a0, a1, a0
3819 ; RV64ZVE32F-NEXT: sd a7, 0(a0)
3820 ; RV64ZVE32F-NEXT: andi a0, a4, 16
3821 ; RV64ZVE32F-NEXT: beqz a0, .LBB43_7
3822 ; RV64ZVE32F-NEXT: .LBB43_14: # %cond.store7
3823 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9
3824 ; RV64ZVE32F-NEXT: slli a0, a0, 3
3825 ; RV64ZVE32F-NEXT: add a0, a1, a0
3826 ; RV64ZVE32F-NEXT: sd a6, 0(a0)
3827 ; RV64ZVE32F-NEXT: andi a0, a4, 32
3828 ; RV64ZVE32F-NEXT: bnez a0, .LBB43_8
3829 ; RV64ZVE32F-NEXT: j .LBB43_9
3830 ; RV64ZVE32F-NEXT: .LBB43_15: # %cond.store11
3831 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
3832 ; RV64ZVE32F-NEXT: slli a0, a0, 3
3833 ; RV64ZVE32F-NEXT: add a0, a1, a0
3834 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
3835 ; RV64ZVE32F-NEXT: andi a0, a4, -128
3836 ; RV64ZVE32F-NEXT: beqz a0, .LBB43_11
3837 ; RV64ZVE32F-NEXT: .LBB43_16: # %cond.store13
3838 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
3839 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
3840 ; RV64ZVE32F-NEXT: slli a0, a0, 3
3841 ; RV64ZVE32F-NEXT: add a0, a1, a0
3842 ; RV64ZVE32F-NEXT: sd a2, 0(a0)
3843 ; RV64ZVE32F-NEXT: ret
3844 %eidxs = sext <8 x i8> %idxs to <8 x i64>
3845 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
3846 call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
3847 ret void
3848 }
3850 define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
3851 ; RV32V-LABEL: mscatter_baseidx_zext_v8i8_v8i64:
3852 ; RV32V: # %bb.0:
3853 ; RV32V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
3854 ; RV32V-NEXT: vzext.vf2 v13, v12
3855 ; RV32V-NEXT: vsll.vi v12, v13, 3
3856 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
3857 ; RV32V-NEXT: vsoxei16.v v8, (a0), v12, v0.t
3858 ; RV32V-NEXT: ret
3860 ; RV64-LABEL: mscatter_baseidx_zext_v8i8_v8i64:
3861 ; RV64: # %bb.0:
3862 ; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
3863 ; RV64-NEXT: vzext.vf2 v13, v12
3864 ; RV64-NEXT: vsll.vi v12, v13, 3
3865 ; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, ma
3866 ; RV64-NEXT: vsoxei16.v v8, (a0), v12, v0.t
3867 ; RV64-NEXT: ret
3869 ; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8i64:
3870 ; RV32ZVE32F: # %bb.0:
3871 ; RV32ZVE32F-NEXT: addi sp, sp, -16
3872 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
3873 ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
3874 ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
3875 ; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
3876 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
3877 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
3878 ; RV32ZVE32F-NEXT: .cfi_offset s2, -12
3879 ; RV32ZVE32F-NEXT: lw a2, 60(a0)
3880 ; RV32ZVE32F-NEXT: lw a3, 56(a0)
3881 ; RV32ZVE32F-NEXT: lw a4, 52(a0)
3882 ; RV32ZVE32F-NEXT: lw a5, 48(a0)
3883 ; RV32ZVE32F-NEXT: lw a6, 44(a0)
3884 ; RV32ZVE32F-NEXT: lw a7, 40(a0)
3885 ; RV32ZVE32F-NEXT: lw t1, 36(a0)
3886 ; RV32ZVE32F-NEXT: lw t2, 32(a0)
3887 ; RV32ZVE32F-NEXT: lw t3, 28(a0)
3888 ; RV32ZVE32F-NEXT: lw t4, 24(a0)
3889 ; RV32ZVE32F-NEXT: lw t5, 20(a0)
3890 ; RV32ZVE32F-NEXT: lw t6, 16(a0)
3891 ; RV32ZVE32F-NEXT: lw s1, 12(a0)
3892 ; RV32ZVE32F-NEXT: lw s0, 8(a0)
3893 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
3894 ; RV32ZVE32F-NEXT: vzext.vf4 v10, v8
3895 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
3896 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
3897 ; RV32ZVE32F-NEXT: vmv.x.s t0, v0
3898 ; RV32ZVE32F-NEXT: andi s2, t0, 1
3899 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
3900 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
3901 ; RV32ZVE32F-NEXT: bnez s2, .LBB44_10
3902 ; RV32ZVE32F-NEXT: # %bb.1: # %else
3903 ; RV32ZVE32F-NEXT: andi a0, t0, 2
3904 ; RV32ZVE32F-NEXT: bnez a0, .LBB44_11
3905 ; RV32ZVE32F-NEXT: .LBB44_2: # %else2
3906 ; RV32ZVE32F-NEXT: andi a0, t0, 4
3907 ; RV32ZVE32F-NEXT: bnez a0, .LBB44_12
3908 ; RV32ZVE32F-NEXT: .LBB44_3: # %else4
3909 ; RV32ZVE32F-NEXT: andi a0, t0, 8
3910 ; RV32ZVE32F-NEXT: bnez a0, .LBB44_13
3911 ; RV32ZVE32F-NEXT: .LBB44_4: # %else6
3912 ; RV32ZVE32F-NEXT: andi a0, t0, 16
3913 ; RV32ZVE32F-NEXT: bnez a0, .LBB44_14
3914 ; RV32ZVE32F-NEXT: .LBB44_5: # %else8
3915 ; RV32ZVE32F-NEXT: andi a0, t0, 32
3916 ; RV32ZVE32F-NEXT: bnez a0, .LBB44_15
3917 ; RV32ZVE32F-NEXT: .LBB44_6: # %else10
3918 ; RV32ZVE32F-NEXT: andi a0, t0, 64
3919 ; RV32ZVE32F-NEXT: bnez a0, .LBB44_16
3920 ; RV32ZVE32F-NEXT: .LBB44_7: # %else12
3921 ; RV32ZVE32F-NEXT: andi a0, t0, -128
3922 ; RV32ZVE32F-NEXT: beqz a0, .LBB44_9
3923 ; RV32ZVE32F-NEXT: .LBB44_8: # %cond.store13
3924 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
3925 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
3926 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
3927 ; RV32ZVE32F-NEXT: sw a3, 0(a0)
3928 ; RV32ZVE32F-NEXT: sw a2, 4(a0)
3929 ; RV32ZVE32F-NEXT: .LBB44_9: # %else14
3930 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
3931 ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
3932 ; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
3933 ; RV32ZVE32F-NEXT: addi sp, sp, 16
3934 ; RV32ZVE32F-NEXT: ret
3935 ; RV32ZVE32F-NEXT: .LBB44_10: # %cond.store
3936 ; RV32ZVE32F-NEXT: lw a1, 4(a0)
3937 ; RV32ZVE32F-NEXT: lw a0, 0(a0)
3938 ; RV32ZVE32F-NEXT: vmv.x.s s2, v8
3939 ; RV32ZVE32F-NEXT: sw a1, 4(s2)
3940 ; RV32ZVE32F-NEXT: sw a0, 0(s2)
3941 ; RV32ZVE32F-NEXT: andi a0, t0, 2
3942 ; RV32ZVE32F-NEXT: beqz a0, .LBB44_2
3943 ; RV32ZVE32F-NEXT: .LBB44_11: # %cond.store1
3944 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3945 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
3946 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
3947 ; RV32ZVE32F-NEXT: sw s1, 4(a0)
3948 ; RV32ZVE32F-NEXT: sw s0, 0(a0)
3949 ; RV32ZVE32F-NEXT: andi a0, t0, 4
3950 ; RV32ZVE32F-NEXT: beqz a0, .LBB44_3
3951 ; RV32ZVE32F-NEXT: .LBB44_12: # %cond.store3
3952 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3953 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
3954 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
3955 ; RV32ZVE32F-NEXT: sw t6, 0(a0)
3956 ; RV32ZVE32F-NEXT: sw t5, 4(a0)
3957 ; RV32ZVE32F-NEXT: andi a0, t0, 8
3958 ; RV32ZVE32F-NEXT: beqz a0, .LBB44_4
3959 ; RV32ZVE32F-NEXT: .LBB44_13: # %cond.store5
3960 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3961 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
3962 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
3963 ; RV32ZVE32F-NEXT: sw t4, 0(a0)
3964 ; RV32ZVE32F-NEXT: sw t3, 4(a0)
3965 ; RV32ZVE32F-NEXT: andi a0, t0, 16
3966 ; RV32ZVE32F-NEXT: beqz a0, .LBB44_5
3967 ; RV32ZVE32F-NEXT: .LBB44_14: # %cond.store7
3968 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
3969 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
3970 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
3971 ; RV32ZVE32F-NEXT: sw t2, 0(a0)
3972 ; RV32ZVE32F-NEXT: sw t1, 4(a0)
3973 ; RV32ZVE32F-NEXT: andi a0, t0, 32
3974 ; RV32ZVE32F-NEXT: beqz a0, .LBB44_6
3975 ; RV32ZVE32F-NEXT: .LBB44_15: # %cond.store9
3976 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
3977 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
3978 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
3979 ; RV32ZVE32F-NEXT: sw a7, 0(a0)
3980 ; RV32ZVE32F-NEXT: sw a6, 4(a0)
3981 ; RV32ZVE32F-NEXT: andi a0, t0, 64
3982 ; RV32ZVE32F-NEXT: beqz a0, .LBB44_7
3983 ; RV32ZVE32F-NEXT: .LBB44_16: # %cond.store11
3984 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
3985 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
3986 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
3987 ; RV32ZVE32F-NEXT: sw a5, 0(a0)
3988 ; RV32ZVE32F-NEXT: sw a4, 4(a0)
3989 ; RV32ZVE32F-NEXT: andi a0, t0, -128
3990 ; RV32ZVE32F-NEXT: bnez a0, .LBB44_8
3991 ; RV32ZVE32F-NEXT: j .LBB44_9
3993 ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8i64:
3994 ; RV64ZVE32F: # %bb.0:
3995 ; RV64ZVE32F-NEXT: ld a2, 56(a0)
3996 ; RV64ZVE32F-NEXT: ld a3, 48(a0)
3997 ; RV64ZVE32F-NEXT: ld a5, 40(a0)
3998 ; RV64ZVE32F-NEXT: ld a6, 32(a0)
3999 ; RV64ZVE32F-NEXT: ld a7, 24(a0)
4000 ; RV64ZVE32F-NEXT: ld t0, 16(a0)
4001 ; RV64ZVE32F-NEXT: ld t1, 8(a0)
4002 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
4003 ; RV64ZVE32F-NEXT: vmv.x.s a4, v0
4004 ; RV64ZVE32F-NEXT: andi t2, a4, 1
4005 ; RV64ZVE32F-NEXT: beqz t2, .LBB44_2
4006 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
4007 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
4008 ; RV64ZVE32F-NEXT: vmv.x.s t2, v8
4009 ; RV64ZVE32F-NEXT: andi t2, t2, 255
4010 ; RV64ZVE32F-NEXT: slli t2, t2, 3
4011 ; RV64ZVE32F-NEXT: add t2, a1, t2
4012 ; RV64ZVE32F-NEXT: sd a0, 0(t2)
4013 ; RV64ZVE32F-NEXT: .LBB44_2: # %else
4014 ; RV64ZVE32F-NEXT: andi a0, a4, 2
4015 ; RV64ZVE32F-NEXT: beqz a0, .LBB44_4
4016 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
4017 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
4018 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
4019 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9
4020 ; RV64ZVE32F-NEXT: andi a0, a0, 255
4021 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4022 ; RV64ZVE32F-NEXT: add a0, a1, a0
4023 ; RV64ZVE32F-NEXT: sd t1, 0(a0)
4024 ; RV64ZVE32F-NEXT: .LBB44_4: # %else2
4025 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
4026 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
4027 ; RV64ZVE32F-NEXT: andi a0, a4, 4
4028 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
4029 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
4030 ; RV64ZVE32F-NEXT: bnez a0, .LBB44_12
4031 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
4032 ; RV64ZVE32F-NEXT: andi a0, a4, 8
4033 ; RV64ZVE32F-NEXT: bnez a0, .LBB44_13
4034 ; RV64ZVE32F-NEXT: .LBB44_6: # %else6
4035 ; RV64ZVE32F-NEXT: andi a0, a4, 16
4036 ; RV64ZVE32F-NEXT: bnez a0, .LBB44_14
4037 ; RV64ZVE32F-NEXT: .LBB44_7: # %else8
4038 ; RV64ZVE32F-NEXT: andi a0, a4, 32
4039 ; RV64ZVE32F-NEXT: beqz a0, .LBB44_9
4040 ; RV64ZVE32F-NEXT: .LBB44_8: # %cond.store9
4041 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
4042 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4043 ; RV64ZVE32F-NEXT: andi a0, a0, 255
4044 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4045 ; RV64ZVE32F-NEXT: add a0, a1, a0
4046 ; RV64ZVE32F-NEXT: sd a5, 0(a0)
4047 ; RV64ZVE32F-NEXT: .LBB44_9: # %else10
4048 ; RV64ZVE32F-NEXT: andi a0, a4, 64
4049 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
4050 ; RV64ZVE32F-NEXT: bnez a0, .LBB44_15
4051 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
4052 ; RV64ZVE32F-NEXT: andi a0, a4, -128
4053 ; RV64ZVE32F-NEXT: bnez a0, .LBB44_16
4054 ; RV64ZVE32F-NEXT: .LBB44_11: # %else14
4055 ; RV64ZVE32F-NEXT: ret
4056 ; RV64ZVE32F-NEXT: .LBB44_12: # %cond.store3
4057 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4058 ; RV64ZVE32F-NEXT: andi a0, a0, 255
4059 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4060 ; RV64ZVE32F-NEXT: add a0, a1, a0
4061 ; RV64ZVE32F-NEXT: sd t0, 0(a0)
4062 ; RV64ZVE32F-NEXT: andi a0, a4, 8
4063 ; RV64ZVE32F-NEXT: beqz a0, .LBB44_6
4064 ; RV64ZVE32F-NEXT: .LBB44_13: # %cond.store5
4065 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
4066 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4067 ; RV64ZVE32F-NEXT: andi a0, a0, 255
4068 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4069 ; RV64ZVE32F-NEXT: add a0, a1, a0
4070 ; RV64ZVE32F-NEXT: sd a7, 0(a0)
4071 ; RV64ZVE32F-NEXT: andi a0, a4, 16
4072 ; RV64ZVE32F-NEXT: beqz a0, .LBB44_7
4073 ; RV64ZVE32F-NEXT: .LBB44_14: # %cond.store7
4074 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9
4075 ; RV64ZVE32F-NEXT: andi a0, a0, 255
4076 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4077 ; RV64ZVE32F-NEXT: add a0, a1, a0
4078 ; RV64ZVE32F-NEXT: sd a6, 0(a0)
4079 ; RV64ZVE32F-NEXT: andi a0, a4, 32
4080 ; RV64ZVE32F-NEXT: bnez a0, .LBB44_8
4081 ; RV64ZVE32F-NEXT: j .LBB44_9
4082 ; RV64ZVE32F-NEXT: .LBB44_15: # %cond.store11
4083 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4084 ; RV64ZVE32F-NEXT: andi a0, a0, 255
4085 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4086 ; RV64ZVE32F-NEXT: add a0, a1, a0
4087 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
4088 ; RV64ZVE32F-NEXT: andi a0, a4, -128
4089 ; RV64ZVE32F-NEXT: beqz a0, .LBB44_11
4090 ; RV64ZVE32F-NEXT: .LBB44_16: # %cond.store13
4091 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
4092 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4093 ; RV64ZVE32F-NEXT: andi a0, a0, 255
4094 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4095 ; RV64ZVE32F-NEXT: add a0, a1, a0
4096 ; RV64ZVE32F-NEXT: sd a2, 0(a0)
4097 ; RV64ZVE32F-NEXT: ret
4098 %eidxs = zext <8 x i8> %idxs to <8 x i64>
4099 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
4100 call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
4101 ret void
4102 }
4104 define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
4105 ; RV32V-LABEL: mscatter_baseidx_v8i16_v8i64:
4106 ; RV32V: # %bb.0:
4107 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
4108 ; RV32V-NEXT: vsext.vf2 v14, v12
4109 ; RV32V-NEXT: vsll.vi v12, v14, 3
4110 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
4111 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
4112 ; RV32V-NEXT: ret
4114 ; RV64-LABEL: mscatter_baseidx_v8i16_v8i64:
4115 ; RV64: # %bb.0:
4116 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
4117 ; RV64-NEXT: vsext.vf4 v16, v12
4118 ; RV64-NEXT: vsll.vi v12, v16, 3
4119 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
4120 ; RV64-NEXT: ret
4122 ; RV32ZVE32F-LABEL: mscatter_baseidx_v8i16_v8i64:
4123 ; RV32ZVE32F: # %bb.0:
4124 ; RV32ZVE32F-NEXT: addi sp, sp, -16
4125 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
4126 ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
4127 ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
4128 ; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
4129 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
4130 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
4131 ; RV32ZVE32F-NEXT: .cfi_offset s2, -12
4132 ; RV32ZVE32F-NEXT: lw a2, 60(a0)
4133 ; RV32ZVE32F-NEXT: lw a3, 56(a0)
4134 ; RV32ZVE32F-NEXT: lw a4, 52(a0)
4135 ; RV32ZVE32F-NEXT: lw a5, 48(a0)
4136 ; RV32ZVE32F-NEXT: lw a6, 44(a0)
4137 ; RV32ZVE32F-NEXT: lw a7, 40(a0)
4138 ; RV32ZVE32F-NEXT: lw t1, 36(a0)
4139 ; RV32ZVE32F-NEXT: lw t2, 32(a0)
4140 ; RV32ZVE32F-NEXT: lw t3, 28(a0)
4141 ; RV32ZVE32F-NEXT: lw t4, 24(a0)
4142 ; RV32ZVE32F-NEXT: lw t5, 20(a0)
4143 ; RV32ZVE32F-NEXT: lw t6, 16(a0)
4144 ; RV32ZVE32F-NEXT: lw s1, 12(a0)
4145 ; RV32ZVE32F-NEXT: lw s0, 8(a0)
4146 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
4147 ; RV32ZVE32F-NEXT: vsext.vf2 v10, v8
4148 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
4149 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
4150 ; RV32ZVE32F-NEXT: vmv.x.s t0, v0
4151 ; RV32ZVE32F-NEXT: andi s2, t0, 1
4152 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4153 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
4154 ; RV32ZVE32F-NEXT: bnez s2, .LBB45_10
4155 ; RV32ZVE32F-NEXT: # %bb.1: # %else
4156 ; RV32ZVE32F-NEXT: andi a0, t0, 2
4157 ; RV32ZVE32F-NEXT: bnez a0, .LBB45_11
4158 ; RV32ZVE32F-NEXT: .LBB45_2: # %else2
4159 ; RV32ZVE32F-NEXT: andi a0, t0, 4
4160 ; RV32ZVE32F-NEXT: bnez a0, .LBB45_12
4161 ; RV32ZVE32F-NEXT: .LBB45_3: # %else4
4162 ; RV32ZVE32F-NEXT: andi a0, t0, 8
4163 ; RV32ZVE32F-NEXT: bnez a0, .LBB45_13
4164 ; RV32ZVE32F-NEXT: .LBB45_4: # %else6
4165 ; RV32ZVE32F-NEXT: andi a0, t0, 16
4166 ; RV32ZVE32F-NEXT: bnez a0, .LBB45_14
4167 ; RV32ZVE32F-NEXT: .LBB45_5: # %else8
4168 ; RV32ZVE32F-NEXT: andi a0, t0, 32
4169 ; RV32ZVE32F-NEXT: bnez a0, .LBB45_15
4170 ; RV32ZVE32F-NEXT: .LBB45_6: # %else10
4171 ; RV32ZVE32F-NEXT: andi a0, t0, 64
4172 ; RV32ZVE32F-NEXT: bnez a0, .LBB45_16
4173 ; RV32ZVE32F-NEXT: .LBB45_7: # %else12
4174 ; RV32ZVE32F-NEXT: andi a0, t0, -128
4175 ; RV32ZVE32F-NEXT: beqz a0, .LBB45_9
4176 ; RV32ZVE32F-NEXT: .LBB45_8: # %cond.store13
4177 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4178 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
4179 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
4180 ; RV32ZVE32F-NEXT: sw a3, 0(a0)
4181 ; RV32ZVE32F-NEXT: sw a2, 4(a0)
4182 ; RV32ZVE32F-NEXT: .LBB45_9: # %else14
4183 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
4184 ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
4185 ; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
4186 ; RV32ZVE32F-NEXT: addi sp, sp, 16
4187 ; RV32ZVE32F-NEXT: ret
4188 ; RV32ZVE32F-NEXT: .LBB45_10: # %cond.store
4189 ; RV32ZVE32F-NEXT: lw a1, 4(a0)
4190 ; RV32ZVE32F-NEXT: lw a0, 0(a0)
4191 ; RV32ZVE32F-NEXT: vmv.x.s s2, v8
4192 ; RV32ZVE32F-NEXT: sw a1, 4(s2)
4193 ; RV32ZVE32F-NEXT: sw a0, 0(s2)
4194 ; RV32ZVE32F-NEXT: andi a0, t0, 2
4195 ; RV32ZVE32F-NEXT: beqz a0, .LBB45_2
4196 ; RV32ZVE32F-NEXT: .LBB45_11: # %cond.store1
4197 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4198 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
4199 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4200 ; RV32ZVE32F-NEXT: sw s1, 4(a0)
4201 ; RV32ZVE32F-NEXT: sw s0, 0(a0)
4202 ; RV32ZVE32F-NEXT: andi a0, t0, 4
4203 ; RV32ZVE32F-NEXT: beqz a0, .LBB45_3
4204 ; RV32ZVE32F-NEXT: .LBB45_12: # %cond.store3
4205 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4206 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
4207 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4208 ; RV32ZVE32F-NEXT: sw t6, 0(a0)
4209 ; RV32ZVE32F-NEXT: sw t5, 4(a0)
4210 ; RV32ZVE32F-NEXT: andi a0, t0, 8
4211 ; RV32ZVE32F-NEXT: beqz a0, .LBB45_4
4212 ; RV32ZVE32F-NEXT: .LBB45_13: # %cond.store5
4213 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4214 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
4215 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4216 ; RV32ZVE32F-NEXT: sw t4, 0(a0)
4217 ; RV32ZVE32F-NEXT: sw t3, 4(a0)
4218 ; RV32ZVE32F-NEXT: andi a0, t0, 16
4219 ; RV32ZVE32F-NEXT: beqz a0, .LBB45_5
4220 ; RV32ZVE32F-NEXT: .LBB45_14: # %cond.store7
4221 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4222 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
4223 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4224 ; RV32ZVE32F-NEXT: sw t2, 0(a0)
4225 ; RV32ZVE32F-NEXT: sw t1, 4(a0)
4226 ; RV32ZVE32F-NEXT: andi a0, t0, 32
4227 ; RV32ZVE32F-NEXT: beqz a0, .LBB45_6
4228 ; RV32ZVE32F-NEXT: .LBB45_15: # %cond.store9
4229 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4230 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
4231 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4232 ; RV32ZVE32F-NEXT: sw a7, 0(a0)
4233 ; RV32ZVE32F-NEXT: sw a6, 4(a0)
4234 ; RV32ZVE32F-NEXT: andi a0, t0, 64
4235 ; RV32ZVE32F-NEXT: beqz a0, .LBB45_7
4236 ; RV32ZVE32F-NEXT: .LBB45_16: # %cond.store11
4237 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4238 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
4239 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4240 ; RV32ZVE32F-NEXT: sw a5, 0(a0)
4241 ; RV32ZVE32F-NEXT: sw a4, 4(a0)
4242 ; RV32ZVE32F-NEXT: andi a0, t0, -128
4243 ; RV32ZVE32F-NEXT: bnez a0, .LBB45_8
4244 ; RV32ZVE32F-NEXT: j .LBB45_9
4245 ;
4246 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i16_v8i64:
4247 ; RV64ZVE32F: # %bb.0:
4248 ; RV64ZVE32F-NEXT: ld a2, 56(a0)
4249 ; RV64ZVE32F-NEXT: ld a3, 48(a0)
4250 ; RV64ZVE32F-NEXT: ld a5, 40(a0)
4251 ; RV64ZVE32F-NEXT: ld a6, 32(a0)
4252 ; RV64ZVE32F-NEXT: ld a7, 24(a0)
4253 ; RV64ZVE32F-NEXT: ld t0, 16(a0)
4254 ; RV64ZVE32F-NEXT: ld t1, 8(a0)
4255 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
4256 ; RV64ZVE32F-NEXT: vmv.x.s a4, v0
4257 ; RV64ZVE32F-NEXT: andi t2, a4, 1
4258 ; RV64ZVE32F-NEXT: beqz t2, .LBB45_2
4259 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
4260 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
4261 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
4262 ; RV64ZVE32F-NEXT: vmv.x.s t2, v8
4263 ; RV64ZVE32F-NEXT: slli t2, t2, 3
4264 ; RV64ZVE32F-NEXT: add t2, a1, t2
4265 ; RV64ZVE32F-NEXT: sd a0, 0(t2)
4266 ; RV64ZVE32F-NEXT: .LBB45_2: # %else
4267 ; RV64ZVE32F-NEXT: andi a0, a4, 2
4268 ; RV64ZVE32F-NEXT: beqz a0, .LBB45_4
4269 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
4270 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
4271 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
4272 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9
4273 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4274 ; RV64ZVE32F-NEXT: add a0, a1, a0
4275 ; RV64ZVE32F-NEXT: sd t1, 0(a0)
4276 ; RV64ZVE32F-NEXT: .LBB45_4: # %else2
4277 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
4278 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
4279 ; RV64ZVE32F-NEXT: andi a0, a4, 4
4280 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
4281 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
4282 ; RV64ZVE32F-NEXT: bnez a0, .LBB45_12
4283 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
4284 ; RV64ZVE32F-NEXT: andi a0, a4, 8
4285 ; RV64ZVE32F-NEXT: bnez a0, .LBB45_13
4286 ; RV64ZVE32F-NEXT: .LBB45_6: # %else6
4287 ; RV64ZVE32F-NEXT: andi a0, a4, 16
4288 ; RV64ZVE32F-NEXT: bnez a0, .LBB45_14
4289 ; RV64ZVE32F-NEXT: .LBB45_7: # %else8
4290 ; RV64ZVE32F-NEXT: andi a0, a4, 32
4291 ; RV64ZVE32F-NEXT: beqz a0, .LBB45_9
4292 ; RV64ZVE32F-NEXT: .LBB45_8: # %cond.store9
4293 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
4294 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4295 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4296 ; RV64ZVE32F-NEXT: add a0, a1, a0
4297 ; RV64ZVE32F-NEXT: sd a5, 0(a0)
4298 ; RV64ZVE32F-NEXT: .LBB45_9: # %else10
4299 ; RV64ZVE32F-NEXT: andi a0, a4, 64
4300 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
4301 ; RV64ZVE32F-NEXT: bnez a0, .LBB45_15
4302 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
4303 ; RV64ZVE32F-NEXT: andi a0, a4, -128
4304 ; RV64ZVE32F-NEXT: bnez a0, .LBB45_16
4305 ; RV64ZVE32F-NEXT: .LBB45_11: # %else14
4306 ; RV64ZVE32F-NEXT: ret
4307 ; RV64ZVE32F-NEXT: .LBB45_12: # %cond.store3
4308 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4309 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4310 ; RV64ZVE32F-NEXT: add a0, a1, a0
4311 ; RV64ZVE32F-NEXT: sd t0, 0(a0)
4312 ; RV64ZVE32F-NEXT: andi a0, a4, 8
4313 ; RV64ZVE32F-NEXT: beqz a0, .LBB45_6
4314 ; RV64ZVE32F-NEXT: .LBB45_13: # %cond.store5
4315 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
4316 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4317 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4318 ; RV64ZVE32F-NEXT: add a0, a1, a0
4319 ; RV64ZVE32F-NEXT: sd a7, 0(a0)
4320 ; RV64ZVE32F-NEXT: andi a0, a4, 16
4321 ; RV64ZVE32F-NEXT: beqz a0, .LBB45_7
4322 ; RV64ZVE32F-NEXT: .LBB45_14: # %cond.store7
4323 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9
4324 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4325 ; RV64ZVE32F-NEXT: add a0, a1, a0
4326 ; RV64ZVE32F-NEXT: sd a6, 0(a0)
4327 ; RV64ZVE32F-NEXT: andi a0, a4, 32
4328 ; RV64ZVE32F-NEXT: bnez a0, .LBB45_8
4329 ; RV64ZVE32F-NEXT: j .LBB45_9
4330 ; RV64ZVE32F-NEXT: .LBB45_15: # %cond.store11
4331 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4332 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4333 ; RV64ZVE32F-NEXT: add a0, a1, a0
4334 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
4335 ; RV64ZVE32F-NEXT: andi a0, a4, -128
4336 ; RV64ZVE32F-NEXT: beqz a0, .LBB45_11
4337 ; RV64ZVE32F-NEXT: .LBB45_16: # %cond.store13
4338 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
4339 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4340 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4341 ; RV64ZVE32F-NEXT: add a0, a1, a0
4342 ; RV64ZVE32F-NEXT: sd a2, 0(a0)
4343 ; RV64ZVE32F-NEXT: ret
4344 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i16> %idxs
4345 call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
4346 ret void
4347 }
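; Same pattern as above, but the i16 indices are explicitly sign-extended to i64 in the IR before the getelementptr; the vector lowerings should be identical to the implicit-promotion case.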
4349 define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
4350 ; RV32V-LABEL: mscatter_baseidx_sext_v8i16_v8i64:
4351 ; RV32V: # %bb.0:
4352 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
4353 ; RV32V-NEXT: vsext.vf2 v14, v12
4354 ; RV32V-NEXT: vsll.vi v12, v14, 3
4355 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
4356 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
4357 ; RV32V-NEXT: ret
4358 ;
4359 ; RV64-LABEL: mscatter_baseidx_sext_v8i16_v8i64:
4360 ; RV64: # %bb.0:
4361 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
4362 ; RV64-NEXT: vsext.vf4 v16, v12
4363 ; RV64-NEXT: vsll.vi v12, v16, 3
4364 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
4365 ; RV64-NEXT: ret
4366 ;
4367 ; RV32ZVE32F-LABEL: mscatter_baseidx_sext_v8i16_v8i64:
4368 ; RV32ZVE32F: # %bb.0:
4369 ; RV32ZVE32F-NEXT: addi sp, sp, -16
4370 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
4371 ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
4372 ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
4373 ; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
4374 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
4375 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
4376 ; RV32ZVE32F-NEXT: .cfi_offset s2, -12
4377 ; RV32ZVE32F-NEXT: lw a2, 60(a0)
4378 ; RV32ZVE32F-NEXT: lw a3, 56(a0)
4379 ; RV32ZVE32F-NEXT: lw a4, 52(a0)
4380 ; RV32ZVE32F-NEXT: lw a5, 48(a0)
4381 ; RV32ZVE32F-NEXT: lw a6, 44(a0)
4382 ; RV32ZVE32F-NEXT: lw a7, 40(a0)
4383 ; RV32ZVE32F-NEXT: lw t1, 36(a0)
4384 ; RV32ZVE32F-NEXT: lw t2, 32(a0)
4385 ; RV32ZVE32F-NEXT: lw t3, 28(a0)
4386 ; RV32ZVE32F-NEXT: lw t4, 24(a0)
4387 ; RV32ZVE32F-NEXT: lw t5, 20(a0)
4388 ; RV32ZVE32F-NEXT: lw t6, 16(a0)
4389 ; RV32ZVE32F-NEXT: lw s1, 12(a0)
4390 ; RV32ZVE32F-NEXT: lw s0, 8(a0)
4391 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
4392 ; RV32ZVE32F-NEXT: vsext.vf2 v10, v8
4393 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
4394 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
4395 ; RV32ZVE32F-NEXT: vmv.x.s t0, v0
4396 ; RV32ZVE32F-NEXT: andi s2, t0, 1
4397 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4398 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
4399 ; RV32ZVE32F-NEXT: bnez s2, .LBB46_10
4400 ; RV32ZVE32F-NEXT: # %bb.1: # %else
4401 ; RV32ZVE32F-NEXT: andi a0, t0, 2
4402 ; RV32ZVE32F-NEXT: bnez a0, .LBB46_11
4403 ; RV32ZVE32F-NEXT: .LBB46_2: # %else2
4404 ; RV32ZVE32F-NEXT: andi a0, t0, 4
4405 ; RV32ZVE32F-NEXT: bnez a0, .LBB46_12
4406 ; RV32ZVE32F-NEXT: .LBB46_3: # %else4
4407 ; RV32ZVE32F-NEXT: andi a0, t0, 8
4408 ; RV32ZVE32F-NEXT: bnez a0, .LBB46_13
4409 ; RV32ZVE32F-NEXT: .LBB46_4: # %else6
4410 ; RV32ZVE32F-NEXT: andi a0, t0, 16
4411 ; RV32ZVE32F-NEXT: bnez a0, .LBB46_14
4412 ; RV32ZVE32F-NEXT: .LBB46_5: # %else8
4413 ; RV32ZVE32F-NEXT: andi a0, t0, 32
4414 ; RV32ZVE32F-NEXT: bnez a0, .LBB46_15
4415 ; RV32ZVE32F-NEXT: .LBB46_6: # %else10
4416 ; RV32ZVE32F-NEXT: andi a0, t0, 64
4417 ; RV32ZVE32F-NEXT: bnez a0, .LBB46_16
4418 ; RV32ZVE32F-NEXT: .LBB46_7: # %else12
4419 ; RV32ZVE32F-NEXT: andi a0, t0, -128
4420 ; RV32ZVE32F-NEXT: beqz a0, .LBB46_9
4421 ; RV32ZVE32F-NEXT: .LBB46_8: # %cond.store13
4422 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4423 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
4424 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
4425 ; RV32ZVE32F-NEXT: sw a3, 0(a0)
4426 ; RV32ZVE32F-NEXT: sw a2, 4(a0)
4427 ; RV32ZVE32F-NEXT: .LBB46_9: # %else14
4428 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
4429 ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
4430 ; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
4431 ; RV32ZVE32F-NEXT: addi sp, sp, 16
4432 ; RV32ZVE32F-NEXT: ret
4433 ; RV32ZVE32F-NEXT: .LBB46_10: # %cond.store
4434 ; RV32ZVE32F-NEXT: lw a1, 4(a0)
4435 ; RV32ZVE32F-NEXT: lw a0, 0(a0)
4436 ; RV32ZVE32F-NEXT: vmv.x.s s2, v8
4437 ; RV32ZVE32F-NEXT: sw a1, 4(s2)
4438 ; RV32ZVE32F-NEXT: sw a0, 0(s2)
4439 ; RV32ZVE32F-NEXT: andi a0, t0, 2
4440 ; RV32ZVE32F-NEXT: beqz a0, .LBB46_2
4441 ; RV32ZVE32F-NEXT: .LBB46_11: # %cond.store1
4442 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4443 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
4444 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4445 ; RV32ZVE32F-NEXT: sw s1, 4(a0)
4446 ; RV32ZVE32F-NEXT: sw s0, 0(a0)
4447 ; RV32ZVE32F-NEXT: andi a0, t0, 4
4448 ; RV32ZVE32F-NEXT: beqz a0, .LBB46_3
4449 ; RV32ZVE32F-NEXT: .LBB46_12: # %cond.store3
4450 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4451 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
4452 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4453 ; RV32ZVE32F-NEXT: sw t6, 0(a0)
4454 ; RV32ZVE32F-NEXT: sw t5, 4(a0)
4455 ; RV32ZVE32F-NEXT: andi a0, t0, 8
4456 ; RV32ZVE32F-NEXT: beqz a0, .LBB46_4
4457 ; RV32ZVE32F-NEXT: .LBB46_13: # %cond.store5
4458 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4459 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
4460 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4461 ; RV32ZVE32F-NEXT: sw t4, 0(a0)
4462 ; RV32ZVE32F-NEXT: sw t3, 4(a0)
4463 ; RV32ZVE32F-NEXT: andi a0, t0, 16
4464 ; RV32ZVE32F-NEXT: beqz a0, .LBB46_5
4465 ; RV32ZVE32F-NEXT: .LBB46_14: # %cond.store7
4466 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4467 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
4468 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4469 ; RV32ZVE32F-NEXT: sw t2, 0(a0)
4470 ; RV32ZVE32F-NEXT: sw t1, 4(a0)
4471 ; RV32ZVE32F-NEXT: andi a0, t0, 32
4472 ; RV32ZVE32F-NEXT: beqz a0, .LBB46_6
4473 ; RV32ZVE32F-NEXT: .LBB46_15: # %cond.store9
4474 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4475 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
4476 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4477 ; RV32ZVE32F-NEXT: sw a7, 0(a0)
4478 ; RV32ZVE32F-NEXT: sw a6, 4(a0)
4479 ; RV32ZVE32F-NEXT: andi a0, t0, 64
4480 ; RV32ZVE32F-NEXT: beqz a0, .LBB46_7
4481 ; RV32ZVE32F-NEXT: .LBB46_16: # %cond.store11
4482 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4483 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
4484 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4485 ; RV32ZVE32F-NEXT: sw a5, 0(a0)
4486 ; RV32ZVE32F-NEXT: sw a4, 4(a0)
4487 ; RV32ZVE32F-NEXT: andi a0, t0, -128
4488 ; RV32ZVE32F-NEXT: bnez a0, .LBB46_8
4489 ; RV32ZVE32F-NEXT: j .LBB46_9
4490 ;
4491 ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i16_v8i64:
4492 ; RV64ZVE32F: # %bb.0:
4493 ; RV64ZVE32F-NEXT: ld a2, 56(a0)
4494 ; RV64ZVE32F-NEXT: ld a3, 48(a0)
4495 ; RV64ZVE32F-NEXT: ld a5, 40(a0)
4496 ; RV64ZVE32F-NEXT: ld a6, 32(a0)
4497 ; RV64ZVE32F-NEXT: ld a7, 24(a0)
4498 ; RV64ZVE32F-NEXT: ld t0, 16(a0)
4499 ; RV64ZVE32F-NEXT: ld t1, 8(a0)
4500 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
4501 ; RV64ZVE32F-NEXT: vmv.x.s a4, v0
4502 ; RV64ZVE32F-NEXT: andi t2, a4, 1
4503 ; RV64ZVE32F-NEXT: beqz t2, .LBB46_2
4504 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
4505 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
4506 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
4507 ; RV64ZVE32F-NEXT: vmv.x.s t2, v8
4508 ; RV64ZVE32F-NEXT: slli t2, t2, 3
4509 ; RV64ZVE32F-NEXT: add t2, a1, t2
4510 ; RV64ZVE32F-NEXT: sd a0, 0(t2)
4511 ; RV64ZVE32F-NEXT: .LBB46_2: # %else
4512 ; RV64ZVE32F-NEXT: andi a0, a4, 2
4513 ; RV64ZVE32F-NEXT: beqz a0, .LBB46_4
4514 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
4515 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
4516 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
4517 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9
4518 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4519 ; RV64ZVE32F-NEXT: add a0, a1, a0
4520 ; RV64ZVE32F-NEXT: sd t1, 0(a0)
4521 ; RV64ZVE32F-NEXT: .LBB46_4: # %else2
4522 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
4523 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
4524 ; RV64ZVE32F-NEXT: andi a0, a4, 4
4525 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
4526 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
4527 ; RV64ZVE32F-NEXT: bnez a0, .LBB46_12
4528 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
4529 ; RV64ZVE32F-NEXT: andi a0, a4, 8
4530 ; RV64ZVE32F-NEXT: bnez a0, .LBB46_13
4531 ; RV64ZVE32F-NEXT: .LBB46_6: # %else6
4532 ; RV64ZVE32F-NEXT: andi a0, a4, 16
4533 ; RV64ZVE32F-NEXT: bnez a0, .LBB46_14
4534 ; RV64ZVE32F-NEXT: .LBB46_7: # %else8
4535 ; RV64ZVE32F-NEXT: andi a0, a4, 32
4536 ; RV64ZVE32F-NEXT: beqz a0, .LBB46_9
4537 ; RV64ZVE32F-NEXT: .LBB46_8: # %cond.store9
4538 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
4539 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4540 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4541 ; RV64ZVE32F-NEXT: add a0, a1, a0
4542 ; RV64ZVE32F-NEXT: sd a5, 0(a0)
4543 ; RV64ZVE32F-NEXT: .LBB46_9: # %else10
4544 ; RV64ZVE32F-NEXT: andi a0, a4, 64
4545 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
4546 ; RV64ZVE32F-NEXT: bnez a0, .LBB46_15
4547 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
4548 ; RV64ZVE32F-NEXT: andi a0, a4, -128
4549 ; RV64ZVE32F-NEXT: bnez a0, .LBB46_16
4550 ; RV64ZVE32F-NEXT: .LBB46_11: # %else14
4551 ; RV64ZVE32F-NEXT: ret
4552 ; RV64ZVE32F-NEXT: .LBB46_12: # %cond.store3
4553 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4554 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4555 ; RV64ZVE32F-NEXT: add a0, a1, a0
4556 ; RV64ZVE32F-NEXT: sd t0, 0(a0)
4557 ; RV64ZVE32F-NEXT: andi a0, a4, 8
4558 ; RV64ZVE32F-NEXT: beqz a0, .LBB46_6
4559 ; RV64ZVE32F-NEXT: .LBB46_13: # %cond.store5
4560 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
4561 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4562 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4563 ; RV64ZVE32F-NEXT: add a0, a1, a0
4564 ; RV64ZVE32F-NEXT: sd a7, 0(a0)
4565 ; RV64ZVE32F-NEXT: andi a0, a4, 16
4566 ; RV64ZVE32F-NEXT: beqz a0, .LBB46_7
4567 ; RV64ZVE32F-NEXT: .LBB46_14: # %cond.store7
4568 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9
4569 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4570 ; RV64ZVE32F-NEXT: add a0, a1, a0
4571 ; RV64ZVE32F-NEXT: sd a6, 0(a0)
4572 ; RV64ZVE32F-NEXT: andi a0, a4, 32
4573 ; RV64ZVE32F-NEXT: bnez a0, .LBB46_8
4574 ; RV64ZVE32F-NEXT: j .LBB46_9
4575 ; RV64ZVE32F-NEXT: .LBB46_15: # %cond.store11
4576 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4577 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4578 ; RV64ZVE32F-NEXT: add a0, a1, a0
4579 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
4580 ; RV64ZVE32F-NEXT: andi a0, a4, -128
4581 ; RV64ZVE32F-NEXT: beqz a0, .LBB46_11
4582 ; RV64ZVE32F-NEXT: .LBB46_16: # %cond.store13
4583 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
4584 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4585 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4586 ; RV64ZVE32F-NEXT: add a0, a1, a0
4587 ; RV64ZVE32F-NEXT: sd a2, 0(a0)
4588 ; RV64ZVE32F-NEXT: ret
4589 %eidxs = sext <8 x i16> %idxs to <8 x i64>
4590 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
4591 call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
4592 ret void
4593 }
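; With zero-extended i16 indices, both RV32V and RV64 can widen with vzext.vf2 to e32 and use a 32-bit indexed store; the scalar RV64ZVE32F fallback masks each index with 0xffff before scaling.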
4595 define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
4596 ; RV32V-LABEL: mscatter_baseidx_zext_v8i16_v8i64:
4597 ; RV32V: # %bb.0:
4598 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
4599 ; RV32V-NEXT: vzext.vf2 v14, v12
4600 ; RV32V-NEXT: vsll.vi v12, v14, 3
4601 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
4602 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
4603 ; RV32V-NEXT: ret
4604 ;
4605 ; RV64-LABEL: mscatter_baseidx_zext_v8i16_v8i64:
4606 ; RV64: # %bb.0:
4607 ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
4608 ; RV64-NEXT: vzext.vf2 v14, v12
4609 ; RV64-NEXT: vsll.vi v12, v14, 3
4610 ; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, ma
4611 ; RV64-NEXT: vsoxei32.v v8, (a0), v12, v0.t
4612 ; RV64-NEXT: ret
4613 ;
4614 ; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8i64:
4615 ; RV32ZVE32F: # %bb.0:
4616 ; RV32ZVE32F-NEXT: addi sp, sp, -16
4617 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
4618 ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
4619 ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
4620 ; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
4621 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
4622 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
4623 ; RV32ZVE32F-NEXT: .cfi_offset s2, -12
4624 ; RV32ZVE32F-NEXT: lw a2, 60(a0)
4625 ; RV32ZVE32F-NEXT: lw a3, 56(a0)
4626 ; RV32ZVE32F-NEXT: lw a4, 52(a0)
4627 ; RV32ZVE32F-NEXT: lw a5, 48(a0)
4628 ; RV32ZVE32F-NEXT: lw a6, 44(a0)
4629 ; RV32ZVE32F-NEXT: lw a7, 40(a0)
4630 ; RV32ZVE32F-NEXT: lw t1, 36(a0)
4631 ; RV32ZVE32F-NEXT: lw t2, 32(a0)
4632 ; RV32ZVE32F-NEXT: lw t3, 28(a0)
4633 ; RV32ZVE32F-NEXT: lw t4, 24(a0)
4634 ; RV32ZVE32F-NEXT: lw t5, 20(a0)
4635 ; RV32ZVE32F-NEXT: lw t6, 16(a0)
4636 ; RV32ZVE32F-NEXT: lw s1, 12(a0)
4637 ; RV32ZVE32F-NEXT: lw s0, 8(a0)
4638 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
4639 ; RV32ZVE32F-NEXT: vzext.vf2 v10, v8
4640 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
4641 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
4642 ; RV32ZVE32F-NEXT: vmv.x.s t0, v0
4643 ; RV32ZVE32F-NEXT: andi s2, t0, 1
4644 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4645 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
4646 ; RV32ZVE32F-NEXT: bnez s2, .LBB47_10
4647 ; RV32ZVE32F-NEXT: # %bb.1: # %else
4648 ; RV32ZVE32F-NEXT: andi a0, t0, 2
4649 ; RV32ZVE32F-NEXT: bnez a0, .LBB47_11
4650 ; RV32ZVE32F-NEXT: .LBB47_2: # %else2
4651 ; RV32ZVE32F-NEXT: andi a0, t0, 4
4652 ; RV32ZVE32F-NEXT: bnez a0, .LBB47_12
4653 ; RV32ZVE32F-NEXT: .LBB47_3: # %else4
4654 ; RV32ZVE32F-NEXT: andi a0, t0, 8
4655 ; RV32ZVE32F-NEXT: bnez a0, .LBB47_13
4656 ; RV32ZVE32F-NEXT: .LBB47_4: # %else6
4657 ; RV32ZVE32F-NEXT: andi a0, t0, 16
4658 ; RV32ZVE32F-NEXT: bnez a0, .LBB47_14
4659 ; RV32ZVE32F-NEXT: .LBB47_5: # %else8
4660 ; RV32ZVE32F-NEXT: andi a0, t0, 32
4661 ; RV32ZVE32F-NEXT: bnez a0, .LBB47_15
4662 ; RV32ZVE32F-NEXT: .LBB47_6: # %else10
4663 ; RV32ZVE32F-NEXT: andi a0, t0, 64
4664 ; RV32ZVE32F-NEXT: bnez a0, .LBB47_16
4665 ; RV32ZVE32F-NEXT: .LBB47_7: # %else12
4666 ; RV32ZVE32F-NEXT: andi a0, t0, -128
4667 ; RV32ZVE32F-NEXT: beqz a0, .LBB47_9
4668 ; RV32ZVE32F-NEXT: .LBB47_8: # %cond.store13
4669 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4670 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
4671 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
4672 ; RV32ZVE32F-NEXT: sw a3, 0(a0)
4673 ; RV32ZVE32F-NEXT: sw a2, 4(a0)
4674 ; RV32ZVE32F-NEXT: .LBB47_9: # %else14
4675 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
4676 ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
4677 ; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
4678 ; RV32ZVE32F-NEXT: addi sp, sp, 16
4679 ; RV32ZVE32F-NEXT: ret
4680 ; RV32ZVE32F-NEXT: .LBB47_10: # %cond.store
4681 ; RV32ZVE32F-NEXT: lw a1, 4(a0)
4682 ; RV32ZVE32F-NEXT: lw a0, 0(a0)
4683 ; RV32ZVE32F-NEXT: vmv.x.s s2, v8
4684 ; RV32ZVE32F-NEXT: sw a1, 4(s2)
4685 ; RV32ZVE32F-NEXT: sw a0, 0(s2)
4686 ; RV32ZVE32F-NEXT: andi a0, t0, 2
4687 ; RV32ZVE32F-NEXT: beqz a0, .LBB47_2
4688 ; RV32ZVE32F-NEXT: .LBB47_11: # %cond.store1
4689 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4690 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
4691 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4692 ; RV32ZVE32F-NEXT: sw s1, 4(a0)
4693 ; RV32ZVE32F-NEXT: sw s0, 0(a0)
4694 ; RV32ZVE32F-NEXT: andi a0, t0, 4
4695 ; RV32ZVE32F-NEXT: beqz a0, .LBB47_3
4696 ; RV32ZVE32F-NEXT: .LBB47_12: # %cond.store3
4697 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4698 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
4699 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4700 ; RV32ZVE32F-NEXT: sw t6, 0(a0)
4701 ; RV32ZVE32F-NEXT: sw t5, 4(a0)
4702 ; RV32ZVE32F-NEXT: andi a0, t0, 8
4703 ; RV32ZVE32F-NEXT: beqz a0, .LBB47_4
4704 ; RV32ZVE32F-NEXT: .LBB47_13: # %cond.store5
4705 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4706 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
4707 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4708 ; RV32ZVE32F-NEXT: sw t4, 0(a0)
4709 ; RV32ZVE32F-NEXT: sw t3, 4(a0)
4710 ; RV32ZVE32F-NEXT: andi a0, t0, 16
4711 ; RV32ZVE32F-NEXT: beqz a0, .LBB47_5
4712 ; RV32ZVE32F-NEXT: .LBB47_14: # %cond.store7
4713 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4714 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
4715 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4716 ; RV32ZVE32F-NEXT: sw t2, 0(a0)
4717 ; RV32ZVE32F-NEXT: sw t1, 4(a0)
4718 ; RV32ZVE32F-NEXT: andi a0, t0, 32
4719 ; RV32ZVE32F-NEXT: beqz a0, .LBB47_6
4720 ; RV32ZVE32F-NEXT: .LBB47_15: # %cond.store9
4721 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4722 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
4723 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4724 ; RV32ZVE32F-NEXT: sw a7, 0(a0)
4725 ; RV32ZVE32F-NEXT: sw a6, 4(a0)
4726 ; RV32ZVE32F-NEXT: andi a0, t0, 64
4727 ; RV32ZVE32F-NEXT: beqz a0, .LBB47_7
4728 ; RV32ZVE32F-NEXT: .LBB47_16: # %cond.store11
4729 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4730 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
4731 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4732 ; RV32ZVE32F-NEXT: sw a5, 0(a0)
4733 ; RV32ZVE32F-NEXT: sw a4, 4(a0)
4734 ; RV32ZVE32F-NEXT: andi a0, t0, -128
4735 ; RV32ZVE32F-NEXT: bnez a0, .LBB47_8
4736 ; RV32ZVE32F-NEXT: j .LBB47_9
4737 ;
4738 ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8i64:
4739 ; RV64ZVE32F: # %bb.0:
4740 ; RV64ZVE32F-NEXT: ld a2, 56(a0)
4741 ; RV64ZVE32F-NEXT: ld a3, 48(a0)
4742 ; RV64ZVE32F-NEXT: ld a6, 40(a0)
4743 ; RV64ZVE32F-NEXT: ld a7, 32(a0)
4744 ; RV64ZVE32F-NEXT: ld t0, 24(a0)
4745 ; RV64ZVE32F-NEXT: ld t1, 16(a0)
4746 ; RV64ZVE32F-NEXT: ld t2, 8(a0)
4747 ; RV64ZVE32F-NEXT: lui a4, 16
4748 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
4749 ; RV64ZVE32F-NEXT: vmv.x.s a5, v0
4750 ; RV64ZVE32F-NEXT: andi t3, a5, 1
4751 ; RV64ZVE32F-NEXT: addiw a4, a4, -1
4752 ; RV64ZVE32F-NEXT: beqz t3, .LBB47_2
4753 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
4754 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
4755 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
4756 ; RV64ZVE32F-NEXT: vmv.x.s t3, v8
4757 ; RV64ZVE32F-NEXT: and t3, t3, a4
4758 ; RV64ZVE32F-NEXT: slli t3, t3, 3
4759 ; RV64ZVE32F-NEXT: add t3, a1, t3
4760 ; RV64ZVE32F-NEXT: sd a0, 0(t3)
4761 ; RV64ZVE32F-NEXT: .LBB47_2: # %else
4762 ; RV64ZVE32F-NEXT: andi a0, a5, 2
4763 ; RV64ZVE32F-NEXT: beqz a0, .LBB47_4
4764 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
4765 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
4766 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
4767 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9
4768 ; RV64ZVE32F-NEXT: and a0, a0, a4
4769 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4770 ; RV64ZVE32F-NEXT: add a0, a1, a0
4771 ; RV64ZVE32F-NEXT: sd t2, 0(a0)
4772 ; RV64ZVE32F-NEXT: .LBB47_4: # %else2
4773 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
4774 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
4775 ; RV64ZVE32F-NEXT: andi a0, a5, 4
4776 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
4777 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
4778 ; RV64ZVE32F-NEXT: bnez a0, .LBB47_12
4779 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
4780 ; RV64ZVE32F-NEXT: andi a0, a5, 8
4781 ; RV64ZVE32F-NEXT: bnez a0, .LBB47_13
4782 ; RV64ZVE32F-NEXT: .LBB47_6: # %else6
4783 ; RV64ZVE32F-NEXT: andi a0, a5, 16
4784 ; RV64ZVE32F-NEXT: bnez a0, .LBB47_14
4785 ; RV64ZVE32F-NEXT: .LBB47_7: # %else8
4786 ; RV64ZVE32F-NEXT: andi a0, a5, 32
4787 ; RV64ZVE32F-NEXT: beqz a0, .LBB47_9
4788 ; RV64ZVE32F-NEXT: .LBB47_8: # %cond.store9
4789 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
4790 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4791 ; RV64ZVE32F-NEXT: and a0, a0, a4
4792 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4793 ; RV64ZVE32F-NEXT: add a0, a1, a0
4794 ; RV64ZVE32F-NEXT: sd a6, 0(a0)
4795 ; RV64ZVE32F-NEXT: .LBB47_9: # %else10
4796 ; RV64ZVE32F-NEXT: andi a0, a5, 64
4797 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
4798 ; RV64ZVE32F-NEXT: bnez a0, .LBB47_15
4799 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
4800 ; RV64ZVE32F-NEXT: andi a0, a5, -128
4801 ; RV64ZVE32F-NEXT: bnez a0, .LBB47_16
4802 ; RV64ZVE32F-NEXT: .LBB47_11: # %else14
4803 ; RV64ZVE32F-NEXT: ret
4804 ; RV64ZVE32F-NEXT: .LBB47_12: # %cond.store3
4805 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4806 ; RV64ZVE32F-NEXT: and a0, a0, a4
4807 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4808 ; RV64ZVE32F-NEXT: add a0, a1, a0
4809 ; RV64ZVE32F-NEXT: sd t1, 0(a0)
4810 ; RV64ZVE32F-NEXT: andi a0, a5, 8
4811 ; RV64ZVE32F-NEXT: beqz a0, .LBB47_6
4812 ; RV64ZVE32F-NEXT: .LBB47_13: # %cond.store5
4813 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
4814 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4815 ; RV64ZVE32F-NEXT: and a0, a0, a4
4816 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4817 ; RV64ZVE32F-NEXT: add a0, a1, a0
4818 ; RV64ZVE32F-NEXT: sd t0, 0(a0)
4819 ; RV64ZVE32F-NEXT: andi a0, a5, 16
4820 ; RV64ZVE32F-NEXT: beqz a0, .LBB47_7
4821 ; RV64ZVE32F-NEXT: .LBB47_14: # %cond.store7
4822 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9
4823 ; RV64ZVE32F-NEXT: and a0, a0, a4
4824 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4825 ; RV64ZVE32F-NEXT: add a0, a1, a0
4826 ; RV64ZVE32F-NEXT: sd a7, 0(a0)
4827 ; RV64ZVE32F-NEXT: andi a0, a5, 32
4828 ; RV64ZVE32F-NEXT: bnez a0, .LBB47_8
4829 ; RV64ZVE32F-NEXT: j .LBB47_9
4830 ; RV64ZVE32F-NEXT: .LBB47_15: # %cond.store11
4831 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4832 ; RV64ZVE32F-NEXT: and a0, a0, a4
4833 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4834 ; RV64ZVE32F-NEXT: add a0, a1, a0
4835 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
4836 ; RV64ZVE32F-NEXT: andi a0, a5, -128
4837 ; RV64ZVE32F-NEXT: beqz a0, .LBB47_11
4838 ; RV64ZVE32F-NEXT: .LBB47_16: # %cond.store13
4839 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
4840 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4841 ; RV64ZVE32F-NEXT: and a0, a0, a4
4842 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4843 ; RV64ZVE32F-NEXT: add a0, a1, a0
4844 ; RV64ZVE32F-NEXT: sd a2, 0(a0)
4845 ; RV64ZVE32F-NEXT: ret
4846 %eidxs = zext <8 x i16> %idxs to <8 x i64>
4847 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
4848 call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
4849 ret void
4850 }
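; i32 indices used directly as the GEP index: RV32V can shift them in place for a 32-bit indexed store, while RV64 sign-extends them to i64 first.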
4852 define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m) {
4853 ; RV32V-LABEL: mscatter_baseidx_v8i32_v8i64:
4854 ; RV32V: # %bb.0:
4855 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
4856 ; RV32V-NEXT: vsll.vi v12, v12, 3
4857 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
4858 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
4859 ; RV32V-NEXT: ret
4860 ;
4861 ; RV64-LABEL: mscatter_baseidx_v8i32_v8i64:
4862 ; RV64: # %bb.0:
4863 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
4864 ; RV64-NEXT: vsext.vf2 v16, v12
4865 ; RV64-NEXT: vsll.vi v12, v16, 3
4866 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
4867 ; RV64-NEXT: ret
4868 ;
4869 ; RV32ZVE32F-LABEL: mscatter_baseidx_v8i32_v8i64:
4870 ; RV32ZVE32F: # %bb.0:
4871 ; RV32ZVE32F-NEXT: addi sp, sp, -16
4872 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
4873 ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
4874 ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
4875 ; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
4876 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
4877 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
4878 ; RV32ZVE32F-NEXT: .cfi_offset s2, -12
4879 ; RV32ZVE32F-NEXT: lw a2, 60(a0)
4880 ; RV32ZVE32F-NEXT: lw a3, 56(a0)
4881 ; RV32ZVE32F-NEXT: lw a4, 52(a0)
4882 ; RV32ZVE32F-NEXT: lw a5, 48(a0)
4883 ; RV32ZVE32F-NEXT: lw a6, 44(a0)
4884 ; RV32ZVE32F-NEXT: lw t0, 40(a0)
4885 ; RV32ZVE32F-NEXT: lw t1, 36(a0)
4886 ; RV32ZVE32F-NEXT: lw t2, 32(a0)
4887 ; RV32ZVE32F-NEXT: lw t3, 28(a0)
4888 ; RV32ZVE32F-NEXT: lw t4, 24(a0)
4889 ; RV32ZVE32F-NEXT: lw t5, 20(a0)
4890 ; RV32ZVE32F-NEXT: lw t6, 16(a0)
4891 ; RV32ZVE32F-NEXT: lw s1, 12(a0)
4892 ; RV32ZVE32F-NEXT: lw s0, 8(a0)
4893 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
4894 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
4895 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
4896 ; RV32ZVE32F-NEXT: vmv.x.s a7, v0
4897 ; RV32ZVE32F-NEXT: andi s2, a7, 1
4898 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4899 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
4900 ; RV32ZVE32F-NEXT: bnez s2, .LBB48_10
4901 ; RV32ZVE32F-NEXT: # %bb.1: # %else
4902 ; RV32ZVE32F-NEXT: andi a0, a7, 2
4903 ; RV32ZVE32F-NEXT: bnez a0, .LBB48_11
4904 ; RV32ZVE32F-NEXT: .LBB48_2: # %else2
4905 ; RV32ZVE32F-NEXT: andi a0, a7, 4
4906 ; RV32ZVE32F-NEXT: bnez a0, .LBB48_12
4907 ; RV32ZVE32F-NEXT: .LBB48_3: # %else4
4908 ; RV32ZVE32F-NEXT: andi a0, a7, 8
4909 ; RV32ZVE32F-NEXT: bnez a0, .LBB48_13
4910 ; RV32ZVE32F-NEXT: .LBB48_4: # %else6
4911 ; RV32ZVE32F-NEXT: andi a0, a7, 16
4912 ; RV32ZVE32F-NEXT: bnez a0, .LBB48_14
4913 ; RV32ZVE32F-NEXT: .LBB48_5: # %else8
4914 ; RV32ZVE32F-NEXT: andi a0, a7, 32
4915 ; RV32ZVE32F-NEXT: bnez a0, .LBB48_15
4916 ; RV32ZVE32F-NEXT: .LBB48_6: # %else10
4917 ; RV32ZVE32F-NEXT: andi a0, a7, 64
4918 ; RV32ZVE32F-NEXT: bnez a0, .LBB48_16
4919 ; RV32ZVE32F-NEXT: .LBB48_7: # %else12
4920 ; RV32ZVE32F-NEXT: andi a0, a7, -128
4921 ; RV32ZVE32F-NEXT: beqz a0, .LBB48_9
4922 ; RV32ZVE32F-NEXT: .LBB48_8: # %cond.store13
4923 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4924 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
4925 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
4926 ; RV32ZVE32F-NEXT: sw a3, 0(a0)
4927 ; RV32ZVE32F-NEXT: sw a2, 4(a0)
4928 ; RV32ZVE32F-NEXT: .LBB48_9: # %else14
4929 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
4930 ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
4931 ; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
4932 ; RV32ZVE32F-NEXT: addi sp, sp, 16
4933 ; RV32ZVE32F-NEXT: ret
4934 ; RV32ZVE32F-NEXT: .LBB48_10: # %cond.store
4935 ; RV32ZVE32F-NEXT: lw a1, 4(a0)
4936 ; RV32ZVE32F-NEXT: lw a0, 0(a0)
4937 ; RV32ZVE32F-NEXT: vmv.x.s s2, v8
4938 ; RV32ZVE32F-NEXT: sw a1, 4(s2)
4939 ; RV32ZVE32F-NEXT: sw a0, 0(s2)
4940 ; RV32ZVE32F-NEXT: andi a0, a7, 2
4941 ; RV32ZVE32F-NEXT: beqz a0, .LBB48_2
4942 ; RV32ZVE32F-NEXT: .LBB48_11: # %cond.store1
4943 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4944 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
4945 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4946 ; RV32ZVE32F-NEXT: sw s1, 4(a0)
4947 ; RV32ZVE32F-NEXT: sw s0, 0(a0)
4948 ; RV32ZVE32F-NEXT: andi a0, a7, 4
4949 ; RV32ZVE32F-NEXT: beqz a0, .LBB48_3
4950 ; RV32ZVE32F-NEXT: .LBB48_12: # %cond.store3
4951 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4952 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
4953 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4954 ; RV32ZVE32F-NEXT: sw t6, 0(a0)
4955 ; RV32ZVE32F-NEXT: sw t5, 4(a0)
4956 ; RV32ZVE32F-NEXT: andi a0, a7, 8
4957 ; RV32ZVE32F-NEXT: beqz a0, .LBB48_4
4958 ; RV32ZVE32F-NEXT: .LBB48_13: # %cond.store5
4959 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4960 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
4961 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4962 ; RV32ZVE32F-NEXT: sw t4, 0(a0)
4963 ; RV32ZVE32F-NEXT: sw t3, 4(a0)
4964 ; RV32ZVE32F-NEXT: andi a0, a7, 16
4965 ; RV32ZVE32F-NEXT: beqz a0, .LBB48_5
4966 ; RV32ZVE32F-NEXT: .LBB48_14: # %cond.store7
4967 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4968 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
4969 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4970 ; RV32ZVE32F-NEXT: sw t2, 0(a0)
4971 ; RV32ZVE32F-NEXT: sw t1, 4(a0)
4972 ; RV32ZVE32F-NEXT: andi a0, a7, 32
4973 ; RV32ZVE32F-NEXT: beqz a0, .LBB48_6
4974 ; RV32ZVE32F-NEXT: .LBB48_15: # %cond.store9
4975 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4976 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
4977 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4978 ; RV32ZVE32F-NEXT: sw t0, 0(a0)
4979 ; RV32ZVE32F-NEXT: sw a6, 4(a0)
4980 ; RV32ZVE32F-NEXT: andi a0, a7, 64
4981 ; RV32ZVE32F-NEXT: beqz a0, .LBB48_7
4982 ; RV32ZVE32F-NEXT: .LBB48_16: # %cond.store11
4983 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4984 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
4985 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4986 ; RV32ZVE32F-NEXT: sw a5, 0(a0)
4987 ; RV32ZVE32F-NEXT: sw a4, 4(a0)
4988 ; RV32ZVE32F-NEXT: andi a0, a7, -128
4989 ; RV32ZVE32F-NEXT: bnez a0, .LBB48_8
4990 ; RV32ZVE32F-NEXT: j .LBB48_9
4991 ;
4992 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i32_v8i64:
4993 ; RV64ZVE32F: # %bb.0:
4994 ; RV64ZVE32F-NEXT: ld a2, 56(a0)
4995 ; RV64ZVE32F-NEXT: ld a3, 48(a0)
4996 ; RV64ZVE32F-NEXT: ld a5, 40(a0)
4997 ; RV64ZVE32F-NEXT: ld a6, 32(a0)
4998 ; RV64ZVE32F-NEXT: ld a7, 24(a0)
4999 ; RV64ZVE32F-NEXT: ld t0, 16(a0)
5000 ; RV64ZVE32F-NEXT: ld t1, 8(a0)
5001 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
5002 ; RV64ZVE32F-NEXT: vmv.x.s a4, v0
5003 ; RV64ZVE32F-NEXT: andi t2, a4, 1
5004 ; RV64ZVE32F-NEXT: beqz t2, .LBB48_2
5005 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
5006 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
5007 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
5008 ; RV64ZVE32F-NEXT: vmv.x.s t2, v8
5009 ; RV64ZVE32F-NEXT: slli t2, t2, 3
5010 ; RV64ZVE32F-NEXT: add t2, a1, t2
5011 ; RV64ZVE32F-NEXT: sd a0, 0(t2)
5012 ; RV64ZVE32F-NEXT: .LBB48_2: # %else
5013 ; RV64ZVE32F-NEXT: andi a0, a4, 2
5014 ; RV64ZVE32F-NEXT: beqz a0, .LBB48_4
5015 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
5016 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5017 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
5018 ; RV64ZVE32F-NEXT: vmv.x.s a0, v10
5019 ; RV64ZVE32F-NEXT: slli a0, a0, 3
5020 ; RV64ZVE32F-NEXT: add a0, a1, a0
5021 ; RV64ZVE32F-NEXT: sd t1, 0(a0)
5022 ; RV64ZVE32F-NEXT: .LBB48_4: # %else2
5023 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
5024 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
5025 ; RV64ZVE32F-NEXT: andi a0, a4, 4
5026 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
5027 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
5028 ; RV64ZVE32F-NEXT: bnez a0, .LBB48_12
5029 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
5030 ; RV64ZVE32F-NEXT: andi a0, a4, 8
5031 ; RV64ZVE32F-NEXT: bnez a0, .LBB48_13
5032 ; RV64ZVE32F-NEXT: .LBB48_6: # %else6
5033 ; RV64ZVE32F-NEXT: andi a0, a4, 16
5034 ; RV64ZVE32F-NEXT: bnez a0, .LBB48_14
5035 ; RV64ZVE32F-NEXT: .LBB48_7: # %else8
5036 ; RV64ZVE32F-NEXT: andi a0, a4, 32
5037 ; RV64ZVE32F-NEXT: beqz a0, .LBB48_9
5038 ; RV64ZVE32F-NEXT: .LBB48_8: # %cond.store9
5039 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
5040 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
5041 ; RV64ZVE32F-NEXT: slli a0, a0, 3
5042 ; RV64ZVE32F-NEXT: add a0, a1, a0
5043 ; RV64ZVE32F-NEXT: sd a5, 0(a0)
5044 ; RV64ZVE32F-NEXT: .LBB48_9: # %else10
5045 ; RV64ZVE32F-NEXT: andi a0, a4, 64
5046 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
5047 ; RV64ZVE32F-NEXT: bnez a0, .LBB48_15
5048 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
5049 ; RV64ZVE32F-NEXT: andi a0, a4, -128
5050 ; RV64ZVE32F-NEXT: bnez a0, .LBB48_16
5051 ; RV64ZVE32F-NEXT: .LBB48_11: # %else14
5052 ; RV64ZVE32F-NEXT: ret
5053 ; RV64ZVE32F-NEXT: .LBB48_12: # %cond.store3
5054 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
5055 ; RV64ZVE32F-NEXT: slli a0, a0, 3
5056 ; RV64ZVE32F-NEXT: add a0, a1, a0
5057 ; RV64ZVE32F-NEXT: sd t0, 0(a0)
5058 ; RV64ZVE32F-NEXT: andi a0, a4, 8
5059 ; RV64ZVE32F-NEXT: beqz a0, .LBB48_6
5060 ; RV64ZVE32F-NEXT: .LBB48_13: # %cond.store5
5061 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
5062 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
5063 ; RV64ZVE32F-NEXT: slli a0, a0, 3
5064 ; RV64ZVE32F-NEXT: add a0, a1, a0
5065 ; RV64ZVE32F-NEXT: sd a7, 0(a0)
5066 ; RV64ZVE32F-NEXT: andi a0, a4, 16
5067 ; RV64ZVE32F-NEXT: beqz a0, .LBB48_7
5068 ; RV64ZVE32F-NEXT: .LBB48_14: # %cond.store7
5069 ; RV64ZVE32F-NEXT: vmv.x.s a0, v10
5070 ; RV64ZVE32F-NEXT: slli a0, a0, 3
5071 ; RV64ZVE32F-NEXT: add a0, a1, a0
5072 ; RV64ZVE32F-NEXT: sd a6, 0(a0)
5073 ; RV64ZVE32F-NEXT: andi a0, a4, 32
5074 ; RV64ZVE32F-NEXT: bnez a0, .LBB48_8
5075 ; RV64ZVE32F-NEXT: j .LBB48_9
5076 ; RV64ZVE32F-NEXT: .LBB48_15: # %cond.store11
5077 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
5078 ; RV64ZVE32F-NEXT: slli a0, a0, 3
5079 ; RV64ZVE32F-NEXT: add a0, a1, a0
5080 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
5081 ; RV64ZVE32F-NEXT: andi a0, a4, -128
5082 ; RV64ZVE32F-NEXT: beqz a0, .LBB48_11
5083 ; RV64ZVE32F-NEXT: .LBB48_16: # %cond.store13
5084 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
5085 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
5086 ; RV64ZVE32F-NEXT: slli a0, a0, 3
5087 ; RV64ZVE32F-NEXT: add a0, a1, a0
5088 ; RV64ZVE32F-NEXT: sd a2, 0(a0)
5089 ; RV64ZVE32F-NEXT: ret
5090 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i32> %idxs
5091 call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
5092 ret void
5093 }
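; Explicitly sign-extending the i32 indices to i64 should produce the same code as using them directly.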
5095 define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m) {
5096 ; RV32V-LABEL: mscatter_baseidx_sext_v8i32_v8i64:
5097 ; RV32V: # %bb.0:
5098 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
5099 ; RV32V-NEXT: vsll.vi v12, v12, 3
5100 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
5101 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
5102 ; RV32V-NEXT: ret
5103 ;
5104 ; RV64-LABEL: mscatter_baseidx_sext_v8i32_v8i64:
5105 ; RV64: # %bb.0:
5106 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
5107 ; RV64-NEXT: vsext.vf2 v16, v12
5108 ; RV64-NEXT: vsll.vi v12, v16, 3
5109 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
5110 ; RV64-NEXT: ret
5111 ;
5112 ; RV32ZVE32F-LABEL: mscatter_baseidx_sext_v8i32_v8i64:
5113 ; RV32ZVE32F: # %bb.0:
5114 ; RV32ZVE32F-NEXT: addi sp, sp, -16
5115 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
5116 ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
5117 ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
5118 ; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
5119 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
5120 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
5121 ; RV32ZVE32F-NEXT: .cfi_offset s2, -12
5122 ; RV32ZVE32F-NEXT: lw a2, 60(a0)
5123 ; RV32ZVE32F-NEXT: lw a3, 56(a0)
5124 ; RV32ZVE32F-NEXT: lw a4, 52(a0)
5125 ; RV32ZVE32F-NEXT: lw a5, 48(a0)
5126 ; RV32ZVE32F-NEXT: lw a6, 44(a0)
5127 ; RV32ZVE32F-NEXT: lw t0, 40(a0)
5128 ; RV32ZVE32F-NEXT: lw t1, 36(a0)
5129 ; RV32ZVE32F-NEXT: lw t2, 32(a0)
5130 ; RV32ZVE32F-NEXT: lw t3, 28(a0)
5131 ; RV32ZVE32F-NEXT: lw t4, 24(a0)
5132 ; RV32ZVE32F-NEXT: lw t5, 20(a0)
5133 ; RV32ZVE32F-NEXT: lw t6, 16(a0)
5134 ; RV32ZVE32F-NEXT: lw s1, 12(a0)
5135 ; RV32ZVE32F-NEXT: lw s0, 8(a0)
5136 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
5137 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
5138 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
5139 ; RV32ZVE32F-NEXT: vmv.x.s a7, v0
5140 ; RV32ZVE32F-NEXT: andi s2, a7, 1
5141 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
5142 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
5143 ; RV32ZVE32F-NEXT: bnez s2, .LBB49_10
5144 ; RV32ZVE32F-NEXT: # %bb.1: # %else
5145 ; RV32ZVE32F-NEXT: andi a0, a7, 2
5146 ; RV32ZVE32F-NEXT: bnez a0, .LBB49_11
5147 ; RV32ZVE32F-NEXT: .LBB49_2: # %else2
5148 ; RV32ZVE32F-NEXT: andi a0, a7, 4
5149 ; RV32ZVE32F-NEXT: bnez a0, .LBB49_12
5150 ; RV32ZVE32F-NEXT: .LBB49_3: # %else4
5151 ; RV32ZVE32F-NEXT: andi a0, a7, 8
5152 ; RV32ZVE32F-NEXT: bnez a0, .LBB49_13
5153 ; RV32ZVE32F-NEXT: .LBB49_4: # %else6
5154 ; RV32ZVE32F-NEXT: andi a0, a7, 16
5155 ; RV32ZVE32F-NEXT: bnez a0, .LBB49_14
5156 ; RV32ZVE32F-NEXT: .LBB49_5: # %else8
5157 ; RV32ZVE32F-NEXT: andi a0, a7, 32
5158 ; RV32ZVE32F-NEXT: bnez a0, .LBB49_15
5159 ; RV32ZVE32F-NEXT: .LBB49_6: # %else10
5160 ; RV32ZVE32F-NEXT: andi a0, a7, 64
5161 ; RV32ZVE32F-NEXT: bnez a0, .LBB49_16
5162 ; RV32ZVE32F-NEXT: .LBB49_7: # %else12
5163 ; RV32ZVE32F-NEXT: andi a0, a7, -128
5164 ; RV32ZVE32F-NEXT: beqz a0, .LBB49_9
5165 ; RV32ZVE32F-NEXT: .LBB49_8: # %cond.store13
5166 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5167 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
5168 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
5169 ; RV32ZVE32F-NEXT: sw a3, 0(a0)
5170 ; RV32ZVE32F-NEXT: sw a2, 4(a0)
5171 ; RV32ZVE32F-NEXT: .LBB49_9: # %else14
5172 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
5173 ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
5174 ; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
5175 ; RV32ZVE32F-NEXT: addi sp, sp, 16
5176 ; RV32ZVE32F-NEXT: ret
5177 ; RV32ZVE32F-NEXT: .LBB49_10: # %cond.store
5178 ; RV32ZVE32F-NEXT: lw a1, 4(a0)
5179 ; RV32ZVE32F-NEXT: lw a0, 0(a0)
5180 ; RV32ZVE32F-NEXT: vmv.x.s s2, v8
5181 ; RV32ZVE32F-NEXT: sw a1, 4(s2)
5182 ; RV32ZVE32F-NEXT: sw a0, 0(s2)
5183 ; RV32ZVE32F-NEXT: andi a0, a7, 2
5184 ; RV32ZVE32F-NEXT: beqz a0, .LBB49_2
5185 ; RV32ZVE32F-NEXT: .LBB49_11: # %cond.store1
5186 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5187 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
5188 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5189 ; RV32ZVE32F-NEXT: sw s1, 4(a0)
5190 ; RV32ZVE32F-NEXT: sw s0, 0(a0)
5191 ; RV32ZVE32F-NEXT: andi a0, a7, 4
5192 ; RV32ZVE32F-NEXT: beqz a0, .LBB49_3
5193 ; RV32ZVE32F-NEXT: .LBB49_12: # %cond.store3
5194 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5195 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
5196 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5197 ; RV32ZVE32F-NEXT: sw t6, 0(a0)
5198 ; RV32ZVE32F-NEXT: sw t5, 4(a0)
5199 ; RV32ZVE32F-NEXT: andi a0, a7, 8
5200 ; RV32ZVE32F-NEXT: beqz a0, .LBB49_4
5201 ; RV32ZVE32F-NEXT: .LBB49_13: # %cond.store5
5202 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5203 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
5204 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5205 ; RV32ZVE32F-NEXT: sw t4, 0(a0)
5206 ; RV32ZVE32F-NEXT: sw t3, 4(a0)
5207 ; RV32ZVE32F-NEXT: andi a0, a7, 16
5208 ; RV32ZVE32F-NEXT: beqz a0, .LBB49_5
5209 ; RV32ZVE32F-NEXT: .LBB49_14: # %cond.store7
5210 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5211 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
5212 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5213 ; RV32ZVE32F-NEXT: sw t2, 0(a0)
5214 ; RV32ZVE32F-NEXT: sw t1, 4(a0)
5215 ; RV32ZVE32F-NEXT: andi a0, a7, 32
5216 ; RV32ZVE32F-NEXT: beqz a0, .LBB49_6
5217 ; RV32ZVE32F-NEXT: .LBB49_15: # %cond.store9
5218 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5219 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
5220 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5221 ; RV32ZVE32F-NEXT: sw t0, 0(a0)
5222 ; RV32ZVE32F-NEXT: sw a6, 4(a0)
5223 ; RV32ZVE32F-NEXT: andi a0, a7, 64
5224 ; RV32ZVE32F-NEXT: beqz a0, .LBB49_7
5225 ; RV32ZVE32F-NEXT: .LBB49_16: # %cond.store11
5226 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5227 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
5228 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5229 ; RV32ZVE32F-NEXT: sw a5, 0(a0)
5230 ; RV32ZVE32F-NEXT: sw a4, 4(a0)
5231 ; RV32ZVE32F-NEXT: andi a0, a7, -128
5232 ; RV32ZVE32F-NEXT: bnez a0, .LBB49_8
5233 ; RV32ZVE32F-NEXT: j .LBB49_9
5234 ;
5235 ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i32_v8i64:
5236 ; RV64ZVE32F: # %bb.0:
5237 ; RV64ZVE32F-NEXT: ld a2, 56(a0)
5238 ; RV64ZVE32F-NEXT: ld a3, 48(a0)
5239 ; RV64ZVE32F-NEXT: ld a5, 40(a0)
5240 ; RV64ZVE32F-NEXT: ld a6, 32(a0)
5241 ; RV64ZVE32F-NEXT: ld a7, 24(a0)
5242 ; RV64ZVE32F-NEXT: ld t0, 16(a0)
5243 ; RV64ZVE32F-NEXT: ld t1, 8(a0)
5244 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
5245 ; RV64ZVE32F-NEXT: vmv.x.s a4, v0
5246 ; RV64ZVE32F-NEXT: andi t2, a4, 1
5247 ; RV64ZVE32F-NEXT: beqz t2, .LBB49_2
5248 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
5249 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
5250 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
5251 ; RV64ZVE32F-NEXT: vmv.x.s t2, v8
5252 ; RV64ZVE32F-NEXT: slli t2, t2, 3
5253 ; RV64ZVE32F-NEXT: add t2, a1, t2
5254 ; RV64ZVE32F-NEXT: sd a0, 0(t2)
5255 ; RV64ZVE32F-NEXT: .LBB49_2: # %else
5256 ; RV64ZVE32F-NEXT: andi a0, a4, 2
5257 ; RV64ZVE32F-NEXT: beqz a0, .LBB49_4
5258 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
5259 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5260 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
5261 ; RV64ZVE32F-NEXT: vmv.x.s a0, v10
5262 ; RV64ZVE32F-NEXT: slli a0, a0, 3
5263 ; RV64ZVE32F-NEXT: add a0, a1, a0
5264 ; RV64ZVE32F-NEXT: sd t1, 0(a0)
5265 ; RV64ZVE32F-NEXT: .LBB49_4: # %else2
5266 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
5267 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
5268 ; RV64ZVE32F-NEXT: andi a0, a4, 4
5269 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
5270 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
5271 ; RV64ZVE32F-NEXT: bnez a0, .LBB49_12
5272 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
5273 ; RV64ZVE32F-NEXT: andi a0, a4, 8
5274 ; RV64ZVE32F-NEXT: bnez a0, .LBB49_13
5275 ; RV64ZVE32F-NEXT: .LBB49_6: # %else6
5276 ; RV64ZVE32F-NEXT: andi a0, a4, 16
5277 ; RV64ZVE32F-NEXT: bnez a0, .LBB49_14
5278 ; RV64ZVE32F-NEXT: .LBB49_7: # %else8
5279 ; RV64ZVE32F-NEXT: andi a0, a4, 32
5280 ; RV64ZVE32F-NEXT: beqz a0, .LBB49_9
5281 ; RV64ZVE32F-NEXT: .LBB49_8: # %cond.store9
5282 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
5283 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
5284 ; RV64ZVE32F-NEXT: slli a0, a0, 3
5285 ; RV64ZVE32F-NEXT: add a0, a1, a0
5286 ; RV64ZVE32F-NEXT: sd a5, 0(a0)
5287 ; RV64ZVE32F-NEXT: .LBB49_9: # %else10
5288 ; RV64ZVE32F-NEXT: andi a0, a4, 64
5289 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
5290 ; RV64ZVE32F-NEXT: bnez a0, .LBB49_15
5291 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
5292 ; RV64ZVE32F-NEXT: andi a0, a4, -128
5293 ; RV64ZVE32F-NEXT: bnez a0, .LBB49_16
5294 ; RV64ZVE32F-NEXT: .LBB49_11: # %else14
5295 ; RV64ZVE32F-NEXT: ret
5296 ; RV64ZVE32F-NEXT: .LBB49_12: # %cond.store3
5297 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
5298 ; RV64ZVE32F-NEXT: slli a0, a0, 3
5299 ; RV64ZVE32F-NEXT: add a0, a1, a0
5300 ; RV64ZVE32F-NEXT: sd t0, 0(a0)
5301 ; RV64ZVE32F-NEXT: andi a0, a4, 8
5302 ; RV64ZVE32F-NEXT: beqz a0, .LBB49_6
5303 ; RV64ZVE32F-NEXT: .LBB49_13: # %cond.store5
5304 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
5305 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
5306 ; RV64ZVE32F-NEXT: slli a0, a0, 3
5307 ; RV64ZVE32F-NEXT: add a0, a1, a0
5308 ; RV64ZVE32F-NEXT: sd a7, 0(a0)
5309 ; RV64ZVE32F-NEXT: andi a0, a4, 16
5310 ; RV64ZVE32F-NEXT: beqz a0, .LBB49_7
5311 ; RV64ZVE32F-NEXT: .LBB49_14: # %cond.store7
5312 ; RV64ZVE32F-NEXT: vmv.x.s a0, v10
5313 ; RV64ZVE32F-NEXT: slli a0, a0, 3
5314 ; RV64ZVE32F-NEXT: add a0, a1, a0
5315 ; RV64ZVE32F-NEXT: sd a6, 0(a0)
5316 ; RV64ZVE32F-NEXT: andi a0, a4, 32
5317 ; RV64ZVE32F-NEXT: bnez a0, .LBB49_8
5318 ; RV64ZVE32F-NEXT: j .LBB49_9
5319 ; RV64ZVE32F-NEXT: .LBB49_15: # %cond.store11
5320 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
5321 ; RV64ZVE32F-NEXT: slli a0, a0, 3
5322 ; RV64ZVE32F-NEXT: add a0, a1, a0
5323 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
5324 ; RV64ZVE32F-NEXT: andi a0, a4, -128
5325 ; RV64ZVE32F-NEXT: beqz a0, .LBB49_11
5326 ; RV64ZVE32F-NEXT: .LBB49_16: # %cond.store13
5327 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
5328 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
5329 ; RV64ZVE32F-NEXT: slli a0, a0, 3
5330 ; RV64ZVE32F-NEXT: add a0, a1, a0
5331 ; RV64ZVE32F-NEXT: sd a2, 0(a0)
5332 ; RV64ZVE32F-NEXT: ret
5333 %eidxs = sext <8 x i32> %idxs to <8 x i64>
5334 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
5335 call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
5336 ret void
5337 }
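; Zero-extended i32 indices: RV64 uses vzext.vf2, and the RV64ZVE32F scalar path folds the zext and the *8 scaling into an slli-by-32/srli-by-29 pair.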
5339 define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m) {
5340 ; RV32V-LABEL: mscatter_baseidx_zext_v8i32_v8i64:
5341 ; RV32V: # %bb.0:
5342 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
5343 ; RV32V-NEXT: vsll.vi v12, v12, 3
5344 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
5345 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
5346 ; RV32V-NEXT: ret
5347 ;
5348 ; RV64-LABEL: mscatter_baseidx_zext_v8i32_v8i64:
5349 ; RV64: # %bb.0:
5350 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
5351 ; RV64-NEXT: vzext.vf2 v16, v12
5352 ; RV64-NEXT: vsll.vi v12, v16, 3
5353 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
5354 ; RV64-NEXT: ret
5355 ;
5356 ; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i32_v8i64:
5357 ; RV32ZVE32F: # %bb.0:
5358 ; RV32ZVE32F-NEXT: addi sp, sp, -16
5359 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
5360 ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
5361 ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
5362 ; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
5363 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
5364 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
5365 ; RV32ZVE32F-NEXT: .cfi_offset s2, -12
5366 ; RV32ZVE32F-NEXT: lw a2, 60(a0)
5367 ; RV32ZVE32F-NEXT: lw a3, 56(a0)
5368 ; RV32ZVE32F-NEXT: lw a4, 52(a0)
5369 ; RV32ZVE32F-NEXT: lw a5, 48(a0)
5370 ; RV32ZVE32F-NEXT: lw a6, 44(a0)
5371 ; RV32ZVE32F-NEXT: lw t0, 40(a0)
5372 ; RV32ZVE32F-NEXT: lw t1, 36(a0)
5373 ; RV32ZVE32F-NEXT: lw t2, 32(a0)
5374 ; RV32ZVE32F-NEXT: lw t3, 28(a0)
5375 ; RV32ZVE32F-NEXT: lw t4, 24(a0)
5376 ; RV32ZVE32F-NEXT: lw t5, 20(a0)
5377 ; RV32ZVE32F-NEXT: lw t6, 16(a0)
5378 ; RV32ZVE32F-NEXT: lw s1, 12(a0)
5379 ; RV32ZVE32F-NEXT: lw s0, 8(a0)
5380 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
5381 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
5382 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
5383 ; RV32ZVE32F-NEXT: vmv.x.s a7, v0
5384 ; RV32ZVE32F-NEXT: andi s2, a7, 1
5385 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
5386 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
5387 ; RV32ZVE32F-NEXT: bnez s2, .LBB50_10
5388 ; RV32ZVE32F-NEXT: # %bb.1: # %else
5389 ; RV32ZVE32F-NEXT: andi a0, a7, 2
5390 ; RV32ZVE32F-NEXT: bnez a0, .LBB50_11
5391 ; RV32ZVE32F-NEXT: .LBB50_2: # %else2
5392 ; RV32ZVE32F-NEXT: andi a0, a7, 4
5393 ; RV32ZVE32F-NEXT: bnez a0, .LBB50_12
5394 ; RV32ZVE32F-NEXT: .LBB50_3: # %else4
5395 ; RV32ZVE32F-NEXT: andi a0, a7, 8
5396 ; RV32ZVE32F-NEXT: bnez a0, .LBB50_13
5397 ; RV32ZVE32F-NEXT: .LBB50_4: # %else6
5398 ; RV32ZVE32F-NEXT: andi a0, a7, 16
5399 ; RV32ZVE32F-NEXT: bnez a0, .LBB50_14
5400 ; RV32ZVE32F-NEXT: .LBB50_5: # %else8
5401 ; RV32ZVE32F-NEXT: andi a0, a7, 32
5402 ; RV32ZVE32F-NEXT: bnez a0, .LBB50_15
5403 ; RV32ZVE32F-NEXT: .LBB50_6: # %else10
5404 ; RV32ZVE32F-NEXT: andi a0, a7, 64
5405 ; RV32ZVE32F-NEXT: bnez a0, .LBB50_16
5406 ; RV32ZVE32F-NEXT: .LBB50_7: # %else12
5407 ; RV32ZVE32F-NEXT: andi a0, a7, -128
5408 ; RV32ZVE32F-NEXT: beqz a0, .LBB50_9
5409 ; RV32ZVE32F-NEXT: .LBB50_8: # %cond.store13
5410 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5411 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
5412 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
5413 ; RV32ZVE32F-NEXT: sw a3, 0(a0)
5414 ; RV32ZVE32F-NEXT: sw a2, 4(a0)
5415 ; RV32ZVE32F-NEXT: .LBB50_9: # %else14
5416 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
5417 ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
5418 ; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
5419 ; RV32ZVE32F-NEXT: addi sp, sp, 16
5420 ; RV32ZVE32F-NEXT: ret
5421 ; RV32ZVE32F-NEXT: .LBB50_10: # %cond.store
5422 ; RV32ZVE32F-NEXT: lw a1, 4(a0)
5423 ; RV32ZVE32F-NEXT: lw a0, 0(a0)
5424 ; RV32ZVE32F-NEXT: vmv.x.s s2, v8
5425 ; RV32ZVE32F-NEXT: sw a1, 4(s2)
5426 ; RV32ZVE32F-NEXT: sw a0, 0(s2)
5427 ; RV32ZVE32F-NEXT: andi a0, a7, 2
5428 ; RV32ZVE32F-NEXT: beqz a0, .LBB50_2
5429 ; RV32ZVE32F-NEXT: .LBB50_11: # %cond.store1
5430 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5431 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
5432 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5433 ; RV32ZVE32F-NEXT: sw s1, 4(a0)
5434 ; RV32ZVE32F-NEXT: sw s0, 0(a0)
5435 ; RV32ZVE32F-NEXT: andi a0, a7, 4
5436 ; RV32ZVE32F-NEXT: beqz a0, .LBB50_3
5437 ; RV32ZVE32F-NEXT: .LBB50_12: # %cond.store3
5438 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5439 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
5440 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5441 ; RV32ZVE32F-NEXT: sw t6, 0(a0)
5442 ; RV32ZVE32F-NEXT: sw t5, 4(a0)
5443 ; RV32ZVE32F-NEXT: andi a0, a7, 8
5444 ; RV32ZVE32F-NEXT: beqz a0, .LBB50_4
5445 ; RV32ZVE32F-NEXT: .LBB50_13: # %cond.store5
5446 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5447 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
5448 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5449 ; RV32ZVE32F-NEXT: sw t4, 0(a0)
5450 ; RV32ZVE32F-NEXT: sw t3, 4(a0)
5451 ; RV32ZVE32F-NEXT: andi a0, a7, 16
5452 ; RV32ZVE32F-NEXT: beqz a0, .LBB50_5
5453 ; RV32ZVE32F-NEXT: .LBB50_14: # %cond.store7
5454 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5455 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
5456 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5457 ; RV32ZVE32F-NEXT: sw t2, 0(a0)
5458 ; RV32ZVE32F-NEXT: sw t1, 4(a0)
5459 ; RV32ZVE32F-NEXT: andi a0, a7, 32
5460 ; RV32ZVE32F-NEXT: beqz a0, .LBB50_6
5461 ; RV32ZVE32F-NEXT: .LBB50_15: # %cond.store9
5462 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5463 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
5464 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5465 ; RV32ZVE32F-NEXT: sw t0, 0(a0)
5466 ; RV32ZVE32F-NEXT: sw a6, 4(a0)
5467 ; RV32ZVE32F-NEXT: andi a0, a7, 64
5468 ; RV32ZVE32F-NEXT: beqz a0, .LBB50_7
5469 ; RV32ZVE32F-NEXT: .LBB50_16: # %cond.store11
5470 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5471 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
5472 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5473 ; RV32ZVE32F-NEXT: sw a5, 0(a0)
5474 ; RV32ZVE32F-NEXT: sw a4, 4(a0)
5475 ; RV32ZVE32F-NEXT: andi a0, a7, -128
5476 ; RV32ZVE32F-NEXT: bnez a0, .LBB50_8
5477 ; RV32ZVE32F-NEXT: j .LBB50_9
5478 ;
5479 ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i32_v8i64:
5480 ; RV64ZVE32F: # %bb.0:
5481 ; RV64ZVE32F-NEXT: ld a2, 56(a0)
5482 ; RV64ZVE32F-NEXT: ld a3, 48(a0)
5483 ; RV64ZVE32F-NEXT: ld a5, 40(a0)
5484 ; RV64ZVE32F-NEXT: ld a6, 32(a0)
5485 ; RV64ZVE32F-NEXT: ld a7, 24(a0)
5486 ; RV64ZVE32F-NEXT: ld t0, 16(a0)
5487 ; RV64ZVE32F-NEXT: ld t1, 8(a0)
5488 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
5489 ; RV64ZVE32F-NEXT: vmv.x.s a4, v0
5490 ; RV64ZVE32F-NEXT: andi t2, a4, 1
5491 ; RV64ZVE32F-NEXT: beqz t2, .LBB50_2
5492 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
5493 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
5494 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
5495 ; RV64ZVE32F-NEXT: vmv.x.s t2, v8
5496 ; RV64ZVE32F-NEXT: slli t2, t2, 32
5497 ; RV64ZVE32F-NEXT: srli t2, t2, 29
5498 ; RV64ZVE32F-NEXT: add t2, a1, t2
5499 ; RV64ZVE32F-NEXT: sd a0, 0(t2)
5500 ; RV64ZVE32F-NEXT: .LBB50_2: # %else
5501 ; RV64ZVE32F-NEXT: andi a0, a4, 2
5502 ; RV64ZVE32F-NEXT: beqz a0, .LBB50_4
5503 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
5504 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5505 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
5506 ; RV64ZVE32F-NEXT: vmv.x.s a0, v10
5507 ; RV64ZVE32F-NEXT: slli a0, a0, 32
5508 ; RV64ZVE32F-NEXT: srli a0, a0, 29
5509 ; RV64ZVE32F-NEXT: add a0, a1, a0
5510 ; RV64ZVE32F-NEXT: sd t1, 0(a0)
5511 ; RV64ZVE32F-NEXT: .LBB50_4: # %else2
5512 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
5513 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
5514 ; RV64ZVE32F-NEXT: andi a0, a4, 4
5515 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
5516 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
5517 ; RV64ZVE32F-NEXT: bnez a0, .LBB50_12
5518 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
5519 ; RV64ZVE32F-NEXT: andi a0, a4, 8
5520 ; RV64ZVE32F-NEXT: bnez a0, .LBB50_13
5521 ; RV64ZVE32F-NEXT: .LBB50_6: # %else6
5522 ; RV64ZVE32F-NEXT: andi a0, a4, 16
5523 ; RV64ZVE32F-NEXT: bnez a0, .LBB50_14
5524 ; RV64ZVE32F-NEXT: .LBB50_7: # %else8
5525 ; RV64ZVE32F-NEXT: andi a0, a4, 32
5526 ; RV64ZVE32F-NEXT: beqz a0, .LBB50_9
5527 ; RV64ZVE32F-NEXT: .LBB50_8: # %cond.store9
5528 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
5529 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
5530 ; RV64ZVE32F-NEXT: slli a0, a0, 32
5531 ; RV64ZVE32F-NEXT: srli a0, a0, 29
5532 ; RV64ZVE32F-NEXT: add a0, a1, a0
5533 ; RV64ZVE32F-NEXT: sd a5, 0(a0)
5534 ; RV64ZVE32F-NEXT: .LBB50_9: # %else10
5535 ; RV64ZVE32F-NEXT: andi a0, a4, 64
5536 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
5537 ; RV64ZVE32F-NEXT: bnez a0, .LBB50_15
5538 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
5539 ; RV64ZVE32F-NEXT: andi a0, a4, -128
5540 ; RV64ZVE32F-NEXT: bnez a0, .LBB50_16
5541 ; RV64ZVE32F-NEXT: .LBB50_11: # %else14
5542 ; RV64ZVE32F-NEXT: ret
5543 ; RV64ZVE32F-NEXT: .LBB50_12: # %cond.store3
5544 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
5545 ; RV64ZVE32F-NEXT: slli a0, a0, 32
5546 ; RV64ZVE32F-NEXT: srli a0, a0, 29
5547 ; RV64ZVE32F-NEXT: add a0, a1, a0
5548 ; RV64ZVE32F-NEXT: sd t0, 0(a0)
5549 ; RV64ZVE32F-NEXT: andi a0, a4, 8
5550 ; RV64ZVE32F-NEXT: beqz a0, .LBB50_6
5551 ; RV64ZVE32F-NEXT: .LBB50_13: # %cond.store5
5552 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
5553 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
5554 ; RV64ZVE32F-NEXT: slli a0, a0, 32
5555 ; RV64ZVE32F-NEXT: srli a0, a0, 29
5556 ; RV64ZVE32F-NEXT: add a0, a1, a0
5557 ; RV64ZVE32F-NEXT: sd a7, 0(a0)
5558 ; RV64ZVE32F-NEXT: andi a0, a4, 16
5559 ; RV64ZVE32F-NEXT: beqz a0, .LBB50_7
5560 ; RV64ZVE32F-NEXT: .LBB50_14: # %cond.store7
5561 ; RV64ZVE32F-NEXT: vmv.x.s a0, v10
5562 ; RV64ZVE32F-NEXT: slli a0, a0, 32
5563 ; RV64ZVE32F-NEXT: srli a0, a0, 29
5564 ; RV64ZVE32F-NEXT: add a0, a1, a0
5565 ; RV64ZVE32F-NEXT: sd a6, 0(a0)
5566 ; RV64ZVE32F-NEXT: andi a0, a4, 32
5567 ; RV64ZVE32F-NEXT: bnez a0, .LBB50_8
5568 ; RV64ZVE32F-NEXT: j .LBB50_9
5569 ; RV64ZVE32F-NEXT: .LBB50_15: # %cond.store11
5570 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
5571 ; RV64ZVE32F-NEXT: slli a0, a0, 32
5572 ; RV64ZVE32F-NEXT: srli a0, a0, 29
5573 ; RV64ZVE32F-NEXT: add a0, a1, a0
5574 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
5575 ; RV64ZVE32F-NEXT: andi a0, a4, -128
5576 ; RV64ZVE32F-NEXT: beqz a0, .LBB50_11
5577 ; RV64ZVE32F-NEXT: .LBB50_16: # %cond.store13
5578 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
5579 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
5580 ; RV64ZVE32F-NEXT: slli a0, a0, 32
5581 ; RV64ZVE32F-NEXT: srli a0, a0, 29
5582 ; RV64ZVE32F-NEXT: add a0, a1, a0
5583 ; RV64ZVE32F-NEXT: sd a2, 0(a0)
5584 ; RV64ZVE32F-NEXT: ret
5585 %eidxs = zext <8 x i32> %idxs to <8 x i64>
5586 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
5587 call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
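; Note for the RV64ZVE32F checks above: the zero-extension of each i32 index is
; folded into the i64 element scaling, so the "slli ..., 32" / "srli ..., 29"
; pair both clears the upper 32 bits and multiplies the index by 8.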
5591 define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs, <8 x i1> %m) {
5592 ; RV32V-LABEL: mscatter_baseidx_v8i64:
5594 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
5595 ; RV32V-NEXT: vnsrl.wi v16, v12, 0
5596 ; RV32V-NEXT: vsll.vi v12, v16, 3
5597 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
5598 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
5601 ; RV64-LABEL: mscatter_baseidx_v8i64:
5603 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
5604 ; RV64-NEXT: vsll.vi v12, v12, 3
5605 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
5608 ; RV32ZVE32F-LABEL: mscatter_baseidx_v8i64:
5609 ; RV32ZVE32F: # %bb.0:
5610 ; RV32ZVE32F-NEXT: addi sp, sp, -48
5611 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 48
5612 ; RV32ZVE32F-NEXT: sw s0, 44(sp) # 4-byte Folded Spill
5613 ; RV32ZVE32F-NEXT: sw s1, 40(sp) # 4-byte Folded Spill
5614 ; RV32ZVE32F-NEXT: sw s2, 36(sp) # 4-byte Folded Spill
5615 ; RV32ZVE32F-NEXT: sw s3, 32(sp) # 4-byte Folded Spill
5616 ; RV32ZVE32F-NEXT: sw s4, 28(sp) # 4-byte Folded Spill
5617 ; RV32ZVE32F-NEXT: sw s5, 24(sp) # 4-byte Folded Spill
5618 ; RV32ZVE32F-NEXT: sw s6, 20(sp) # 4-byte Folded Spill
5619 ; RV32ZVE32F-NEXT: sw s7, 16(sp) # 4-byte Folded Spill
5620 ; RV32ZVE32F-NEXT: sw s8, 12(sp) # 4-byte Folded Spill
5621 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
5622 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
5623 ; RV32ZVE32F-NEXT: .cfi_offset s2, -12
5624 ; RV32ZVE32F-NEXT: .cfi_offset s3, -16
5625 ; RV32ZVE32F-NEXT: .cfi_offset s4, -20
5626 ; RV32ZVE32F-NEXT: .cfi_offset s5, -24
5627 ; RV32ZVE32F-NEXT: .cfi_offset s6, -28
5628 ; RV32ZVE32F-NEXT: .cfi_offset s7, -32
5629 ; RV32ZVE32F-NEXT: .cfi_offset s8, -36
5630 ; RV32ZVE32F-NEXT: lw a3, 60(a0)
5631 ; RV32ZVE32F-NEXT: lw a4, 56(a0)
5632 ; RV32ZVE32F-NEXT: lw a5, 52(a0)
5633 ; RV32ZVE32F-NEXT: lw a6, 48(a0)
5634 ; RV32ZVE32F-NEXT: lw a7, 44(a0)
5635 ; RV32ZVE32F-NEXT: lw t0, 40(a0)
5636 ; RV32ZVE32F-NEXT: lw t1, 36(a0)
5637 ; RV32ZVE32F-NEXT: lw t2, 32(a0)
5638 ; RV32ZVE32F-NEXT: lw t3, 28(a0)
5639 ; RV32ZVE32F-NEXT: lw t4, 24(a0)
5640 ; RV32ZVE32F-NEXT: lw t5, 20(a0)
5641 ; RV32ZVE32F-NEXT: lw t6, 16(a0)
5642 ; RV32ZVE32F-NEXT: lw s1, 12(a0)
5643 ; RV32ZVE32F-NEXT: lw s0, 8(a0)
5644 ; RV32ZVE32F-NEXT: lw s2, 56(a2)
5645 ; RV32ZVE32F-NEXT: lw s3, 48(a2)
5646 ; RV32ZVE32F-NEXT: lw s4, 40(a2)
5647 ; RV32ZVE32F-NEXT: lw s5, 32(a2)
5648 ; RV32ZVE32F-NEXT: lw s6, 0(a2)
5649 ; RV32ZVE32F-NEXT: lw s7, 8(a2)
5650 ; RV32ZVE32F-NEXT: lw s8, 16(a2)
5651 ; RV32ZVE32F-NEXT: lw a2, 24(a2)
5652 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
5653 ; RV32ZVE32F-NEXT: vmv.v.x v8, s6
5654 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s7
5655 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s8
5656 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a2
5657 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s5
5658 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s4
5659 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s3
5660 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s2
5661 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
5662 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
5663 ; RV32ZVE32F-NEXT: vmv.x.s a2, v0
5664 ; RV32ZVE32F-NEXT: andi s2, a2, 1
5665 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
5666 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
5667 ; RV32ZVE32F-NEXT: bnez s2, .LBB51_10
5668 ; RV32ZVE32F-NEXT: # %bb.1: # %else
5669 ; RV32ZVE32F-NEXT: andi a0, a2, 2
5670 ; RV32ZVE32F-NEXT: bnez a0, .LBB51_11
5671 ; RV32ZVE32F-NEXT: .LBB51_2: # %else2
5672 ; RV32ZVE32F-NEXT: andi a0, a2, 4
5673 ; RV32ZVE32F-NEXT: bnez a0, .LBB51_12
5674 ; RV32ZVE32F-NEXT: .LBB51_3: # %else4
5675 ; RV32ZVE32F-NEXT: andi a0, a2, 8
5676 ; RV32ZVE32F-NEXT: bnez a0, .LBB51_13
5677 ; RV32ZVE32F-NEXT: .LBB51_4: # %else6
5678 ; RV32ZVE32F-NEXT: andi a0, a2, 16
5679 ; RV32ZVE32F-NEXT: bnez a0, .LBB51_14
5680 ; RV32ZVE32F-NEXT: .LBB51_5: # %else8
5681 ; RV32ZVE32F-NEXT: andi a0, a2, 32
5682 ; RV32ZVE32F-NEXT: bnez a0, .LBB51_15
5683 ; RV32ZVE32F-NEXT: .LBB51_6: # %else10
5684 ; RV32ZVE32F-NEXT: andi a0, a2, 64
5685 ; RV32ZVE32F-NEXT: bnez a0, .LBB51_16
5686 ; RV32ZVE32F-NEXT: .LBB51_7: # %else12
5687 ; RV32ZVE32F-NEXT: andi a0, a2, -128
5688 ; RV32ZVE32F-NEXT: beqz a0, .LBB51_9
5689 ; RV32ZVE32F-NEXT: .LBB51_8: # %cond.store13
5690 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5691 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
5692 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
5693 ; RV32ZVE32F-NEXT: sw a4, 0(a0)
5694 ; RV32ZVE32F-NEXT: sw a3, 4(a0)
5695 ; RV32ZVE32F-NEXT: .LBB51_9: # %else14
5696 ; RV32ZVE32F-NEXT: lw s0, 44(sp) # 4-byte Folded Reload
5697 ; RV32ZVE32F-NEXT: lw s1, 40(sp) # 4-byte Folded Reload
5698 ; RV32ZVE32F-NEXT: lw s2, 36(sp) # 4-byte Folded Reload
5699 ; RV32ZVE32F-NEXT: lw s3, 32(sp) # 4-byte Folded Reload
5700 ; RV32ZVE32F-NEXT: lw s4, 28(sp) # 4-byte Folded Reload
5701 ; RV32ZVE32F-NEXT: lw s5, 24(sp) # 4-byte Folded Reload
5702 ; RV32ZVE32F-NEXT: lw s6, 20(sp) # 4-byte Folded Reload
5703 ; RV32ZVE32F-NEXT: lw s7, 16(sp) # 4-byte Folded Reload
5704 ; RV32ZVE32F-NEXT: lw s8, 12(sp) # 4-byte Folded Reload
5705 ; RV32ZVE32F-NEXT: addi sp, sp, 48
5706 ; RV32ZVE32F-NEXT: ret
5707 ; RV32ZVE32F-NEXT: .LBB51_10: # %cond.store
5708 ; RV32ZVE32F-NEXT: lw a1, 4(a0)
5709 ; RV32ZVE32F-NEXT: lw a0, 0(a0)
5710 ; RV32ZVE32F-NEXT: vmv.x.s s2, v8
5711 ; RV32ZVE32F-NEXT: sw a1, 4(s2)
5712 ; RV32ZVE32F-NEXT: sw a0, 0(s2)
5713 ; RV32ZVE32F-NEXT: andi a0, a2, 2
5714 ; RV32ZVE32F-NEXT: beqz a0, .LBB51_2
5715 ; RV32ZVE32F-NEXT: .LBB51_11: # %cond.store1
5716 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5717 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
5718 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5719 ; RV32ZVE32F-NEXT: sw s1, 4(a0)
5720 ; RV32ZVE32F-NEXT: sw s0, 0(a0)
5721 ; RV32ZVE32F-NEXT: andi a0, a2, 4
5722 ; RV32ZVE32F-NEXT: beqz a0, .LBB51_3
5723 ; RV32ZVE32F-NEXT: .LBB51_12: # %cond.store3
5724 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5725 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
5726 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5727 ; RV32ZVE32F-NEXT: sw t6, 0(a0)
5728 ; RV32ZVE32F-NEXT: sw t5, 4(a0)
5729 ; RV32ZVE32F-NEXT: andi a0, a2, 8
5730 ; RV32ZVE32F-NEXT: beqz a0, .LBB51_4
5731 ; RV32ZVE32F-NEXT: .LBB51_13: # %cond.store5
5732 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5733 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
5734 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5735 ; RV32ZVE32F-NEXT: sw t4, 0(a0)
5736 ; RV32ZVE32F-NEXT: sw t3, 4(a0)
5737 ; RV32ZVE32F-NEXT: andi a0, a2, 16
5738 ; RV32ZVE32F-NEXT: beqz a0, .LBB51_5
5739 ; RV32ZVE32F-NEXT: .LBB51_14: # %cond.store7
5740 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5741 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
5742 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5743 ; RV32ZVE32F-NEXT: sw t2, 0(a0)
5744 ; RV32ZVE32F-NEXT: sw t1, 4(a0)
5745 ; RV32ZVE32F-NEXT: andi a0, a2, 32
5746 ; RV32ZVE32F-NEXT: beqz a0, .LBB51_6
5747 ; RV32ZVE32F-NEXT: .LBB51_15: # %cond.store9
5748 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5749 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
5750 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5751 ; RV32ZVE32F-NEXT: sw t0, 0(a0)
5752 ; RV32ZVE32F-NEXT: sw a7, 4(a0)
5753 ; RV32ZVE32F-NEXT: andi a0, a2, 64
5754 ; RV32ZVE32F-NEXT: beqz a0, .LBB51_7
5755 ; RV32ZVE32F-NEXT: .LBB51_16: # %cond.store11
5756 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5757 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
5758 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5759 ; RV32ZVE32F-NEXT: sw a6, 0(a0)
5760 ; RV32ZVE32F-NEXT: sw a5, 4(a0)
5761 ; RV32ZVE32F-NEXT: andi a0, a2, -128
5762 ; RV32ZVE32F-NEXT: bnez a0, .LBB51_8
5763 ; RV32ZVE32F-NEXT: j .LBB51_9
5765 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i64:
5766 ; RV64ZVE32F: # %bb.0:
5767 ; RV64ZVE32F-NEXT: addi sp, sp, -32
5768 ; RV64ZVE32F-NEXT: .cfi_def_cfa_offset 32
5769 ; RV64ZVE32F-NEXT: sd s0, 24(sp) # 8-byte Folded Spill
5770 ; RV64ZVE32F-NEXT: sd s1, 16(sp) # 8-byte Folded Spill
5771 ; RV64ZVE32F-NEXT: sd s2, 8(sp) # 8-byte Folded Spill
5772 ; RV64ZVE32F-NEXT: sd s3, 0(sp) # 8-byte Folded Spill
5773 ; RV64ZVE32F-NEXT: .cfi_offset s0, -8
5774 ; RV64ZVE32F-NEXT: .cfi_offset s1, -16
5775 ; RV64ZVE32F-NEXT: .cfi_offset s2, -24
5776 ; RV64ZVE32F-NEXT: .cfi_offset s3, -32
5777 ; RV64ZVE32F-NEXT: ld a3, 56(a0)
5778 ; RV64ZVE32F-NEXT: ld a4, 48(a0)
5779 ; RV64ZVE32F-NEXT: ld a6, 40(a0)
5780 ; RV64ZVE32F-NEXT: ld t1, 32(a0)
5781 ; RV64ZVE32F-NEXT: ld t3, 24(a0)
5782 ; RV64ZVE32F-NEXT: ld t6, 16(a0)
5783 ; RV64ZVE32F-NEXT: ld s1, 8(a0)
5784 ; RV64ZVE32F-NEXT: ld s2, 8(a2)
5785 ; RV64ZVE32F-NEXT: ld s0, 16(a2)
5786 ; RV64ZVE32F-NEXT: ld t5, 24(a2)
5787 ; RV64ZVE32F-NEXT: ld t4, 32(a2)
5788 ; RV64ZVE32F-NEXT: ld t2, 40(a2)
5789 ; RV64ZVE32F-NEXT: ld t0, 48(a2)
5790 ; RV64ZVE32F-NEXT: ld a5, 56(a2)
5791 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
5792 ; RV64ZVE32F-NEXT: vmv.x.s a7, v0
5793 ; RV64ZVE32F-NEXT: andi s3, a7, 1
5794 ; RV64ZVE32F-NEXT: bnez s3, .LBB51_10
5795 ; RV64ZVE32F-NEXT: # %bb.1: # %else
5796 ; RV64ZVE32F-NEXT: andi a0, a7, 2
5797 ; RV64ZVE32F-NEXT: bnez a0, .LBB51_11
5798 ; RV64ZVE32F-NEXT: .LBB51_2: # %else2
5799 ; RV64ZVE32F-NEXT: andi a0, a7, 4
5800 ; RV64ZVE32F-NEXT: bnez a0, .LBB51_12
5801 ; RV64ZVE32F-NEXT: .LBB51_3: # %else4
5802 ; RV64ZVE32F-NEXT: andi a0, a7, 8
5803 ; RV64ZVE32F-NEXT: bnez a0, .LBB51_13
5804 ; RV64ZVE32F-NEXT: .LBB51_4: # %else6
5805 ; RV64ZVE32F-NEXT: andi a0, a7, 16
5806 ; RV64ZVE32F-NEXT: bnez a0, .LBB51_14
5807 ; RV64ZVE32F-NEXT: .LBB51_5: # %else8
5808 ; RV64ZVE32F-NEXT: andi a0, a7, 32
5809 ; RV64ZVE32F-NEXT: bnez a0, .LBB51_15
5810 ; RV64ZVE32F-NEXT: .LBB51_6: # %else10
5811 ; RV64ZVE32F-NEXT: andi a0, a7, 64
5812 ; RV64ZVE32F-NEXT: bnez a0, .LBB51_16
5813 ; RV64ZVE32F-NEXT: .LBB51_7: # %else12
5814 ; RV64ZVE32F-NEXT: andi a0, a7, -128
5815 ; RV64ZVE32F-NEXT: beqz a0, .LBB51_9
5816 ; RV64ZVE32F-NEXT: .LBB51_8: # %cond.store13
5817 ; RV64ZVE32F-NEXT: slli a5, a5, 3
5818 ; RV64ZVE32F-NEXT: add a1, a1, a5
5819 ; RV64ZVE32F-NEXT: sd a3, 0(a1)
5820 ; RV64ZVE32F-NEXT: .LBB51_9: # %else14
5821 ; RV64ZVE32F-NEXT: ld s0, 24(sp) # 8-byte Folded Reload
5822 ; RV64ZVE32F-NEXT: ld s1, 16(sp) # 8-byte Folded Reload
5823 ; RV64ZVE32F-NEXT: ld s2, 8(sp) # 8-byte Folded Reload
5824 ; RV64ZVE32F-NEXT: ld s3, 0(sp) # 8-byte Folded Reload
5825 ; RV64ZVE32F-NEXT: addi sp, sp, 32
5826 ; RV64ZVE32F-NEXT: ret
5827 ; RV64ZVE32F-NEXT: .LBB51_10: # %cond.store
5828 ; RV64ZVE32F-NEXT: ld a2, 0(a2)
5829 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
5830 ; RV64ZVE32F-NEXT: slli a2, a2, 3
5831 ; RV64ZVE32F-NEXT: add a2, a1, a2
5832 ; RV64ZVE32F-NEXT: sd a0, 0(a2)
5833 ; RV64ZVE32F-NEXT: andi a0, a7, 2
5834 ; RV64ZVE32F-NEXT: beqz a0, .LBB51_2
5835 ; RV64ZVE32F-NEXT: .LBB51_11: # %cond.store1
5836 ; RV64ZVE32F-NEXT: slli s2, s2, 3
5837 ; RV64ZVE32F-NEXT: add s2, a1, s2
5838 ; RV64ZVE32F-NEXT: sd s1, 0(s2)
5839 ; RV64ZVE32F-NEXT: andi a0, a7, 4
5840 ; RV64ZVE32F-NEXT: beqz a0, .LBB51_3
5841 ; RV64ZVE32F-NEXT: .LBB51_12: # %cond.store3
5842 ; RV64ZVE32F-NEXT: slli s0, s0, 3
5843 ; RV64ZVE32F-NEXT: add s0, a1, s0
5844 ; RV64ZVE32F-NEXT: sd t6, 0(s0)
5845 ; RV64ZVE32F-NEXT: andi a0, a7, 8
5846 ; RV64ZVE32F-NEXT: beqz a0, .LBB51_4
5847 ; RV64ZVE32F-NEXT: .LBB51_13: # %cond.store5
5848 ; RV64ZVE32F-NEXT: slli t5, t5, 3
5849 ; RV64ZVE32F-NEXT: add t5, a1, t5
5850 ; RV64ZVE32F-NEXT: sd t3, 0(t5)
5851 ; RV64ZVE32F-NEXT: andi a0, a7, 16
5852 ; RV64ZVE32F-NEXT: beqz a0, .LBB51_5
5853 ; RV64ZVE32F-NEXT: .LBB51_14: # %cond.store7
5854 ; RV64ZVE32F-NEXT: slli t4, t4, 3
5855 ; RV64ZVE32F-NEXT: add t4, a1, t4
5856 ; RV64ZVE32F-NEXT: sd t1, 0(t4)
5857 ; RV64ZVE32F-NEXT: andi a0, a7, 32
5858 ; RV64ZVE32F-NEXT: beqz a0, .LBB51_6
5859 ; RV64ZVE32F-NEXT: .LBB51_15: # %cond.store9
5860 ; RV64ZVE32F-NEXT: slli t2, t2, 3
5861 ; RV64ZVE32F-NEXT: add t2, a1, t2
5862 ; RV64ZVE32F-NEXT: sd a6, 0(t2)
5863 ; RV64ZVE32F-NEXT: andi a0, a7, 64
5864 ; RV64ZVE32F-NEXT: beqz a0, .LBB51_7
5865 ; RV64ZVE32F-NEXT: .LBB51_16: # %cond.store11
5866 ; RV64ZVE32F-NEXT: slli t0, t0, 3
5867 ; RV64ZVE32F-NEXT: add t0, a1, t0
5868 ; RV64ZVE32F-NEXT: sd a4, 0(t0)
5869 ; RV64ZVE32F-NEXT: andi a0, a7, -128
5870 ; RV64ZVE32F-NEXT: bnez a0, .LBB51_8
5871 ; RV64ZVE32F-NEXT: j .LBB51_9
5872 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %idxs
5873 call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
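; In mscatter_baseidx_v8i64 above, RV32ZVE32F truncates the i64 indices to
; their low 32 bits, assembles them with vslide1down.vx, and forms the
; addresses with vsll.vi/vadd.vx before scalarizing the stores. RV64ZVE32F
; cannot keep 64-bit indices or values in Zve32f vector registers (ELEN is 32),
; so that block stays entirely in scalar registers and needs callee-saved
; spills.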
5877 declare void @llvm.masked.scatter.v1f16.v1p0(<1 x half>, <1 x ptr>, i32, <1 x i1>)
5879 define void @mscatter_v1f16(<1 x half> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
5880 ; RV32V-LABEL: mscatter_v1f16:
5882 ; RV32V-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
5883 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
5886 ; RV64-LABEL: mscatter_v1f16:
5888 ; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
5889 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
5892 ; RV32ZVE32F-LABEL: mscatter_v1f16:
5893 ; RV32ZVE32F: # %bb.0:
5894 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
5895 ; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
5896 ; RV32ZVE32F-NEXT: ret
5898 ; RV64ZVE32F-LABEL: mscatter_v1f16:
5899 ; RV64ZVE32F: # %bb.0:
5900 ; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
5901 ; RV64ZVE32F-NEXT: vfirst.m a1, v0
5902 ; RV64ZVE32F-NEXT: bnez a1, .LBB52_2
5903 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
5904 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
5905 ; RV64ZVE32F-NEXT: vse16.v v8, (a0)
5906 ; RV64ZVE32F-NEXT: .LBB52_2: # %else
5907 ; RV64ZVE32F-NEXT: ret
5908 call void @llvm.masked.scatter.v1f16.v1p0(<1 x half> %val, <1 x ptr> %ptrs, i32 2, <1 x i1> %m)
5912 declare void @llvm.masked.scatter.v2f16.v2p0(<2 x half>, <2 x ptr>, i32, <2 x i1>)
5914 define void @mscatter_v2f16(<2 x half> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
5915 ; RV32V-LABEL: mscatter_v2f16:
5917 ; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
5918 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
5921 ; RV64-LABEL: mscatter_v2f16:
5923 ; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
5924 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
5927 ; RV32ZVE32F-LABEL: mscatter_v2f16:
5928 ; RV32ZVE32F: # %bb.0:
5929 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
5930 ; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
5931 ; RV32ZVE32F-NEXT: ret
5933 ; RV64ZVE32F-LABEL: mscatter_v2f16:
5934 ; RV64ZVE32F: # %bb.0:
5935 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
5936 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
5937 ; RV64ZVE32F-NEXT: andi a3, a2, 1
5938 ; RV64ZVE32F-NEXT: bnez a3, .LBB53_3
5939 ; RV64ZVE32F-NEXT: # %bb.1: # %else
5940 ; RV64ZVE32F-NEXT: andi a2, a2, 2
5941 ; RV64ZVE32F-NEXT: bnez a2, .LBB53_4
5942 ; RV64ZVE32F-NEXT: .LBB53_2: # %else2
5943 ; RV64ZVE32F-NEXT: ret
5944 ; RV64ZVE32F-NEXT: .LBB53_3: # %cond.store
5945 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
5946 ; RV64ZVE32F-NEXT: vse16.v v8, (a0)
5947 ; RV64ZVE32F-NEXT: andi a2, a2, 2
5948 ; RV64ZVE32F-NEXT: beqz a2, .LBB53_2
5949 ; RV64ZVE32F-NEXT: .LBB53_4: # %cond.store1
5950 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
5951 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
5952 ; RV64ZVE32F-NEXT: vse16.v v8, (a1)
5953 ; RV64ZVE32F-NEXT: ret
5954 call void @llvm.masked.scatter.v2f16.v2p0(<2 x half> %val, <2 x ptr> %ptrs, i32 2, <2 x i1> %m)
5958 declare void @llvm.masked.scatter.v4f16.v4p0(<4 x half>, <4 x ptr>, i32, <4 x i1>)
5960 define void @mscatter_v4f16(<4 x half> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
5961 ; RV32-LABEL: mscatter_v4f16:
5963 ; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
5964 ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
5967 ; RV64-LABEL: mscatter_v4f16:
5969 ; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
5970 ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
5973 ; RV64ZVE32F-LABEL: mscatter_v4f16:
5974 ; RV64ZVE32F: # %bb.0:
5975 ; RV64ZVE32F-NEXT: ld a1, 24(a0)
5976 ; RV64ZVE32F-NEXT: ld a2, 16(a0)
5977 ; RV64ZVE32F-NEXT: ld a4, 8(a0)
5978 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
5979 ; RV64ZVE32F-NEXT: vmv.x.s a3, v0
5980 ; RV64ZVE32F-NEXT: andi a5, a3, 1
5981 ; RV64ZVE32F-NEXT: bnez a5, .LBB54_5
5982 ; RV64ZVE32F-NEXT: # %bb.1: # %else
5983 ; RV64ZVE32F-NEXT: andi a0, a3, 2
5984 ; RV64ZVE32F-NEXT: bnez a0, .LBB54_6
5985 ; RV64ZVE32F-NEXT: .LBB54_2: # %else2
5986 ; RV64ZVE32F-NEXT: andi a0, a3, 4
5987 ; RV64ZVE32F-NEXT: bnez a0, .LBB54_7
5988 ; RV64ZVE32F-NEXT: .LBB54_3: # %else4
5989 ; RV64ZVE32F-NEXT: andi a3, a3, 8
5990 ; RV64ZVE32F-NEXT: bnez a3, .LBB54_8
5991 ; RV64ZVE32F-NEXT: .LBB54_4: # %else6
5992 ; RV64ZVE32F-NEXT: ret
5993 ; RV64ZVE32F-NEXT: .LBB54_5: # %cond.store
5994 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
5995 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
5996 ; RV64ZVE32F-NEXT: vse16.v v8, (a0)
5997 ; RV64ZVE32F-NEXT: andi a0, a3, 2
5998 ; RV64ZVE32F-NEXT: beqz a0, .LBB54_2
5999 ; RV64ZVE32F-NEXT: .LBB54_6: # %cond.store1
6000 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
6001 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
6002 ; RV64ZVE32F-NEXT: vse16.v v9, (a4)
6003 ; RV64ZVE32F-NEXT: andi a0, a3, 4
6004 ; RV64ZVE32F-NEXT: beqz a0, .LBB54_3
6005 ; RV64ZVE32F-NEXT: .LBB54_7: # %cond.store3
6006 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
6007 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
6008 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
6009 ; RV64ZVE32F-NEXT: andi a3, a3, 8
6010 ; RV64ZVE32F-NEXT: beqz a3, .LBB54_4
6011 ; RV64ZVE32F-NEXT: .LBB54_8: # %cond.store5
6012 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
6013 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
6014 ; RV64ZVE32F-NEXT: vse16.v v8, (a1)
6015 ; RV64ZVE32F-NEXT: ret
6016 call void @llvm.masked.scatter.v4f16.v4p0(<4 x half> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> %m)
6020 define void @mscatter_truemask_v4f16(<4 x half> %val, <4 x ptr> %ptrs) {
6021 ; RV32-LABEL: mscatter_truemask_v4f16:
6023 ; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
6024 ; RV32-NEXT: vsoxei32.v v8, (zero), v9
6027 ; RV64-LABEL: mscatter_truemask_v4f16:
6029 ; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
6030 ; RV64-NEXT: vsoxei64.v v8, (zero), v10
6033 ; RV64ZVE32F-LABEL: mscatter_truemask_v4f16:
6034 ; RV64ZVE32F: # %bb.0:
6035 ; RV64ZVE32F-NEXT: ld a1, 0(a0)
6036 ; RV64ZVE32F-NEXT: ld a2, 24(a0)
6037 ; RV64ZVE32F-NEXT: ld a3, 8(a0)
6038 ; RV64ZVE32F-NEXT: ld a0, 16(a0)
6039 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
6040 ; RV64ZVE32F-NEXT: vse16.v v8, (a1)
6041 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
6042 ; RV64ZVE32F-NEXT: vse16.v v9, (a3)
6043 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
6044 ; RV64ZVE32F-NEXT: vse16.v v9, (a0)
6045 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
6046 ; RV64ZVE32F-NEXT: vse16.v v8, (a2)
6047 ; RV64ZVE32F-NEXT: ret
6048 call void @llvm.masked.scatter.v4f16.v4p0(<4 x half> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1))
6052 define void @mscatter_falsemask_v4f16(<4 x half> %val, <4 x ptr> %ptrs) {
6053 ; CHECK-LABEL: mscatter_falsemask_v4f16:
6054 ; CHECK: # %bb.0:
6055 ; CHECK-NEXT: ret
6056 call void @llvm.masked.scatter.v4f16.v4p0(<4 x half> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> zeroinitializer)
6060 declare void @llvm.masked.scatter.v8f16.v8p0(<8 x half>, <8 x ptr>, i32, <8 x i1>)
6062 define void @mscatter_v8f16(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
6063 ; RV32-LABEL: mscatter_v8f16:
6065 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
6066 ; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
6069 ; RV64-LABEL: mscatter_v8f16:
6071 ; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
6072 ; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
6075 ; RV64ZVE32F-LABEL: mscatter_v8f16:
6076 ; RV64ZVE32F: # %bb.0:
6077 ; RV64ZVE32F-NEXT: ld a1, 56(a0)
6078 ; RV64ZVE32F-NEXT: ld a2, 48(a0)
6079 ; RV64ZVE32F-NEXT: ld a4, 40(a0)
6080 ; RV64ZVE32F-NEXT: ld a5, 32(a0)
6081 ; RV64ZVE32F-NEXT: ld a6, 24(a0)
6082 ; RV64ZVE32F-NEXT: ld a7, 16(a0)
6083 ; RV64ZVE32F-NEXT: ld t0, 8(a0)
6084 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
6085 ; RV64ZVE32F-NEXT: vmv.x.s a3, v0
6086 ; RV64ZVE32F-NEXT: andi t1, a3, 1
6087 ; RV64ZVE32F-NEXT: bnez t1, .LBB57_9
6088 ; RV64ZVE32F-NEXT: # %bb.1: # %else
6089 ; RV64ZVE32F-NEXT: andi a0, a3, 2
6090 ; RV64ZVE32F-NEXT: bnez a0, .LBB57_10
6091 ; RV64ZVE32F-NEXT: .LBB57_2: # %else2
6092 ; RV64ZVE32F-NEXT: andi a0, a3, 4
6093 ; RV64ZVE32F-NEXT: bnez a0, .LBB57_11
6094 ; RV64ZVE32F-NEXT: .LBB57_3: # %else4
6095 ; RV64ZVE32F-NEXT: andi a0, a3, 8
6096 ; RV64ZVE32F-NEXT: bnez a0, .LBB57_12
6097 ; RV64ZVE32F-NEXT: .LBB57_4: # %else6
6098 ; RV64ZVE32F-NEXT: andi a0, a3, 16
6099 ; RV64ZVE32F-NEXT: bnez a0, .LBB57_13
6100 ; RV64ZVE32F-NEXT: .LBB57_5: # %else8
6101 ; RV64ZVE32F-NEXT: andi a0, a3, 32
6102 ; RV64ZVE32F-NEXT: bnez a0, .LBB57_14
6103 ; RV64ZVE32F-NEXT: .LBB57_6: # %else10
6104 ; RV64ZVE32F-NEXT: andi a0, a3, 64
6105 ; RV64ZVE32F-NEXT: bnez a0, .LBB57_15
6106 ; RV64ZVE32F-NEXT: .LBB57_7: # %else12
6107 ; RV64ZVE32F-NEXT: andi a0, a3, -128
6108 ; RV64ZVE32F-NEXT: bnez a0, .LBB57_16
6109 ; RV64ZVE32F-NEXT: .LBB57_8: # %else14
6110 ; RV64ZVE32F-NEXT: ret
6111 ; RV64ZVE32F-NEXT: .LBB57_9: # %cond.store
6112 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
6113 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6114 ; RV64ZVE32F-NEXT: vse16.v v8, (a0)
6115 ; RV64ZVE32F-NEXT: andi a0, a3, 2
6116 ; RV64ZVE32F-NEXT: beqz a0, .LBB57_2
6117 ; RV64ZVE32F-NEXT: .LBB57_10: # %cond.store1
6118 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6119 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
6120 ; RV64ZVE32F-NEXT: vse16.v v9, (t0)
6121 ; RV64ZVE32F-NEXT: andi a0, a3, 4
6122 ; RV64ZVE32F-NEXT: beqz a0, .LBB57_3
6123 ; RV64ZVE32F-NEXT: .LBB57_11: # %cond.store3
6124 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6125 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
6126 ; RV64ZVE32F-NEXT: vse16.v v9, (a7)
6127 ; RV64ZVE32F-NEXT: andi a0, a3, 8
6128 ; RV64ZVE32F-NEXT: beqz a0, .LBB57_4
6129 ; RV64ZVE32F-NEXT: .LBB57_12: # %cond.store5
6130 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6131 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
6132 ; RV64ZVE32F-NEXT: vse16.v v9, (a6)
6133 ; RV64ZVE32F-NEXT: andi a0, a3, 16
6134 ; RV64ZVE32F-NEXT: beqz a0, .LBB57_5
6135 ; RV64ZVE32F-NEXT: .LBB57_13: # %cond.store7
6136 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6137 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
6138 ; RV64ZVE32F-NEXT: vse16.v v9, (a5)
6139 ; RV64ZVE32F-NEXT: andi a0, a3, 32
6140 ; RV64ZVE32F-NEXT: beqz a0, .LBB57_6
6141 ; RV64ZVE32F-NEXT: .LBB57_14: # %cond.store9
6142 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6143 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
6144 ; RV64ZVE32F-NEXT: vse16.v v9, (a4)
6145 ; RV64ZVE32F-NEXT: andi a0, a3, 64
6146 ; RV64ZVE32F-NEXT: beqz a0, .LBB57_7
6147 ; RV64ZVE32F-NEXT: .LBB57_15: # %cond.store11
6148 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6149 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 6
6150 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
6151 ; RV64ZVE32F-NEXT: andi a0, a3, -128
6152 ; RV64ZVE32F-NEXT: beqz a0, .LBB57_8
6153 ; RV64ZVE32F-NEXT: .LBB57_16: # %cond.store13
6154 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6155 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
6156 ; RV64ZVE32F-NEXT: vse16.v v8, (a1)
6157 ; RV64ZVE32F-NEXT: ret
6158 call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
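; The RV64ZVE32F expansion above shows the scalarized pattern used throughout
; these tests: the mask is copied to a GPR with vmv.x.s, each bit is tested
; with andi/bnez, and the selected lane is extracted with vslidedown.vi and
; stored via a single-element vse16.v to the pointer loaded up front from the
; %ptrs argument.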
6162 define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
6163 ; RV32-LABEL: mscatter_baseidx_v8i8_v8f16:
6165 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
6166 ; RV32-NEXT: vsext.vf4 v10, v9
6167 ; RV32-NEXT: vadd.vv v10, v10, v10
6168 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
6169 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
6172 ; RV64-LABEL: mscatter_baseidx_v8i8_v8f16:
6174 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
6175 ; RV64-NEXT: vsext.vf8 v12, v9
6176 ; RV64-NEXT: vadd.vv v12, v12, v12
6177 ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma
6178 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
6181 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8_v8f16:
6182 ; RV64ZVE32F: # %bb.0:
6183 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
6184 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
6185 ; RV64ZVE32F-NEXT: andi a2, a1, 1
6186 ; RV64ZVE32F-NEXT: beqz a2, .LBB58_2
6187 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
6188 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6189 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6190 ; RV64ZVE32F-NEXT: add a2, a0, a2
6191 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6192 ; RV64ZVE32F-NEXT: vse16.v v8, (a2)
6193 ; RV64ZVE32F-NEXT: .LBB58_2: # %else
6194 ; RV64ZVE32F-NEXT: andi a2, a1, 2
6195 ; RV64ZVE32F-NEXT: beqz a2, .LBB58_4
6196 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
6197 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6198 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
6199 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
6200 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6201 ; RV64ZVE32F-NEXT: add a2, a0, a2
6202 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6203 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
6204 ; RV64ZVE32F-NEXT: vse16.v v10, (a2)
6205 ; RV64ZVE32F-NEXT: .LBB58_4: # %else2
6206 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
6207 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
6208 ; RV64ZVE32F-NEXT: andi a2, a1, 4
6209 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
6210 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
6211 ; RV64ZVE32F-NEXT: bnez a2, .LBB58_12
6212 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
6213 ; RV64ZVE32F-NEXT: andi a2, a1, 8
6214 ; RV64ZVE32F-NEXT: bnez a2, .LBB58_13
6215 ; RV64ZVE32F-NEXT: .LBB58_6: # %else6
6216 ; RV64ZVE32F-NEXT: andi a2, a1, 16
6217 ; RV64ZVE32F-NEXT: bnez a2, .LBB58_14
6218 ; RV64ZVE32F-NEXT: .LBB58_7: # %else8
6219 ; RV64ZVE32F-NEXT: andi a2, a1, 32
6220 ; RV64ZVE32F-NEXT: beqz a2, .LBB58_9
6221 ; RV64ZVE32F-NEXT: .LBB58_8: # %cond.store9
6222 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6223 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
6224 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6225 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6226 ; RV64ZVE32F-NEXT: add a2, a0, a2
6227 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6228 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
6229 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
6230 ; RV64ZVE32F-NEXT: .LBB58_9: # %else10
6231 ; RV64ZVE32F-NEXT: andi a2, a1, 64
6232 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
6233 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
6234 ; RV64ZVE32F-NEXT: bnez a2, .LBB58_15
6235 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
6236 ; RV64ZVE32F-NEXT: andi a1, a1, -128
6237 ; RV64ZVE32F-NEXT: bnez a1, .LBB58_16
6238 ; RV64ZVE32F-NEXT: .LBB58_11: # %else14
6239 ; RV64ZVE32F-NEXT: ret
6240 ; RV64ZVE32F-NEXT: .LBB58_12: # %cond.store3
6241 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6242 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6243 ; RV64ZVE32F-NEXT: add a2, a0, a2
6244 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6245 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
6246 ; RV64ZVE32F-NEXT: vse16.v v11, (a2)
6247 ; RV64ZVE32F-NEXT: andi a2, a1, 8
6248 ; RV64ZVE32F-NEXT: beqz a2, .LBB58_6
6249 ; RV64ZVE32F-NEXT: .LBB58_13: # %cond.store5
6250 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6251 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
6252 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6253 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6254 ; RV64ZVE32F-NEXT: add a2, a0, a2
6255 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6256 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
6257 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
6258 ; RV64ZVE32F-NEXT: andi a2, a1, 16
6259 ; RV64ZVE32F-NEXT: beqz a2, .LBB58_7
6260 ; RV64ZVE32F-NEXT: .LBB58_14: # %cond.store7
6261 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
6262 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
6263 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6264 ; RV64ZVE32F-NEXT: add a2, a0, a2
6265 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6266 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
6267 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
6268 ; RV64ZVE32F-NEXT: andi a2, a1, 32
6269 ; RV64ZVE32F-NEXT: bnez a2, .LBB58_8
6270 ; RV64ZVE32F-NEXT: j .LBB58_9
6271 ; RV64ZVE32F-NEXT: .LBB58_15: # %cond.store11
6272 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6273 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6274 ; RV64ZVE32F-NEXT: add a2, a0, a2
6275 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6276 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
6277 ; RV64ZVE32F-NEXT: vse16.v v10, (a2)
6278 ; RV64ZVE32F-NEXT: andi a1, a1, -128
6279 ; RV64ZVE32F-NEXT: beqz a1, .LBB58_11
6280 ; RV64ZVE32F-NEXT: .LBB58_16: # %cond.store13
6281 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6282 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
6283 ; RV64ZVE32F-NEXT: vmv.x.s a1, v9
6284 ; RV64ZVE32F-NEXT: slli a1, a1, 1
6285 ; RV64ZVE32F-NEXT: add a0, a0, a1
6286 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6287 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
6288 ; RV64ZVE32F-NEXT: vse16.v v8, (a0)
6289 ; RV64ZVE32F-NEXT: ret
6290 %ptrs = getelementptr inbounds half, ptr %base, <8 x i8> %idxs
6291 call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
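; In the RV64ZVE32F checks above, pre-slid copies of the index vector (by 4
; and by 2) let each conditional block pick up its index with a plain vmv.x.s
; or a short vslidedown, rather than sliding from element 0 every time.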
6295 define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
6296 ; RV32-LABEL: mscatter_baseidx_sext_v8i8_v8f16:
6298 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
6299 ; RV32-NEXT: vsext.vf4 v10, v9
6300 ; RV32-NEXT: vadd.vv v10, v10, v10
6301 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
6302 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
6305 ; RV64-LABEL: mscatter_baseidx_sext_v8i8_v8f16:
6307 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
6308 ; RV64-NEXT: vsext.vf8 v12, v9
6309 ; RV64-NEXT: vadd.vv v12, v12, v12
6310 ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma
6311 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
6314 ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8f16:
6315 ; RV64ZVE32F: # %bb.0:
6316 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
6317 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
6318 ; RV64ZVE32F-NEXT: andi a2, a1, 1
6319 ; RV64ZVE32F-NEXT: beqz a2, .LBB59_2
6320 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
6321 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6322 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6323 ; RV64ZVE32F-NEXT: add a2, a0, a2
6324 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6325 ; RV64ZVE32F-NEXT: vse16.v v8, (a2)
6326 ; RV64ZVE32F-NEXT: .LBB59_2: # %else
6327 ; RV64ZVE32F-NEXT: andi a2, a1, 2
6328 ; RV64ZVE32F-NEXT: beqz a2, .LBB59_4
6329 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
6330 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6331 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
6332 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
6333 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6334 ; RV64ZVE32F-NEXT: add a2, a0, a2
6335 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6336 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
6337 ; RV64ZVE32F-NEXT: vse16.v v10, (a2)
6338 ; RV64ZVE32F-NEXT: .LBB59_4: # %else2
6339 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
6340 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
6341 ; RV64ZVE32F-NEXT: andi a2, a1, 4
6342 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
6343 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
6344 ; RV64ZVE32F-NEXT: bnez a2, .LBB59_12
6345 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
6346 ; RV64ZVE32F-NEXT: andi a2, a1, 8
6347 ; RV64ZVE32F-NEXT: bnez a2, .LBB59_13
6348 ; RV64ZVE32F-NEXT: .LBB59_6: # %else6
6349 ; RV64ZVE32F-NEXT: andi a2, a1, 16
6350 ; RV64ZVE32F-NEXT: bnez a2, .LBB59_14
6351 ; RV64ZVE32F-NEXT: .LBB59_7: # %else8
6352 ; RV64ZVE32F-NEXT: andi a2, a1, 32
6353 ; RV64ZVE32F-NEXT: beqz a2, .LBB59_9
6354 ; RV64ZVE32F-NEXT: .LBB59_8: # %cond.store9
6355 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6356 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
6357 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6358 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6359 ; RV64ZVE32F-NEXT: add a2, a0, a2
6360 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6361 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
6362 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
6363 ; RV64ZVE32F-NEXT: .LBB59_9: # %else10
6364 ; RV64ZVE32F-NEXT: andi a2, a1, 64
6365 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
6366 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
6367 ; RV64ZVE32F-NEXT: bnez a2, .LBB59_15
6368 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
6369 ; RV64ZVE32F-NEXT: andi a1, a1, -128
6370 ; RV64ZVE32F-NEXT: bnez a1, .LBB59_16
6371 ; RV64ZVE32F-NEXT: .LBB59_11: # %else14
6372 ; RV64ZVE32F-NEXT: ret
6373 ; RV64ZVE32F-NEXT: .LBB59_12: # %cond.store3
6374 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6375 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6376 ; RV64ZVE32F-NEXT: add a2, a0, a2
6377 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6378 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
6379 ; RV64ZVE32F-NEXT: vse16.v v11, (a2)
6380 ; RV64ZVE32F-NEXT: andi a2, a1, 8
6381 ; RV64ZVE32F-NEXT: beqz a2, .LBB59_6
6382 ; RV64ZVE32F-NEXT: .LBB59_13: # %cond.store5
6383 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6384 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
6385 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6386 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6387 ; RV64ZVE32F-NEXT: add a2, a0, a2
6388 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6389 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
6390 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
6391 ; RV64ZVE32F-NEXT: andi a2, a1, 16
6392 ; RV64ZVE32F-NEXT: beqz a2, .LBB59_7
6393 ; RV64ZVE32F-NEXT: .LBB59_14: # %cond.store7
6394 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
6395 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
6396 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6397 ; RV64ZVE32F-NEXT: add a2, a0, a2
6398 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6399 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
6400 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
6401 ; RV64ZVE32F-NEXT: andi a2, a1, 32
6402 ; RV64ZVE32F-NEXT: bnez a2, .LBB59_8
6403 ; RV64ZVE32F-NEXT: j .LBB59_9
6404 ; RV64ZVE32F-NEXT: .LBB59_15: # %cond.store11
6405 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6406 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6407 ; RV64ZVE32F-NEXT: add a2, a0, a2
6408 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6409 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
6410 ; RV64ZVE32F-NEXT: vse16.v v10, (a2)
6411 ; RV64ZVE32F-NEXT: andi a1, a1, -128
6412 ; RV64ZVE32F-NEXT: beqz a1, .LBB59_11
6413 ; RV64ZVE32F-NEXT: .LBB59_16: # %cond.store13
6414 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6415 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
6416 ; RV64ZVE32F-NEXT: vmv.x.s a1, v9
6417 ; RV64ZVE32F-NEXT: slli a1, a1, 1
6418 ; RV64ZVE32F-NEXT: add a0, a0, a1
6419 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6420 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
6421 ; RV64ZVE32F-NEXT: vse16.v v8, (a0)
6422 ; RV64ZVE32F-NEXT: ret
6423 %eidxs = sext <8 x i8> %idxs to <8 x i16>
6424 %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs
6425 call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
6429 define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
6430 ; RV32-LABEL: mscatter_baseidx_zext_v8i8_v8f16:
6432 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
6433 ; RV32-NEXT: vwaddu.vv v10, v9, v9
6434 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
6435 ; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t
6438 ; RV64-LABEL: mscatter_baseidx_zext_v8i8_v8f16:
6440 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
6441 ; RV64-NEXT: vwaddu.vv v10, v9, v9
6442 ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma
6443 ; RV64-NEXT: vsoxei16.v v8, (a0), v10, v0.t
6446 ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8f16:
6447 ; RV64ZVE32F: # %bb.0:
6448 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
6449 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
6450 ; RV64ZVE32F-NEXT: andi a2, a1, 1
6451 ; RV64ZVE32F-NEXT: beqz a2, .LBB60_2
6452 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
6453 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6454 ; RV64ZVE32F-NEXT: andi a2, a2, 255
6455 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6456 ; RV64ZVE32F-NEXT: add a2, a0, a2
6457 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6458 ; RV64ZVE32F-NEXT: vse16.v v8, (a2)
6459 ; RV64ZVE32F-NEXT: .LBB60_2: # %else
6460 ; RV64ZVE32F-NEXT: andi a2, a1, 2
6461 ; RV64ZVE32F-NEXT: beqz a2, .LBB60_4
6462 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
6463 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6464 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
6465 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
6466 ; RV64ZVE32F-NEXT: andi a2, a2, 255
6467 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6468 ; RV64ZVE32F-NEXT: add a2, a0, a2
6469 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6470 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
6471 ; RV64ZVE32F-NEXT: vse16.v v10, (a2)
6472 ; RV64ZVE32F-NEXT: .LBB60_4: # %else2
6473 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
6474 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
6475 ; RV64ZVE32F-NEXT: andi a2, a1, 4
6476 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
6477 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
6478 ; RV64ZVE32F-NEXT: bnez a2, .LBB60_12
6479 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
6480 ; RV64ZVE32F-NEXT: andi a2, a1, 8
6481 ; RV64ZVE32F-NEXT: bnez a2, .LBB60_13
6482 ; RV64ZVE32F-NEXT: .LBB60_6: # %else6
6483 ; RV64ZVE32F-NEXT: andi a2, a1, 16
6484 ; RV64ZVE32F-NEXT: bnez a2, .LBB60_14
6485 ; RV64ZVE32F-NEXT: .LBB60_7: # %else8
6486 ; RV64ZVE32F-NEXT: andi a2, a1, 32
6487 ; RV64ZVE32F-NEXT: beqz a2, .LBB60_9
6488 ; RV64ZVE32F-NEXT: .LBB60_8: # %cond.store9
6489 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6490 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
6491 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6492 ; RV64ZVE32F-NEXT: andi a2, a2, 255
6493 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6494 ; RV64ZVE32F-NEXT: add a2, a0, a2
6495 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6496 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
6497 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
6498 ; RV64ZVE32F-NEXT: .LBB60_9: # %else10
6499 ; RV64ZVE32F-NEXT: andi a2, a1, 64
6500 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
6501 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
6502 ; RV64ZVE32F-NEXT: bnez a2, .LBB60_15
6503 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
6504 ; RV64ZVE32F-NEXT: andi a1, a1, -128
6505 ; RV64ZVE32F-NEXT: bnez a1, .LBB60_16
6506 ; RV64ZVE32F-NEXT: .LBB60_11: # %else14
6507 ; RV64ZVE32F-NEXT: ret
6508 ; RV64ZVE32F-NEXT: .LBB60_12: # %cond.store3
6509 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6510 ; RV64ZVE32F-NEXT: andi a2, a2, 255
6511 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6512 ; RV64ZVE32F-NEXT: add a2, a0, a2
6513 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6514 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
6515 ; RV64ZVE32F-NEXT: vse16.v v11, (a2)
6516 ; RV64ZVE32F-NEXT: andi a2, a1, 8
6517 ; RV64ZVE32F-NEXT: beqz a2, .LBB60_6
6518 ; RV64ZVE32F-NEXT: .LBB60_13: # %cond.store5
6519 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6520 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
6521 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6522 ; RV64ZVE32F-NEXT: andi a2, a2, 255
6523 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6524 ; RV64ZVE32F-NEXT: add a2, a0, a2
6525 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6526 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
6527 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
6528 ; RV64ZVE32F-NEXT: andi a2, a1, 16
6529 ; RV64ZVE32F-NEXT: beqz a2, .LBB60_7
6530 ; RV64ZVE32F-NEXT: .LBB60_14: # %cond.store7
6531 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
6532 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
6533 ; RV64ZVE32F-NEXT: andi a2, a2, 255
6534 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6535 ; RV64ZVE32F-NEXT: add a2, a0, a2
6536 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6537 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
6538 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
6539 ; RV64ZVE32F-NEXT: andi a2, a1, 32
6540 ; RV64ZVE32F-NEXT: bnez a2, .LBB60_8
6541 ; RV64ZVE32F-NEXT: j .LBB60_9
6542 ; RV64ZVE32F-NEXT: .LBB60_15: # %cond.store11
6543 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6544 ; RV64ZVE32F-NEXT: andi a2, a2, 255
6545 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6546 ; RV64ZVE32F-NEXT: add a2, a0, a2
6547 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6548 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
6549 ; RV64ZVE32F-NEXT: vse16.v v10, (a2)
6550 ; RV64ZVE32F-NEXT: andi a1, a1, -128
6551 ; RV64ZVE32F-NEXT: beqz a1, .LBB60_11
6552 ; RV64ZVE32F-NEXT: .LBB60_16: # %cond.store13
6553 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6554 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
6555 ; RV64ZVE32F-NEXT: vmv.x.s a1, v9
6556 ; RV64ZVE32F-NEXT: andi a1, a1, 255
6557 ; RV64ZVE32F-NEXT: slli a1, a1, 1
6558 ; RV64ZVE32F-NEXT: add a0, a0, a1
6559 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6560 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
6561 ; RV64ZVE32F-NEXT: vse16.v v8, (a0)
6562 ; RV64ZVE32F-NEXT: ret
6563 %eidxs = zext <8 x i8> %idxs to <8 x i16>
6564 %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs
6565 call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
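; Compared with the sext variant above, the zero-extended i8 indices let RV32
; and RV64 use vwaddu.vv (which doubles and zero-extends the indices to e16)
; followed by the narrower vsoxei16.v; RV64ZVE32F instead masks each extracted
; index with "andi ..., 255" before the x2 scaling.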
6569 define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
6570 ; RV32-LABEL: mscatter_baseidx_v8f16:
6572 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
6573 ; RV32-NEXT: vwadd.vv v10, v9, v9
6574 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
6577 ; RV64-LABEL: mscatter_baseidx_v8f16:
6579 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
6580 ; RV64-NEXT: vsext.vf4 v12, v9
6581 ; RV64-NEXT: vadd.vv v12, v12, v12
6582 ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma
6583 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
6586 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8f16:
6587 ; RV64ZVE32F: # %bb.0:
6588 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
6589 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
6590 ; RV64ZVE32F-NEXT: andi a2, a1, 1
6591 ; RV64ZVE32F-NEXT: beqz a2, .LBB61_2
6592 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
6593 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6594 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6595 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6596 ; RV64ZVE32F-NEXT: add a2, a0, a2
6597 ; RV64ZVE32F-NEXT: vse16.v v8, (a2)
6598 ; RV64ZVE32F-NEXT: .LBB61_2: # %else
6599 ; RV64ZVE32F-NEXT: andi a2, a1, 2
6600 ; RV64ZVE32F-NEXT: beqz a2, .LBB61_4
6601 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
6602 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
6603 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
6604 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
6605 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6606 ; RV64ZVE32F-NEXT: add a2, a0, a2
6607 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6608 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
6609 ; RV64ZVE32F-NEXT: vse16.v v10, (a2)
6610 ; RV64ZVE32F-NEXT: .LBB61_4: # %else2
6611 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
6612 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
6613 ; RV64ZVE32F-NEXT: andi a2, a1, 4
6614 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
6615 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
6616 ; RV64ZVE32F-NEXT: bnez a2, .LBB61_12
6617 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
6618 ; RV64ZVE32F-NEXT: andi a2, a1, 8
6619 ; RV64ZVE32F-NEXT: bnez a2, .LBB61_13
6620 ; RV64ZVE32F-NEXT: .LBB61_6: # %else6
6621 ; RV64ZVE32F-NEXT: andi a2, a1, 16
6622 ; RV64ZVE32F-NEXT: bnez a2, .LBB61_14
6623 ; RV64ZVE32F-NEXT: .LBB61_7: # %else8
6624 ; RV64ZVE32F-NEXT: andi a2, a1, 32
6625 ; RV64ZVE32F-NEXT: beqz a2, .LBB61_9
6626 ; RV64ZVE32F-NEXT: .LBB61_8: # %cond.store9
6627 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
6628 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
6629 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6630 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6631 ; RV64ZVE32F-NEXT: add a2, a0, a2
6632 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6633 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
6634 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
6635 ; RV64ZVE32F-NEXT: .LBB61_9: # %else10
6636 ; RV64ZVE32F-NEXT: andi a2, a1, 64
6637 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
6638 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
6639 ; RV64ZVE32F-NEXT: bnez a2, .LBB61_15
6640 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
6641 ; RV64ZVE32F-NEXT: andi a1, a1, -128
6642 ; RV64ZVE32F-NEXT: bnez a1, .LBB61_16
6643 ; RV64ZVE32F-NEXT: .LBB61_11: # %else14
6644 ; RV64ZVE32F-NEXT: ret
6645 ; RV64ZVE32F-NEXT: .LBB61_12: # %cond.store3
6646 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6647 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6648 ; RV64ZVE32F-NEXT: add a2, a0, a2
6649 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6650 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
6651 ; RV64ZVE32F-NEXT: vse16.v v11, (a2)
6652 ; RV64ZVE32F-NEXT: andi a2, a1, 8
6653 ; RV64ZVE32F-NEXT: beqz a2, .LBB61_6
6654 ; RV64ZVE32F-NEXT: .LBB61_13: # %cond.store5
6655 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
6656 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
6657 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6658 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6659 ; RV64ZVE32F-NEXT: add a2, a0, a2
6660 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6661 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
6662 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
6663 ; RV64ZVE32F-NEXT: andi a2, a1, 16
6664 ; RV64ZVE32F-NEXT: beqz a2, .LBB61_7
6665 ; RV64ZVE32F-NEXT: .LBB61_14: # %cond.store7
6666 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6667 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
6668 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6669 ; RV64ZVE32F-NEXT: add a2, a0, a2
6670 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
6671 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
6672 ; RV64ZVE32F-NEXT: andi a2, a1, 32
6673 ; RV64ZVE32F-NEXT: bnez a2, .LBB61_8
6674 ; RV64ZVE32F-NEXT: j .LBB61_9
6675 ; RV64ZVE32F-NEXT: .LBB61_15: # %cond.store11
6676 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6677 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6678 ; RV64ZVE32F-NEXT: add a2, a0, a2
6679 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6680 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
6681 ; RV64ZVE32F-NEXT: vse16.v v10, (a2)
6682 ; RV64ZVE32F-NEXT: andi a1, a1, -128
6683 ; RV64ZVE32F-NEXT: beqz a1, .LBB61_11
6684 ; RV64ZVE32F-NEXT: .LBB61_16: # %cond.store13
6685 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
6686 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
6687 ; RV64ZVE32F-NEXT: vmv.x.s a1, v9
6688 ; RV64ZVE32F-NEXT: slli a1, a1, 1
6689 ; RV64ZVE32F-NEXT: add a0, a0, a1
6690 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6691 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
6692 ; RV64ZVE32F-NEXT: vse16.v v8, (a0)
6693 ; RV64ZVE32F-NEXT: ret
6694 %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %idxs
6695 call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
6699 declare void @llvm.masked.scatter.v1f32.v1p0(<1 x float>, <1 x ptr>, i32, <1 x i1>)
6701 define void @mscatter_v1f32(<1 x float> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
6702 ; RV32V-LABEL: mscatter_v1f32:
6704 ; RV32V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
6705 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
6708 ; RV64-LABEL: mscatter_v1f32:
6710 ; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
6711 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
6714 ; RV32ZVE32F-LABEL: mscatter_v1f32:
6715 ; RV32ZVE32F: # %bb.0:
6716 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6717 ; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
6718 ; RV32ZVE32F-NEXT: ret
6720 ; RV64ZVE32F-LABEL: mscatter_v1f32:
6721 ; RV64ZVE32F: # %bb.0:
6722 ; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
6723 ; RV64ZVE32F-NEXT: vfirst.m a1, v0
6724 ; RV64ZVE32F-NEXT: bnez a1, .LBB62_2
6725 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
6726 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6727 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
6728 ; RV64ZVE32F-NEXT: .LBB62_2: # %else
6729 ; RV64ZVE32F-NEXT: ret
6730 call void @llvm.masked.scatter.v1f32.v1p0(<1 x float> %val, <1 x ptr> %ptrs, i32 4, <1 x i1> %m)
6734 declare void @llvm.masked.scatter.v2f32.v2p0(<2 x float>, <2 x ptr>, i32, <2 x i1>)
6736 define void @mscatter_v2f32(<2 x float> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
6737 ; RV32V-LABEL: mscatter_v2f32:
6739 ; RV32V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
6740 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
6743 ; RV64-LABEL: mscatter_v2f32:
6745 ; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
6746 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
6749 ; RV32ZVE32F-LABEL: mscatter_v2f32:
6750 ; RV32ZVE32F: # %bb.0:
6751 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
6752 ; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
6753 ; RV32ZVE32F-NEXT: ret
6755 ; RV64ZVE32F-LABEL: mscatter_v2f32:
6756 ; RV64ZVE32F: # %bb.0:
6757 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
6758 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
6759 ; RV64ZVE32F-NEXT: andi a3, a2, 1
6760 ; RV64ZVE32F-NEXT: bnez a3, .LBB63_3
6761 ; RV64ZVE32F-NEXT: # %bb.1: # %else
6762 ; RV64ZVE32F-NEXT: andi a2, a2, 2
6763 ; RV64ZVE32F-NEXT: bnez a2, .LBB63_4
6764 ; RV64ZVE32F-NEXT: .LBB63_2: # %else2
6765 ; RV64ZVE32F-NEXT: ret
6766 ; RV64ZVE32F-NEXT: .LBB63_3: # %cond.store
6767 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6768 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
6769 ; RV64ZVE32F-NEXT: andi a2, a2, 2
6770 ; RV64ZVE32F-NEXT: beqz a2, .LBB63_2
6771 ; RV64ZVE32F-NEXT: .LBB63_4: # %cond.store1
6772 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6773 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
6774 ; RV64ZVE32F-NEXT: vse32.v v8, (a1)
6775 ; RV64ZVE32F-NEXT: ret
6776 call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> %val, <2 x ptr> %ptrs, i32 4, <2 x i1> %m)
6780 declare void @llvm.masked.scatter.v4f32.v4p0(<4 x float>, <4 x ptr>, i32, <4 x i1>)
6782 define void @mscatter_v4f32(<4 x float> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
6783 ; RV32-LABEL: mscatter_v4f32:
6785 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
6786 ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
6789 ; RV64-LABEL: mscatter_v4f32:
6791 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
6792 ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
6795 ; RV64ZVE32F-LABEL: mscatter_v4f32:
6796 ; RV64ZVE32F: # %bb.0:
6797 ; RV64ZVE32F-NEXT: ld a1, 24(a0)
6798 ; RV64ZVE32F-NEXT: ld a2, 16(a0)
6799 ; RV64ZVE32F-NEXT: ld a4, 8(a0)
6800 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
6801 ; RV64ZVE32F-NEXT: vmv.x.s a3, v0
6802 ; RV64ZVE32F-NEXT: andi a5, a3, 1
6803 ; RV64ZVE32F-NEXT: bnez a5, .LBB64_5
6804 ; RV64ZVE32F-NEXT: # %bb.1: # %else
6805 ; RV64ZVE32F-NEXT: andi a0, a3, 2
6806 ; RV64ZVE32F-NEXT: bnez a0, .LBB64_6
6807 ; RV64ZVE32F-NEXT: .LBB64_2: # %else2
6808 ; RV64ZVE32F-NEXT: andi a0, a3, 4
6809 ; RV64ZVE32F-NEXT: bnez a0, .LBB64_7
6810 ; RV64ZVE32F-NEXT: .LBB64_3: # %else4
6811 ; RV64ZVE32F-NEXT: andi a3, a3, 8
6812 ; RV64ZVE32F-NEXT: bnez a3, .LBB64_8
6813 ; RV64ZVE32F-NEXT: .LBB64_4: # %else6
6814 ; RV64ZVE32F-NEXT: ret
6815 ; RV64ZVE32F-NEXT: .LBB64_5: # %cond.store
6816 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
6817 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6818 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
6819 ; RV64ZVE32F-NEXT: andi a0, a3, 2
6820 ; RV64ZVE32F-NEXT: beqz a0, .LBB64_2
6821 ; RV64ZVE32F-NEXT: .LBB64_6: # %cond.store1
6822 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6823 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
6824 ; RV64ZVE32F-NEXT: vse32.v v9, (a4)
6825 ; RV64ZVE32F-NEXT: andi a0, a3, 4
6826 ; RV64ZVE32F-NEXT: beqz a0, .LBB64_3
6827 ; RV64ZVE32F-NEXT: .LBB64_7: # %cond.store3
6828 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6829 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
6830 ; RV64ZVE32F-NEXT: vse32.v v9, (a2)
6831 ; RV64ZVE32F-NEXT: andi a3, a3, 8
6832 ; RV64ZVE32F-NEXT: beqz a3, .LBB64_4
6833 ; RV64ZVE32F-NEXT: .LBB64_8: # %cond.store5
6834 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6835 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
6836 ; RV64ZVE32F-NEXT: vse32.v v8, (a1)
6837 ; RV64ZVE32F-NEXT: ret
6838 call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %val, <4 x ptr> %ptrs, i32 4, <4 x i1> %m)
6842 define void @mscatter_truemask_v4f32(<4 x float> %val, <4 x ptr> %ptrs) {
6843 ; RV32-LABEL: mscatter_truemask_v4f32:
6845 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
6846 ; RV32-NEXT: vsoxei32.v v8, (zero), v9
6849 ; RV64-LABEL: mscatter_truemask_v4f32:
6851 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
6852 ; RV64-NEXT: vsoxei64.v v8, (zero), v10
6855 ; RV64ZVE32F-LABEL: mscatter_truemask_v4f32:
6856 ; RV64ZVE32F: # %bb.0:
6857 ; RV64ZVE32F-NEXT: ld a1, 0(a0)
6858 ; RV64ZVE32F-NEXT: ld a2, 24(a0)
6859 ; RV64ZVE32F-NEXT: ld a3, 8(a0)
6860 ; RV64ZVE32F-NEXT: ld a0, 16(a0)
6861 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6862 ; RV64ZVE32F-NEXT: vse32.v v8, (a1)
6863 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
6864 ; RV64ZVE32F-NEXT: vse32.v v9, (a3)
6865 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
6866 ; RV64ZVE32F-NEXT: vse32.v v9, (a0)
6867 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
6868 ; RV64ZVE32F-NEXT: vse32.v v8, (a2)
6869 ; RV64ZVE32F-NEXT: ret
6870 call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %val, <4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 1))
6874 define void @mscatter_falsemask_v4f32(<4 x float> %val, <4 x ptr> %ptrs) {
6875 ; CHECK-LABEL: mscatter_falsemask_v4f32:
6876 ; CHECK: # %bb.0:
6877 ; CHECK-NEXT: ret
6878 call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %val, <4 x ptr> %ptrs, i32 4, <4 x i1> zeroinitializer)
6882 declare void @llvm.masked.scatter.v8f32.v8p0(<8 x float>, <8 x ptr>, i32, <8 x i1>)
6884 define void @mscatter_v8f32(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
6885 ; RV32-LABEL: mscatter_v8f32:
6887 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
6888 ; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
6891 ; RV64-LABEL: mscatter_v8f32:
6893 ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
6894 ; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
6897 ; RV64ZVE32F-LABEL: mscatter_v8f32:
6898 ; RV64ZVE32F: # %bb.0:
6899 ; RV64ZVE32F-NEXT: ld a1, 56(a0)
6900 ; RV64ZVE32F-NEXT: ld a2, 48(a0)
6901 ; RV64ZVE32F-NEXT: ld a4, 40(a0)
6902 ; RV64ZVE32F-NEXT: ld a5, 32(a0)
6903 ; RV64ZVE32F-NEXT: ld a6, 24(a0)
6904 ; RV64ZVE32F-NEXT: ld a7, 16(a0)
6905 ; RV64ZVE32F-NEXT: ld t0, 8(a0)
6906 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
6907 ; RV64ZVE32F-NEXT: vmv.x.s a3, v0
6908 ; RV64ZVE32F-NEXT: andi t1, a3, 1
6909 ; RV64ZVE32F-NEXT: bnez t1, .LBB67_9
6910 ; RV64ZVE32F-NEXT: # %bb.1: # %else
6911 ; RV64ZVE32F-NEXT: andi a0, a3, 2
6912 ; RV64ZVE32F-NEXT: bnez a0, .LBB67_10
6913 ; RV64ZVE32F-NEXT: .LBB67_2: # %else2
6914 ; RV64ZVE32F-NEXT: andi a0, a3, 4
6915 ; RV64ZVE32F-NEXT: bnez a0, .LBB67_11
6916 ; RV64ZVE32F-NEXT: .LBB67_3: # %else4
6917 ; RV64ZVE32F-NEXT: andi a0, a3, 8
6918 ; RV64ZVE32F-NEXT: bnez a0, .LBB67_12
6919 ; RV64ZVE32F-NEXT: .LBB67_4: # %else6
6920 ; RV64ZVE32F-NEXT: andi a0, a3, 16
6921 ; RV64ZVE32F-NEXT: bnez a0, .LBB67_13
6922 ; RV64ZVE32F-NEXT: .LBB67_5: # %else8
6923 ; RV64ZVE32F-NEXT: andi a0, a3, 32
6924 ; RV64ZVE32F-NEXT: bnez a0, .LBB67_14
6925 ; RV64ZVE32F-NEXT: .LBB67_6: # %else10
6926 ; RV64ZVE32F-NEXT: andi a0, a3, 64
6927 ; RV64ZVE32F-NEXT: bnez a0, .LBB67_15
6928 ; RV64ZVE32F-NEXT: .LBB67_7: # %else12
6929 ; RV64ZVE32F-NEXT: andi a0, a3, -128
6930 ; RV64ZVE32F-NEXT: bnez a0, .LBB67_16
6931 ; RV64ZVE32F-NEXT: .LBB67_8: # %else14
6932 ; RV64ZVE32F-NEXT: ret
6933 ; RV64ZVE32F-NEXT: .LBB67_9: # %cond.store
6934 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
6935 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6936 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
6937 ; RV64ZVE32F-NEXT: andi a0, a3, 2
6938 ; RV64ZVE32F-NEXT: beqz a0, .LBB67_2
6939 ; RV64ZVE32F-NEXT: .LBB67_10: # %cond.store1
6940 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6941 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
6942 ; RV64ZVE32F-NEXT: vse32.v v10, (t0)
6943 ; RV64ZVE32F-NEXT: andi a0, a3, 4
6944 ; RV64ZVE32F-NEXT: beqz a0, .LBB67_3
6945 ; RV64ZVE32F-NEXT: .LBB67_11: # %cond.store3
6946 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6947 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2
6948 ; RV64ZVE32F-NEXT: vse32.v v10, (a7)
6949 ; RV64ZVE32F-NEXT: andi a0, a3, 8
6950 ; RV64ZVE32F-NEXT: beqz a0, .LBB67_4
6951 ; RV64ZVE32F-NEXT: .LBB67_12: # %cond.store5
6952 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6953 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
6954 ; RV64ZVE32F-NEXT: vse32.v v10, (a6)
6955 ; RV64ZVE32F-NEXT: andi a0, a3, 16
6956 ; RV64ZVE32F-NEXT: beqz a0, .LBB67_5
6957 ; RV64ZVE32F-NEXT: .LBB67_13: # %cond.store7
6958 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
6959 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
6960 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6961 ; RV64ZVE32F-NEXT: vse32.v v10, (a5)
6962 ; RV64ZVE32F-NEXT: andi a0, a3, 32
6963 ; RV64ZVE32F-NEXT: beqz a0, .LBB67_6
6964 ; RV64ZVE32F-NEXT: .LBB67_14: # %cond.store9
6965 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
6966 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5
6967 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6968 ; RV64ZVE32F-NEXT: vse32.v v10, (a4)
6969 ; RV64ZVE32F-NEXT: andi a0, a3, 64
6970 ; RV64ZVE32F-NEXT: beqz a0, .LBB67_7
6971 ; RV64ZVE32F-NEXT: .LBB67_15: # %cond.store11
6972 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
6973 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
6974 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6975 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
6976 ; RV64ZVE32F-NEXT: andi a0, a3, -128
6977 ; RV64ZVE32F-NEXT: beqz a0, .LBB67_8
6978 ; RV64ZVE32F-NEXT: .LBB67_16: # %cond.store13
6979 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
6980 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
6981 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6982 ; RV64ZVE32F-NEXT: vse32.v v8, (a1)
6983 ; RV64ZVE32F-NEXT: ret
6984 call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
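; i8 indices: the vector paths sign-extend the indices (vsext.vf4 on RV32, vsext.vf8 on
; RV64) and shift left by 2 to scale them to float byte offsets before the indexed store.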
6988 define void @mscatter_baseidx_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
6989 ; RV32-LABEL: mscatter_baseidx_v8i8_v8f32:
6991 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
6992 ; RV32-NEXT: vsext.vf4 v12, v10
6993 ; RV32-NEXT: vsll.vi v10, v12, 2
6994 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
6997 ; RV64-LABEL: mscatter_baseidx_v8i8_v8f32:
6999 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
7000 ; RV64-NEXT: vsext.vf8 v12, v10
7001 ; RV64-NEXT: vsll.vi v12, v12, 2
7002 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
7003 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
7006 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8_v8f32:
7007 ; RV64ZVE32F: # %bb.0:
7008 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
7009 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
7010 ; RV64ZVE32F-NEXT: andi a2, a1, 1
7011 ; RV64ZVE32F-NEXT: beqz a2, .LBB68_2
7012 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
7013 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7014 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7015 ; RV64ZVE32F-NEXT: add a2, a0, a2
7016 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7017 ; RV64ZVE32F-NEXT: vse32.v v8, (a2)
7018 ; RV64ZVE32F-NEXT: .LBB68_2: # %else
7019 ; RV64ZVE32F-NEXT: andi a2, a1, 2
7020 ; RV64ZVE32F-NEXT: beqz a2, .LBB68_4
7021 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
7022 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7023 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
7024 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
7025 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7026 ; RV64ZVE32F-NEXT: add a2, a0, a2
7027 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
7028 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
7029 ; RV64ZVE32F-NEXT: vse32.v v11, (a2)
7030 ; RV64ZVE32F-NEXT: .LBB68_4: # %else2
7031 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
7032 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
7033 ; RV64ZVE32F-NEXT: andi a2, a1, 4
7034 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
7035 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
7036 ; RV64ZVE32F-NEXT: bnez a2, .LBB68_12
7037 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
7038 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7039 ; RV64ZVE32F-NEXT: bnez a2, .LBB68_13
7040 ; RV64ZVE32F-NEXT: .LBB68_6: # %else6
7041 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7042 ; RV64ZVE32F-NEXT: bnez a2, .LBB68_14
7043 ; RV64ZVE32F-NEXT: .LBB68_7: # %else8
7044 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7045 ; RV64ZVE32F-NEXT: beqz a2, .LBB68_9
7046 ; RV64ZVE32F-NEXT: .LBB68_8: # %cond.store9
7047 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7048 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
7049 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7050 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7051 ; RV64ZVE32F-NEXT: add a2, a0, a2
7052 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7053 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
7054 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7055 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7056 ; RV64ZVE32F-NEXT: .LBB68_9: # %else10
7057 ; RV64ZVE32F-NEXT: andi a2, a1, 64
7058 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
7059 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
7060 ; RV64ZVE32F-NEXT: bnez a2, .LBB68_15
7061 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
7062 ; RV64ZVE32F-NEXT: andi a1, a1, -128
7063 ; RV64ZVE32F-NEXT: bnez a1, .LBB68_16
7064 ; RV64ZVE32F-NEXT: .LBB68_11: # %else14
7065 ; RV64ZVE32F-NEXT: ret
7066 ; RV64ZVE32F-NEXT: .LBB68_12: # %cond.store3
7067 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7068 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7069 ; RV64ZVE32F-NEXT: add a2, a0, a2
7070 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
7071 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
7072 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7073 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7074 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7075 ; RV64ZVE32F-NEXT: beqz a2, .LBB68_6
7076 ; RV64ZVE32F-NEXT: .LBB68_13: # %cond.store5
7077 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7078 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
7079 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7080 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7081 ; RV64ZVE32F-NEXT: add a2, a0, a2
7082 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
7083 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
7084 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
7085 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7086 ; RV64ZVE32F-NEXT: beqz a2, .LBB68_7
7087 ; RV64ZVE32F-NEXT: .LBB68_14: # %cond.store7
7088 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
7089 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
7090 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7091 ; RV64ZVE32F-NEXT: add a2, a0, a2
7092 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7093 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
7094 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7095 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7096 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7097 ; RV64ZVE32F-NEXT: bnez a2, .LBB68_8
7098 ; RV64ZVE32F-NEXT: j .LBB68_9
7099 ; RV64ZVE32F-NEXT: .LBB68_15: # %cond.store11
7100 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7101 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7102 ; RV64ZVE32F-NEXT: add a2, a0, a2
7103 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7104 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
7105 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7106 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7107 ; RV64ZVE32F-NEXT: andi a1, a1, -128
7108 ; RV64ZVE32F-NEXT: beqz a1, .LBB68_11
7109 ; RV64ZVE32F-NEXT: .LBB68_16: # %cond.store13
7110 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7111 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
7112 ; RV64ZVE32F-NEXT: vmv.x.s a1, v10
7113 ; RV64ZVE32F-NEXT: slli a1, a1, 2
7114 ; RV64ZVE32F-NEXT: add a0, a0, a1
7115 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7116 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
7117 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7118 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
7119 ; RV64ZVE32F-NEXT: ret
7120 %ptrs = getelementptr inbounds float, ptr %base, <8 x i8> %idxs
7121 call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
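; An explicit sext of the i8 indices matches the default GEP behaviour above, so the
; same lowering is expected.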
7125 define void @mscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
7126 ; RV32-LABEL: mscatter_baseidx_sext_v8i8_v8f32:
7128 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
7129 ; RV32-NEXT: vsext.vf4 v12, v10
7130 ; RV32-NEXT: vsll.vi v10, v12, 2
7131 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
7134 ; RV64-LABEL: mscatter_baseidx_sext_v8i8_v8f32:
7136 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
7137 ; RV64-NEXT: vsext.vf8 v12, v10
7138 ; RV64-NEXT: vsll.vi v12, v12, 2
7139 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
7140 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
7143 ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8f32:
7144 ; RV64ZVE32F: # %bb.0:
7145 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
7146 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
7147 ; RV64ZVE32F-NEXT: andi a2, a1, 1
7148 ; RV64ZVE32F-NEXT: beqz a2, .LBB69_2
7149 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
7150 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7151 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7152 ; RV64ZVE32F-NEXT: add a2, a0, a2
7153 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7154 ; RV64ZVE32F-NEXT: vse32.v v8, (a2)
7155 ; RV64ZVE32F-NEXT: .LBB69_2: # %else
7156 ; RV64ZVE32F-NEXT: andi a2, a1, 2
7157 ; RV64ZVE32F-NEXT: beqz a2, .LBB69_4
7158 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
7159 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7160 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
7161 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
7162 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7163 ; RV64ZVE32F-NEXT: add a2, a0, a2
7164 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
7165 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
7166 ; RV64ZVE32F-NEXT: vse32.v v11, (a2)
7167 ; RV64ZVE32F-NEXT: .LBB69_4: # %else2
7168 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
7169 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
7170 ; RV64ZVE32F-NEXT: andi a2, a1, 4
7171 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
7172 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
7173 ; RV64ZVE32F-NEXT: bnez a2, .LBB69_12
7174 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
7175 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7176 ; RV64ZVE32F-NEXT: bnez a2, .LBB69_13
7177 ; RV64ZVE32F-NEXT: .LBB69_6: # %else6
7178 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7179 ; RV64ZVE32F-NEXT: bnez a2, .LBB69_14
7180 ; RV64ZVE32F-NEXT: .LBB69_7: # %else8
7181 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7182 ; RV64ZVE32F-NEXT: beqz a2, .LBB69_9
7183 ; RV64ZVE32F-NEXT: .LBB69_8: # %cond.store9
7184 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7185 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
7186 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7187 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7188 ; RV64ZVE32F-NEXT: add a2, a0, a2
7189 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7190 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
7191 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7192 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7193 ; RV64ZVE32F-NEXT: .LBB69_9: # %else10
7194 ; RV64ZVE32F-NEXT: andi a2, a1, 64
7195 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
7196 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
7197 ; RV64ZVE32F-NEXT: bnez a2, .LBB69_15
7198 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
7199 ; RV64ZVE32F-NEXT: andi a1, a1, -128
7200 ; RV64ZVE32F-NEXT: bnez a1, .LBB69_16
7201 ; RV64ZVE32F-NEXT: .LBB69_11: # %else14
7202 ; RV64ZVE32F-NEXT: ret
7203 ; RV64ZVE32F-NEXT: .LBB69_12: # %cond.store3
7204 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7205 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7206 ; RV64ZVE32F-NEXT: add a2, a0, a2
7207 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
7208 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
7209 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7210 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7211 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7212 ; RV64ZVE32F-NEXT: beqz a2, .LBB69_6
7213 ; RV64ZVE32F-NEXT: .LBB69_13: # %cond.store5
7214 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7215 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
7216 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7217 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7218 ; RV64ZVE32F-NEXT: add a2, a0, a2
7219 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
7220 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
7221 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
7222 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7223 ; RV64ZVE32F-NEXT: beqz a2, .LBB69_7
7224 ; RV64ZVE32F-NEXT: .LBB69_14: # %cond.store7
7225 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
7226 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
7227 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7228 ; RV64ZVE32F-NEXT: add a2, a0, a2
7229 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7230 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
7231 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7232 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7233 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7234 ; RV64ZVE32F-NEXT: bnez a2, .LBB69_8
7235 ; RV64ZVE32F-NEXT: j .LBB69_9
7236 ; RV64ZVE32F-NEXT: .LBB69_15: # %cond.store11
7237 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7238 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7239 ; RV64ZVE32F-NEXT: add a2, a0, a2
7240 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7241 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
7242 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7243 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7244 ; RV64ZVE32F-NEXT: andi a1, a1, -128
7245 ; RV64ZVE32F-NEXT: beqz a1, .LBB69_11
7246 ; RV64ZVE32F-NEXT: .LBB69_16: # %cond.store13
7247 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7248 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
7249 ; RV64ZVE32F-NEXT: vmv.x.s a1, v10
7250 ; RV64ZVE32F-NEXT: slli a1, a1, 2
7251 ; RV64ZVE32F-NEXT: add a0, a0, a1
7252 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7253 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
7254 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7255 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
7256 ; RV64ZVE32F-NEXT: ret
7257 %eidxs = sext <8 x i8> %idxs to <8 x i32>
7258 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
7259 call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
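; Zero-extended i8 indices fit in 16 bits, so both RV32 and RV64 can widen with
; vzext.vf2 and use vsoxei16. The scalar RV64ZVE32F path masks each extracted index
; with 255 before scaling.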
7263 define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
7264 ; RV32-LABEL: mscatter_baseidx_zext_v8i8_v8f32:
7266 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
7267 ; RV32-NEXT: vzext.vf2 v11, v10
7268 ; RV32-NEXT: vsll.vi v10, v11, 2
7269 ; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
7270 ; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t
7273 ; RV64-LABEL: mscatter_baseidx_zext_v8i8_v8f32:
7275 ; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
7276 ; RV64-NEXT: vzext.vf2 v11, v10
7277 ; RV64-NEXT: vsll.vi v10, v11, 2
7278 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
7279 ; RV64-NEXT: vsoxei16.v v8, (a0), v10, v0.t
7282 ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8f32:
7283 ; RV64ZVE32F: # %bb.0:
7284 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
7285 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
7286 ; RV64ZVE32F-NEXT: andi a2, a1, 1
7287 ; RV64ZVE32F-NEXT: beqz a2, .LBB70_2
7288 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
7289 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7290 ; RV64ZVE32F-NEXT: andi a2, a2, 255
7291 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7292 ; RV64ZVE32F-NEXT: add a2, a0, a2
7293 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7294 ; RV64ZVE32F-NEXT: vse32.v v8, (a2)
7295 ; RV64ZVE32F-NEXT: .LBB70_2: # %else
7296 ; RV64ZVE32F-NEXT: andi a2, a1, 2
7297 ; RV64ZVE32F-NEXT: beqz a2, .LBB70_4
7298 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
7299 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7300 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
7301 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
7302 ; RV64ZVE32F-NEXT: andi a2, a2, 255
7303 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7304 ; RV64ZVE32F-NEXT: add a2, a0, a2
7305 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
7306 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
7307 ; RV64ZVE32F-NEXT: vse32.v v11, (a2)
7308 ; RV64ZVE32F-NEXT: .LBB70_4: # %else2
7309 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
7310 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
7311 ; RV64ZVE32F-NEXT: andi a2, a1, 4
7312 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
7313 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
7314 ; RV64ZVE32F-NEXT: bnez a2, .LBB70_12
7315 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
7316 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7317 ; RV64ZVE32F-NEXT: bnez a2, .LBB70_13
7318 ; RV64ZVE32F-NEXT: .LBB70_6: # %else6
7319 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7320 ; RV64ZVE32F-NEXT: bnez a2, .LBB70_14
7321 ; RV64ZVE32F-NEXT: .LBB70_7: # %else8
7322 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7323 ; RV64ZVE32F-NEXT: beqz a2, .LBB70_9
7324 ; RV64ZVE32F-NEXT: .LBB70_8: # %cond.store9
7325 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7326 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
7327 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7328 ; RV64ZVE32F-NEXT: andi a2, a2, 255
7329 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7330 ; RV64ZVE32F-NEXT: add a2, a0, a2
7331 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7332 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
7333 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7334 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7335 ; RV64ZVE32F-NEXT: .LBB70_9: # %else10
7336 ; RV64ZVE32F-NEXT: andi a2, a1, 64
7337 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
7338 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
7339 ; RV64ZVE32F-NEXT: bnez a2, .LBB70_15
7340 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
7341 ; RV64ZVE32F-NEXT: andi a1, a1, -128
7342 ; RV64ZVE32F-NEXT: bnez a1, .LBB70_16
7343 ; RV64ZVE32F-NEXT: .LBB70_11: # %else14
7344 ; RV64ZVE32F-NEXT: ret
7345 ; RV64ZVE32F-NEXT: .LBB70_12: # %cond.store3
7346 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7347 ; RV64ZVE32F-NEXT: andi a2, a2, 255
7348 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7349 ; RV64ZVE32F-NEXT: add a2, a0, a2
7350 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
7351 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
7352 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7353 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7354 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7355 ; RV64ZVE32F-NEXT: beqz a2, .LBB70_6
7356 ; RV64ZVE32F-NEXT: .LBB70_13: # %cond.store5
7357 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7358 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
7359 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7360 ; RV64ZVE32F-NEXT: andi a2, a2, 255
7361 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7362 ; RV64ZVE32F-NEXT: add a2, a0, a2
7363 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
7364 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
7365 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
7366 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7367 ; RV64ZVE32F-NEXT: beqz a2, .LBB70_7
7368 ; RV64ZVE32F-NEXT: .LBB70_14: # %cond.store7
7369 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
7370 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
7371 ; RV64ZVE32F-NEXT: andi a2, a2, 255
7372 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7373 ; RV64ZVE32F-NEXT: add a2, a0, a2
7374 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7375 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
7376 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7377 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7378 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7379 ; RV64ZVE32F-NEXT: bnez a2, .LBB70_8
7380 ; RV64ZVE32F-NEXT: j .LBB70_9
7381 ; RV64ZVE32F-NEXT: .LBB70_15: # %cond.store11
7382 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7383 ; RV64ZVE32F-NEXT: andi a2, a2, 255
7384 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7385 ; RV64ZVE32F-NEXT: add a2, a0, a2
7386 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7387 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
7388 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7389 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7390 ; RV64ZVE32F-NEXT: andi a1, a1, -128
7391 ; RV64ZVE32F-NEXT: beqz a1, .LBB70_11
7392 ; RV64ZVE32F-NEXT: .LBB70_16: # %cond.store13
7393 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7394 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
7395 ; RV64ZVE32F-NEXT: vmv.x.s a1, v10
7396 ; RV64ZVE32F-NEXT: andi a1, a1, 255
7397 ; RV64ZVE32F-NEXT: slli a1, a1, 2
7398 ; RV64ZVE32F-NEXT: add a0, a0, a1
7399 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7400 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
7401 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7402 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
7403 ; RV64ZVE32F-NEXT: ret
7404 %eidxs = zext <8 x i8> %idxs to <8 x i32>
7405 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
7406 call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
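; i16 indices: RV32 sign-extends with vsext.vf2 and keeps 32-bit offsets; RV64 needs
; vsext.vf4 to 64-bit offsets for vsoxei64.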
7410 define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
7411 ; RV32-LABEL: mscatter_baseidx_v8i16_v8f32:
7413 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
7414 ; RV32-NEXT: vsext.vf2 v12, v10
7415 ; RV32-NEXT: vsll.vi v10, v12, 2
7416 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
7419 ; RV64-LABEL: mscatter_baseidx_v8i16_v8f32:
7421 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
7422 ; RV64-NEXT: vsext.vf4 v12, v10
7423 ; RV64-NEXT: vsll.vi v12, v12, 2
7424 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
7425 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
7428 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i16_v8f32:
7429 ; RV64ZVE32F: # %bb.0:
7430 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
7431 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
7432 ; RV64ZVE32F-NEXT: andi a2, a1, 1
7433 ; RV64ZVE32F-NEXT: beqz a2, .LBB71_2
7434 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
7435 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
7436 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7437 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7438 ; RV64ZVE32F-NEXT: add a2, a0, a2
7439 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7440 ; RV64ZVE32F-NEXT: vse32.v v8, (a2)
7441 ; RV64ZVE32F-NEXT: .LBB71_2: # %else
7442 ; RV64ZVE32F-NEXT: andi a2, a1, 2
7443 ; RV64ZVE32F-NEXT: beqz a2, .LBB71_4
7444 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
7445 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
7446 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
7447 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
7448 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7449 ; RV64ZVE32F-NEXT: add a2, a0, a2
7450 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
7451 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
7452 ; RV64ZVE32F-NEXT: vse32.v v11, (a2)
7453 ; RV64ZVE32F-NEXT: .LBB71_4: # %else2
7454 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
7455 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
7456 ; RV64ZVE32F-NEXT: andi a2, a1, 4
7457 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
7458 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
7459 ; RV64ZVE32F-NEXT: bnez a2, .LBB71_12
7460 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
7461 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7462 ; RV64ZVE32F-NEXT: bnez a2, .LBB71_13
7463 ; RV64ZVE32F-NEXT: .LBB71_6: # %else6
7464 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7465 ; RV64ZVE32F-NEXT: bnez a2, .LBB71_14
7466 ; RV64ZVE32F-NEXT: .LBB71_7: # %else8
7467 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7468 ; RV64ZVE32F-NEXT: beqz a2, .LBB71_9
7469 ; RV64ZVE32F-NEXT: .LBB71_8: # %cond.store9
7470 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
7471 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
7472 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7473 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7474 ; RV64ZVE32F-NEXT: add a2, a0, a2
7475 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7476 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
7477 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7478 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7479 ; RV64ZVE32F-NEXT: .LBB71_9: # %else10
7480 ; RV64ZVE32F-NEXT: andi a2, a1, 64
7481 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
7482 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
7483 ; RV64ZVE32F-NEXT: bnez a2, .LBB71_15
7484 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
7485 ; RV64ZVE32F-NEXT: andi a1, a1, -128
7486 ; RV64ZVE32F-NEXT: bnez a1, .LBB71_16
7487 ; RV64ZVE32F-NEXT: .LBB71_11: # %else14
7488 ; RV64ZVE32F-NEXT: ret
7489 ; RV64ZVE32F-NEXT: .LBB71_12: # %cond.store3
7490 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7491 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7492 ; RV64ZVE32F-NEXT: add a2, a0, a2
7493 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
7494 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
7495 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7496 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7497 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7498 ; RV64ZVE32F-NEXT: beqz a2, .LBB71_6
7499 ; RV64ZVE32F-NEXT: .LBB71_13: # %cond.store5
7500 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
7501 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
7502 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7503 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7504 ; RV64ZVE32F-NEXT: add a2, a0, a2
7505 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
7506 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
7507 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
7508 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7509 ; RV64ZVE32F-NEXT: beqz a2, .LBB71_7
7510 ; RV64ZVE32F-NEXT: .LBB71_14: # %cond.store7
7511 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
7512 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
7513 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7514 ; RV64ZVE32F-NEXT: add a2, a0, a2
7515 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
7516 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
7517 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7518 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7519 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7520 ; RV64ZVE32F-NEXT: bnez a2, .LBB71_8
7521 ; RV64ZVE32F-NEXT: j .LBB71_9
7522 ; RV64ZVE32F-NEXT: .LBB71_15: # %cond.store11
7523 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7524 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7525 ; RV64ZVE32F-NEXT: add a2, a0, a2
7526 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7527 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
7528 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7529 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7530 ; RV64ZVE32F-NEXT: andi a1, a1, -128
7531 ; RV64ZVE32F-NEXT: beqz a1, .LBB71_11
7532 ; RV64ZVE32F-NEXT: .LBB71_16: # %cond.store13
7533 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
7534 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
7535 ; RV64ZVE32F-NEXT: vmv.x.s a1, v10
7536 ; RV64ZVE32F-NEXT: slli a1, a1, 2
7537 ; RV64ZVE32F-NEXT: add a0, a0, a1
7538 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7539 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
7540 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7541 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
7542 ; RV64ZVE32F-NEXT: ret
7543 %ptrs = getelementptr inbounds float, ptr %base, <8 x i16> %idxs
7544 call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
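; Explicit sext of the i16 indices; same lowering as the plain i16 GEP above.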
7548 define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
7549 ; RV32-LABEL: mscatter_baseidx_sext_v8i16_v8f32:
7551 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
7552 ; RV32-NEXT: vsext.vf2 v12, v10
7553 ; RV32-NEXT: vsll.vi v10, v12, 2
7554 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
7557 ; RV64-LABEL: mscatter_baseidx_sext_v8i16_v8f32:
7559 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
7560 ; RV64-NEXT: vsext.vf4 v12, v10
7561 ; RV64-NEXT: vsll.vi v12, v12, 2
7562 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
7563 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
7566 ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i16_v8f32:
7567 ; RV64ZVE32F: # %bb.0:
7568 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
7569 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
7570 ; RV64ZVE32F-NEXT: andi a2, a1, 1
7571 ; RV64ZVE32F-NEXT: beqz a2, .LBB72_2
7572 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
7573 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
7574 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7575 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7576 ; RV64ZVE32F-NEXT: add a2, a0, a2
7577 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7578 ; RV64ZVE32F-NEXT: vse32.v v8, (a2)
7579 ; RV64ZVE32F-NEXT: .LBB72_2: # %else
7580 ; RV64ZVE32F-NEXT: andi a2, a1, 2
7581 ; RV64ZVE32F-NEXT: beqz a2, .LBB72_4
7582 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
7583 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
7584 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
7585 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
7586 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7587 ; RV64ZVE32F-NEXT: add a2, a0, a2
7588 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
7589 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
7590 ; RV64ZVE32F-NEXT: vse32.v v11, (a2)
7591 ; RV64ZVE32F-NEXT: .LBB72_4: # %else2
7592 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
7593 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
7594 ; RV64ZVE32F-NEXT: andi a2, a1, 4
7595 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
7596 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
7597 ; RV64ZVE32F-NEXT: bnez a2, .LBB72_12
7598 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
7599 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7600 ; RV64ZVE32F-NEXT: bnez a2, .LBB72_13
7601 ; RV64ZVE32F-NEXT: .LBB72_6: # %else6
7602 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7603 ; RV64ZVE32F-NEXT: bnez a2, .LBB72_14
7604 ; RV64ZVE32F-NEXT: .LBB72_7: # %else8
7605 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7606 ; RV64ZVE32F-NEXT: beqz a2, .LBB72_9
7607 ; RV64ZVE32F-NEXT: .LBB72_8: # %cond.store9
7608 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
7609 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
7610 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7611 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7612 ; RV64ZVE32F-NEXT: add a2, a0, a2
7613 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7614 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
7615 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7616 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7617 ; RV64ZVE32F-NEXT: .LBB72_9: # %else10
7618 ; RV64ZVE32F-NEXT: andi a2, a1, 64
7619 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
7620 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
7621 ; RV64ZVE32F-NEXT: bnez a2, .LBB72_15
7622 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
7623 ; RV64ZVE32F-NEXT: andi a1, a1, -128
7624 ; RV64ZVE32F-NEXT: bnez a1, .LBB72_16
7625 ; RV64ZVE32F-NEXT: .LBB72_11: # %else14
7626 ; RV64ZVE32F-NEXT: ret
7627 ; RV64ZVE32F-NEXT: .LBB72_12: # %cond.store3
7628 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7629 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7630 ; RV64ZVE32F-NEXT: add a2, a0, a2
7631 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
7632 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
7633 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7634 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7635 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7636 ; RV64ZVE32F-NEXT: beqz a2, .LBB72_6
7637 ; RV64ZVE32F-NEXT: .LBB72_13: # %cond.store5
7638 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
7639 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
7640 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7641 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7642 ; RV64ZVE32F-NEXT: add a2, a0, a2
7643 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
7644 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
7645 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
7646 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7647 ; RV64ZVE32F-NEXT: beqz a2, .LBB72_7
7648 ; RV64ZVE32F-NEXT: .LBB72_14: # %cond.store7
7649 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
7650 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
7651 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7652 ; RV64ZVE32F-NEXT: add a2, a0, a2
7653 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
7654 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
7655 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7656 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7657 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7658 ; RV64ZVE32F-NEXT: bnez a2, .LBB72_8
7659 ; RV64ZVE32F-NEXT: j .LBB72_9
7660 ; RV64ZVE32F-NEXT: .LBB72_15: # %cond.store11
7661 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7662 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7663 ; RV64ZVE32F-NEXT: add a2, a0, a2
7664 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7665 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
7666 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7667 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7668 ; RV64ZVE32F-NEXT: andi a1, a1, -128
7669 ; RV64ZVE32F-NEXT: beqz a1, .LBB72_11
7670 ; RV64ZVE32F-NEXT: .LBB72_16: # %cond.store13
7671 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
7672 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
7673 ; RV64ZVE32F-NEXT: vmv.x.s a1, v10
7674 ; RV64ZVE32F-NEXT: slli a1, a1, 2
7675 ; RV64ZVE32F-NEXT: add a0, a0, a1
7676 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7677 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
7678 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7679 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
7680 ; RV64ZVE32F-NEXT: ret
7681 %eidxs = sext <8 x i16> %idxs to <8 x i32>
7682 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
7683 call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
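; Zero-extended i16 indices fit in 32 bits, so both RV32 and RV64 use vzext.vf2 followed
; by vsoxei32. The scalar path builds the 0xffff mask with lui/addiw and ands each index
; before scaling.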
7687 define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
7688 ; RV32-LABEL: mscatter_baseidx_zext_v8i16_v8f32:
7690 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
7691 ; RV32-NEXT: vzext.vf2 v12, v10
7692 ; RV32-NEXT: vsll.vi v10, v12, 2
7693 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
7696 ; RV64-LABEL: mscatter_baseidx_zext_v8i16_v8f32:
7698 ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
7699 ; RV64-NEXT: vzext.vf2 v12, v10
7700 ; RV64-NEXT: vsll.vi v10, v12, 2
7701 ; RV64-NEXT: vsoxei32.v v8, (a0), v10, v0.t
7704 ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8f32:
7705 ; RV64ZVE32F: # %bb.0:
7706 ; RV64ZVE32F-NEXT: lui a1, 16
7707 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
7708 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
7709 ; RV64ZVE32F-NEXT: andi a3, a2, 1
7710 ; RV64ZVE32F-NEXT: addiw a1, a1, -1
7711 ; RV64ZVE32F-NEXT: beqz a3, .LBB73_2
7712 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
7713 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
7714 ; RV64ZVE32F-NEXT: vmv.x.s a3, v10
7715 ; RV64ZVE32F-NEXT: and a3, a3, a1
7716 ; RV64ZVE32F-NEXT: slli a3, a3, 2
7717 ; RV64ZVE32F-NEXT: add a3, a0, a3
7718 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7719 ; RV64ZVE32F-NEXT: vse32.v v8, (a3)
7720 ; RV64ZVE32F-NEXT: .LBB73_2: # %else
7721 ; RV64ZVE32F-NEXT: andi a3, a2, 2
7722 ; RV64ZVE32F-NEXT: beqz a3, .LBB73_4
7723 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
7724 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
7725 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
7726 ; RV64ZVE32F-NEXT: vmv.x.s a3, v11
7727 ; RV64ZVE32F-NEXT: and a3, a3, a1
7728 ; RV64ZVE32F-NEXT: slli a3, a3, 2
7729 ; RV64ZVE32F-NEXT: add a3, a0, a3
7730 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
7731 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
7732 ; RV64ZVE32F-NEXT: vse32.v v11, (a3)
7733 ; RV64ZVE32F-NEXT: .LBB73_4: # %else2
7734 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
7735 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
7736 ; RV64ZVE32F-NEXT: andi a3, a2, 4
7737 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
7738 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
7739 ; RV64ZVE32F-NEXT: bnez a3, .LBB73_12
7740 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
7741 ; RV64ZVE32F-NEXT: andi a3, a2, 8
7742 ; RV64ZVE32F-NEXT: bnez a3, .LBB73_13
7743 ; RV64ZVE32F-NEXT: .LBB73_6: # %else6
7744 ; RV64ZVE32F-NEXT: andi a3, a2, 16
7745 ; RV64ZVE32F-NEXT: bnez a3, .LBB73_14
7746 ; RV64ZVE32F-NEXT: .LBB73_7: # %else8
7747 ; RV64ZVE32F-NEXT: andi a3, a2, 32
7748 ; RV64ZVE32F-NEXT: beqz a3, .LBB73_9
7749 ; RV64ZVE32F-NEXT: .LBB73_8: # %cond.store9
7750 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
7751 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
7752 ; RV64ZVE32F-NEXT: vmv.x.s a3, v10
7753 ; RV64ZVE32F-NEXT: and a3, a3, a1
7754 ; RV64ZVE32F-NEXT: slli a3, a3, 2
7755 ; RV64ZVE32F-NEXT: add a3, a0, a3
7756 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7757 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
7758 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7759 ; RV64ZVE32F-NEXT: vse32.v v12, (a3)
7760 ; RV64ZVE32F-NEXT: .LBB73_9: # %else10
7761 ; RV64ZVE32F-NEXT: andi a3, a2, 64
7762 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
7763 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
7764 ; RV64ZVE32F-NEXT: bnez a3, .LBB73_15
7765 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
7766 ; RV64ZVE32F-NEXT: andi a2, a2, -128
7767 ; RV64ZVE32F-NEXT: bnez a2, .LBB73_16
7768 ; RV64ZVE32F-NEXT: .LBB73_11: # %else14
7769 ; RV64ZVE32F-NEXT: ret
7770 ; RV64ZVE32F-NEXT: .LBB73_12: # %cond.store3
7771 ; RV64ZVE32F-NEXT: vmv.x.s a3, v10
7772 ; RV64ZVE32F-NEXT: and a3, a3, a1
7773 ; RV64ZVE32F-NEXT: slli a3, a3, 2
7774 ; RV64ZVE32F-NEXT: add a3, a0, a3
7775 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
7776 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
7777 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7778 ; RV64ZVE32F-NEXT: vse32.v v12, (a3)
7779 ; RV64ZVE32F-NEXT: andi a3, a2, 8
7780 ; RV64ZVE32F-NEXT: beqz a3, .LBB73_6
7781 ; RV64ZVE32F-NEXT: .LBB73_13: # %cond.store5
7782 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
7783 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
7784 ; RV64ZVE32F-NEXT: vmv.x.s a3, v10
7785 ; RV64ZVE32F-NEXT: and a3, a3, a1
7786 ; RV64ZVE32F-NEXT: slli a3, a3, 2
7787 ; RV64ZVE32F-NEXT: add a3, a0, a3
7788 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
7789 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
7790 ; RV64ZVE32F-NEXT: vse32.v v10, (a3)
7791 ; RV64ZVE32F-NEXT: andi a3, a2, 16
7792 ; RV64ZVE32F-NEXT: beqz a3, .LBB73_7
7793 ; RV64ZVE32F-NEXT: .LBB73_14: # %cond.store7
7794 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
7795 ; RV64ZVE32F-NEXT: vmv.x.s a3, v11
7796 ; RV64ZVE32F-NEXT: and a3, a3, a1
7797 ; RV64ZVE32F-NEXT: slli a3, a3, 2
7798 ; RV64ZVE32F-NEXT: add a3, a0, a3
7799 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
7800 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
7801 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7802 ; RV64ZVE32F-NEXT: vse32.v v12, (a3)
7803 ; RV64ZVE32F-NEXT: andi a3, a2, 32
7804 ; RV64ZVE32F-NEXT: bnez a3, .LBB73_8
7805 ; RV64ZVE32F-NEXT: j .LBB73_9
7806 ; RV64ZVE32F-NEXT: .LBB73_15: # %cond.store11
7807 ; RV64ZVE32F-NEXT: vmv.x.s a3, v10
7808 ; RV64ZVE32F-NEXT: and a3, a3, a1
7809 ; RV64ZVE32F-NEXT: slli a3, a3, 2
7810 ; RV64ZVE32F-NEXT: add a3, a0, a3
7811 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7812 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
7813 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7814 ; RV64ZVE32F-NEXT: vse32.v v12, (a3)
7815 ; RV64ZVE32F-NEXT: andi a2, a2, -128
7816 ; RV64ZVE32F-NEXT: beqz a2, .LBB73_11
7817 ; RV64ZVE32F-NEXT: .LBB73_16: # %cond.store13
7818 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
7819 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
7820 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7821 ; RV64ZVE32F-NEXT: and a1, a2, a1
7822 ; RV64ZVE32F-NEXT: slli a1, a1, 2
7823 ; RV64ZVE32F-NEXT: add a0, a0, a1
7824 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7825 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
7826 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7827 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
7828 ; RV64ZVE32F-NEXT: ret
7829 %eidxs = zext <8 x i16> %idxs to <8 x i32>
7830 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
7831 call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
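; Native i32 indices: RV32 just scales them in place with vsll.vi; RV64 still
; sign-extends to 64 bits (vsext.vf2) for the 64-bit indexed store.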
7835 define void @mscatter_baseidx_v8f32(<8 x float> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m) {
7836 ; RV32-LABEL: mscatter_baseidx_v8f32:
7838 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
7839 ; RV32-NEXT: vsll.vi v10, v10, 2
7840 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
7843 ; RV64-LABEL: mscatter_baseidx_v8f32:
7845 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
7846 ; RV64-NEXT: vsext.vf2 v12, v10
7847 ; RV64-NEXT: vsll.vi v12, v12, 2
7848 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
7849 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
7852 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8f32:
7853 ; RV64ZVE32F: # %bb.0:
7854 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
7855 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
7856 ; RV64ZVE32F-NEXT: andi a2, a1, 1
7857 ; RV64ZVE32F-NEXT: beqz a2, .LBB74_2
7858 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
7859 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7860 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7861 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7862 ; RV64ZVE32F-NEXT: add a2, a0, a2
7863 ; RV64ZVE32F-NEXT: vse32.v v8, (a2)
7864 ; RV64ZVE32F-NEXT: .LBB74_2: # %else
7865 ; RV64ZVE32F-NEXT: andi a2, a1, 2
7866 ; RV64ZVE32F-NEXT: beqz a2, .LBB74_4
7867 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
7868 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7869 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 1
7870 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
7871 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7872 ; RV64ZVE32F-NEXT: add a2, a0, a2
7873 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1
7874 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7875 ; RV64ZVE32F-NEXT: .LBB74_4: # %else2
7876 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
7877 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 4
7878 ; RV64ZVE32F-NEXT: andi a2, a1, 4
7879 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
7880 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
7881 ; RV64ZVE32F-NEXT: bnez a2, .LBB74_12
7882 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
7883 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7884 ; RV64ZVE32F-NEXT: bnez a2, .LBB74_13
7885 ; RV64ZVE32F-NEXT: .LBB74_6: # %else6
7886 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7887 ; RV64ZVE32F-NEXT: bnez a2, .LBB74_14
7888 ; RV64ZVE32F-NEXT: .LBB74_7: # %else8
7889 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7890 ; RV64ZVE32F-NEXT: beqz a2, .LBB74_9
7891 ; RV64ZVE32F-NEXT: .LBB74_8: # %cond.store9
7892 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7893 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v12, 1
7894 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7895 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7896 ; RV64ZVE32F-NEXT: add a2, a0, a2
7897 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7898 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5
7899 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7900 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
7901 ; RV64ZVE32F-NEXT: .LBB74_9: # %else10
7902 ; RV64ZVE32F-NEXT: andi a2, a1, 64
7903 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
7904 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v12, 2
7905 ; RV64ZVE32F-NEXT: bnez a2, .LBB74_15
7906 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
7907 ; RV64ZVE32F-NEXT: andi a1, a1, -128
7908 ; RV64ZVE32F-NEXT: bnez a1, .LBB74_16
7909 ; RV64ZVE32F-NEXT: .LBB74_11: # %else14
7910 ; RV64ZVE32F-NEXT: ret
7911 ; RV64ZVE32F-NEXT: .LBB74_12: # %cond.store3
7912 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7913 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7914 ; RV64ZVE32F-NEXT: add a2, a0, a2
7915 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
7916 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7917 ; RV64ZVE32F-NEXT: vse32.v v11, (a2)
7918 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7919 ; RV64ZVE32F-NEXT: beqz a2, .LBB74_6
7920 ; RV64ZVE32F-NEXT: .LBB74_13: # %cond.store5
7921 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7922 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
7923 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7924 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7925 ; RV64ZVE32F-NEXT: add a2, a0, a2
7926 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
7927 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
7928 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7929 ; RV64ZVE32F-NEXT: beqz a2, .LBB74_7
7930 ; RV64ZVE32F-NEXT: .LBB74_14: # %cond.store7
7931 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7932 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
7933 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7934 ; RV64ZVE32F-NEXT: add a2, a0, a2
7935 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
7936 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7937 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
7938 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7939 ; RV64ZVE32F-NEXT: bnez a2, .LBB74_8
7940 ; RV64ZVE32F-NEXT: j .LBB74_9
7941 ; RV64ZVE32F-NEXT: .LBB74_15: # %cond.store11
7942 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7943 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7944 ; RV64ZVE32F-NEXT: add a2, a0, a2
7945 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7946 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
7947 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7948 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7949 ; RV64ZVE32F-NEXT: andi a1, a1, -128
7950 ; RV64ZVE32F-NEXT: beqz a1, .LBB74_11
7951 ; RV64ZVE32F-NEXT: .LBB74_16: # %cond.store13
7952 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7953 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
7954 ; RV64ZVE32F-NEXT: vmv.x.s a1, v10
7955 ; RV64ZVE32F-NEXT: slli a1, a1, 2
7956 ; RV64ZVE32F-NEXT: add a0, a0, a1
7957 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7958 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
7959 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7960 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
7961 ; RV64ZVE32F-NEXT: ret
7962 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %idxs
7963 call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
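; f64 scatters: neither ZVE32F configuration can put 64-bit elements in vector
; registers, so the double values stay in fa0..fa7 and are stored with fsd.
; RV32ZVE32F extracts the 32-bit pointers from the vector operand, while RV64ZVE32F
; receives them as scalars or loads them from memory.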
7967 declare void @llvm.masked.scatter.v1f64.v1p0(<1 x double>, <1 x ptr>, i32, <1 x i1>)
7969 define void @mscatter_v1f64(<1 x double> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
7970 ; RV32V-LABEL: mscatter_v1f64:
7972 ; RV32V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
7973 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
7976 ; RV64-LABEL: mscatter_v1f64:
7978 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
7979 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
7982 ; RV32ZVE32F-LABEL: mscatter_v1f64:
7983 ; RV32ZVE32F: # %bb.0:
7984 ; RV32ZVE32F-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
7985 ; RV32ZVE32F-NEXT: vfirst.m a0, v0
7986 ; RV32ZVE32F-NEXT: bnez a0, .LBB75_2
7987 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.store
7988 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
7989 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
7990 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
7991 ; RV32ZVE32F-NEXT: .LBB75_2: # %else
7992 ; RV32ZVE32F-NEXT: ret
7994 ; RV64ZVE32F-LABEL: mscatter_v1f64:
7995 ; RV64ZVE32F: # %bb.0:
7996 ; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
7997 ; RV64ZVE32F-NEXT: vfirst.m a1, v0
7998 ; RV64ZVE32F-NEXT: bnez a1, .LBB75_2
7999 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
8000 ; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
8001 ; RV64ZVE32F-NEXT: .LBB75_2: # %else
8002 ; RV64ZVE32F-NEXT: ret
8003 call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> %val, <1 x ptr> %ptrs, i32 8, <1 x i1> %m)
8007 declare void @llvm.masked.scatter.v2f64.v2p0(<2 x double>, <2 x ptr>, i32, <2 x i1>)
8009 define void @mscatter_v2f64(<2 x double> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
8010 ; RV32V-LABEL: mscatter_v2f64:
8012 ; RV32V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
8013 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
8016 ; RV64-LABEL: mscatter_v2f64:
8018 ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
8019 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
8022 ; RV32ZVE32F-LABEL: mscatter_v2f64:
8023 ; RV32ZVE32F: # %bb.0:
8024 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
8025 ; RV32ZVE32F-NEXT: vmv.x.s a0, v0
8026 ; RV32ZVE32F-NEXT: andi a1, a0, 1
8027 ; RV32ZVE32F-NEXT: bnez a1, .LBB76_3
8028 ; RV32ZVE32F-NEXT: # %bb.1: # %else
8029 ; RV32ZVE32F-NEXT: andi a0, a0, 2
8030 ; RV32ZVE32F-NEXT: bnez a0, .LBB76_4
8031 ; RV32ZVE32F-NEXT: .LBB76_2: # %else2
8032 ; RV32ZVE32F-NEXT: ret
8033 ; RV32ZVE32F-NEXT: .LBB76_3: # %cond.store
8034 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
8035 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
8036 ; RV32ZVE32F-NEXT: fsd fa0, 0(a1)
8037 ; RV32ZVE32F-NEXT: andi a0, a0, 2
8038 ; RV32ZVE32F-NEXT: beqz a0, .LBB76_2
8039 ; RV32ZVE32F-NEXT: .LBB76_4: # %cond.store1
8040 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8041 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 1
8042 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
8043 ; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
8044 ; RV32ZVE32F-NEXT: ret
8046 ; RV64ZVE32F-LABEL: mscatter_v2f64:
8047 ; RV64ZVE32F: # %bb.0:
8048 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
8049 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
8050 ; RV64ZVE32F-NEXT: andi a3, a2, 1
8051 ; RV64ZVE32F-NEXT: bnez a3, .LBB76_3
8052 ; RV64ZVE32F-NEXT: # %bb.1: # %else
8053 ; RV64ZVE32F-NEXT: andi a2, a2, 2
8054 ; RV64ZVE32F-NEXT: bnez a2, .LBB76_4
8055 ; RV64ZVE32F-NEXT: .LBB76_2: # %else2
8056 ; RV64ZVE32F-NEXT: ret
8057 ; RV64ZVE32F-NEXT: .LBB76_3: # %cond.store
8058 ; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
8059 ; RV64ZVE32F-NEXT: andi a2, a2, 2
8060 ; RV64ZVE32F-NEXT: beqz a2, .LBB76_2
8061 ; RV64ZVE32F-NEXT: .LBB76_4: # %cond.store1
8062 ; RV64ZVE32F-NEXT: fsd fa1, 0(a1)
8063 ; RV64ZVE32F-NEXT: ret
8064 call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> %val, <2 x ptr> %ptrs, i32 8, <2 x i1> %m)
8068 declare void @llvm.masked.scatter.v4f64.v4p0(<4 x double>, <4 x ptr>, i32, <4 x i1>)
8070 define void @mscatter_v4f64(<4 x double> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
8071 ; RV32V-LABEL: mscatter_v4f64:
8073 ; RV32V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
8074 ; RV32V-NEXT: vsoxei32.v v8, (zero), v10, v0.t
8077 ; RV64-LABEL: mscatter_v4f64:
8079 ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
8080 ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
8083 ; RV32ZVE32F-LABEL: mscatter_v4f64:
8084 ; RV32ZVE32F: # %bb.0:
8085 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
8086 ; RV32ZVE32F-NEXT: vmv.x.s a0, v0
8087 ; RV32ZVE32F-NEXT: andi a1, a0, 1
8088 ; RV32ZVE32F-NEXT: bnez a1, .LBB77_5
8089 ; RV32ZVE32F-NEXT: # %bb.1: # %else
8090 ; RV32ZVE32F-NEXT: andi a1, a0, 2
8091 ; RV32ZVE32F-NEXT: bnez a1, .LBB77_6
8092 ; RV32ZVE32F-NEXT: .LBB77_2: # %else2
8093 ; RV32ZVE32F-NEXT: andi a1, a0, 4
8094 ; RV32ZVE32F-NEXT: bnez a1, .LBB77_7
8095 ; RV32ZVE32F-NEXT: .LBB77_3: # %else4
8096 ; RV32ZVE32F-NEXT: andi a0, a0, 8
8097 ; RV32ZVE32F-NEXT: bnez a0, .LBB77_8
8098 ; RV32ZVE32F-NEXT: .LBB77_4: # %else6
8099 ; RV32ZVE32F-NEXT: ret
8100 ; RV32ZVE32F-NEXT: .LBB77_5: # %cond.store
8101 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
8102 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
8103 ; RV32ZVE32F-NEXT: fsd fa0, 0(a1)
8104 ; RV32ZVE32F-NEXT: andi a1, a0, 2
8105 ; RV32ZVE32F-NEXT: beqz a1, .LBB77_2
8106 ; RV32ZVE32F-NEXT: .LBB77_6: # %cond.store1
8107 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8108 ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1
8109 ; RV32ZVE32F-NEXT: vmv.x.s a1, v9
8110 ; RV32ZVE32F-NEXT: fsd fa1, 0(a1)
8111 ; RV32ZVE32F-NEXT: andi a1, a0, 4
8112 ; RV32ZVE32F-NEXT: beqz a1, .LBB77_3
8113 ; RV32ZVE32F-NEXT: .LBB77_7: # %cond.store3
8114 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8115 ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2
8116 ; RV32ZVE32F-NEXT: vmv.x.s a1, v9
8117 ; RV32ZVE32F-NEXT: fsd fa2, 0(a1)
8118 ; RV32ZVE32F-NEXT: andi a0, a0, 8
8119 ; RV32ZVE32F-NEXT: beqz a0, .LBB77_4
8120 ; RV32ZVE32F-NEXT: .LBB77_8: # %cond.store5
8121 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8122 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3
8123 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
8124 ; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
8125 ; RV32ZVE32F-NEXT: ret
8127 ; RV64ZVE32F-LABEL: mscatter_v4f64:
8128 ; RV64ZVE32F: # %bb.0:
8129 ; RV64ZVE32F-NEXT: ld a1, 24(a0)
8130 ; RV64ZVE32F-NEXT: ld a2, 16(a0)
8131 ; RV64ZVE32F-NEXT: ld a4, 8(a0)
8132 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
8133 ; RV64ZVE32F-NEXT: vmv.x.s a3, v0
8134 ; RV64ZVE32F-NEXT: andi a5, a3, 1
8135 ; RV64ZVE32F-NEXT: bnez a5, .LBB77_5
8136 ; RV64ZVE32F-NEXT: # %bb.1: # %else
8137 ; RV64ZVE32F-NEXT: andi a0, a3, 2
8138 ; RV64ZVE32F-NEXT: bnez a0, .LBB77_6
8139 ; RV64ZVE32F-NEXT: .LBB77_2: # %else2
8140 ; RV64ZVE32F-NEXT: andi a0, a3, 4
8141 ; RV64ZVE32F-NEXT: bnez a0, .LBB77_7
8142 ; RV64ZVE32F-NEXT: .LBB77_3: # %else4
8143 ; RV64ZVE32F-NEXT: andi a3, a3, 8
8144 ; RV64ZVE32F-NEXT: bnez a3, .LBB77_8
8145 ; RV64ZVE32F-NEXT: .LBB77_4: # %else6
8146 ; RV64ZVE32F-NEXT: ret
8147 ; RV64ZVE32F-NEXT: .LBB77_5: # %cond.store
8148 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
8149 ; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
8150 ; RV64ZVE32F-NEXT: andi a0, a3, 2
8151 ; RV64ZVE32F-NEXT: beqz a0, .LBB77_2
8152 ; RV64ZVE32F-NEXT: .LBB77_6: # %cond.store1
8153 ; RV64ZVE32F-NEXT: fsd fa1, 0(a4)
8154 ; RV64ZVE32F-NEXT: andi a0, a3, 4
8155 ; RV64ZVE32F-NEXT: beqz a0, .LBB77_3
8156 ; RV64ZVE32F-NEXT: .LBB77_7: # %cond.store3
8157 ; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
8158 ; RV64ZVE32F-NEXT: andi a3, a3, 8
8159 ; RV64ZVE32F-NEXT: beqz a3, .LBB77_4
8160 ; RV64ZVE32F-NEXT: .LBB77_8: # %cond.store5
8161 ; RV64ZVE32F-NEXT: fsd fa3, 0(a1)
8162 ; RV64ZVE32F-NEXT: ret
8163 call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> %val, <4 x ptr> %ptrs, i32 8, <4 x i1> %m)
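; With the mask known to be all ones, the compare-and-branch sequence disappears:
; every lane is stored unconditionally (vsoxei on V, straight-line fsd stores on ZVE32F).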
8167 define void @mscatter_truemask_v4f64(<4 x double> %val, <4 x ptr> %ptrs) {
8168 ; RV32V-LABEL: mscatter_truemask_v4f64:
8170 ; RV32V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
8171 ; RV32V-NEXT: vsoxei32.v v8, (zero), v10
8174 ; RV64-LABEL: mscatter_truemask_v4f64:
8176 ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
8177 ; RV64-NEXT: vsoxei64.v v8, (zero), v10
8180 ; RV32ZVE32F-LABEL: mscatter_truemask_v4f64:
8181 ; RV32ZVE32F: # %bb.0:
8182 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8183 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
8184 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
8185 ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1
8186 ; RV32ZVE32F-NEXT: vmv.x.s a0, v9
8187 ; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
8188 ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2
8189 ; RV32ZVE32F-NEXT: vmv.x.s a0, v9
8190 ; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
8191 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3
8192 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
8193 ; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
8194 ; RV32ZVE32F-NEXT: ret
8196 ; RV64ZVE32F-LABEL: mscatter_truemask_v4f64:
8197 ; RV64ZVE32F: # %bb.0:
8198 ; RV64ZVE32F-NEXT: ld a1, 0(a0)
8199 ; RV64ZVE32F-NEXT: ld a2, 8(a0)
8200 ; RV64ZVE32F-NEXT: ld a3, 16(a0)
8201 ; RV64ZVE32F-NEXT: ld a0, 24(a0)
8202 ; RV64ZVE32F-NEXT: fsd fa0, 0(a1)
8203 ; RV64ZVE32F-NEXT: fsd fa1, 0(a2)
8204 ; RV64ZVE32F-NEXT: fsd fa2, 0(a3)
8205 ; RV64ZVE32F-NEXT: fsd fa3, 0(a0)
8206 ; RV64ZVE32F-NEXT: ret
8207 call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> %val, <4 x ptr> %ptrs, i32 8, <4 x i1> splat (i1 1))
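; As with the f32 variant, the all-false scatter is expected to be deleted entirely.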
8211 define void @mscatter_falsemask_v4f64(<4 x double> %val, <4 x ptr> %ptrs) {
8212 ; CHECK-LABEL: mscatter_falsemask_v4f64:
8215 call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> %val, <4 x ptr> %ptrs, i32 8, <4 x i1> zeroinitializer)
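; v8f64: the V configurations emit one masked vsoxei; both ZVE32F configurations
; scalarize, branching on each mask bit and storing with fsd.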
8219 declare void @llvm.masked.scatter.v8f64.v8p0(<8 x double>, <8 x ptr>, i32, <8 x i1>)
8221 define void @mscatter_v8f64(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
8222 ; RV32V-LABEL: mscatter_v8f64:
8224 ; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
8225 ; RV32V-NEXT: vsoxei32.v v8, (zero), v12, v0.t
8228 ; RV64-LABEL: mscatter_v8f64:
8230 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
8231 ; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
8234 ; RV32ZVE32F-LABEL: mscatter_v8f64:
8235 ; RV32ZVE32F: # %bb.0:
8236 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
8237 ; RV32ZVE32F-NEXT: vmv.x.s a0, v0
8238 ; RV32ZVE32F-NEXT: andi a1, a0, 1
8239 ; RV32ZVE32F-NEXT: bnez a1, .LBB80_9
8240 ; RV32ZVE32F-NEXT: # %bb.1: # %else
8241 ; RV32ZVE32F-NEXT: andi a1, a0, 2
8242 ; RV32ZVE32F-NEXT: bnez a1, .LBB80_10
8243 ; RV32ZVE32F-NEXT: .LBB80_2: # %else2
8244 ; RV32ZVE32F-NEXT: andi a1, a0, 4
8245 ; RV32ZVE32F-NEXT: bnez a1, .LBB80_11
8246 ; RV32ZVE32F-NEXT: .LBB80_3: # %else4
8247 ; RV32ZVE32F-NEXT: andi a1, a0, 8
8248 ; RV32ZVE32F-NEXT: bnez a1, .LBB80_12
8249 ; RV32ZVE32F-NEXT: .LBB80_4: # %else6
8250 ; RV32ZVE32F-NEXT: andi a1, a0, 16
8251 ; RV32ZVE32F-NEXT: bnez a1, .LBB80_13
8252 ; RV32ZVE32F-NEXT: .LBB80_5: # %else8
8253 ; RV32ZVE32F-NEXT: andi a1, a0, 32
8254 ; RV32ZVE32F-NEXT: bnez a1, .LBB80_14
8255 ; RV32ZVE32F-NEXT: .LBB80_6: # %else10
8256 ; RV32ZVE32F-NEXT: andi a1, a0, 64
8257 ; RV32ZVE32F-NEXT: bnez a1, .LBB80_15
8258 ; RV32ZVE32F-NEXT: .LBB80_7: # %else12
8259 ; RV32ZVE32F-NEXT: andi a0, a0, -128
8260 ; RV32ZVE32F-NEXT: bnez a0, .LBB80_16
8261 ; RV32ZVE32F-NEXT: .LBB80_8: # %else14
8262 ; RV32ZVE32F-NEXT: ret
8263 ; RV32ZVE32F-NEXT: .LBB80_9: # %cond.store
8264 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
8265 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
8266 ; RV32ZVE32F-NEXT: fsd fa0, 0(a1)
8267 ; RV32ZVE32F-NEXT: andi a1, a0, 2
8268 ; RV32ZVE32F-NEXT: beqz a1, .LBB80_2
8269 ; RV32ZVE32F-NEXT: .LBB80_10: # %cond.store1
8270 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8271 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
8272 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
8273 ; RV32ZVE32F-NEXT: fsd fa1, 0(a1)
8274 ; RV32ZVE32F-NEXT: andi a1, a0, 4
8275 ; RV32ZVE32F-NEXT: beqz a1, .LBB80_3
8276 ; RV32ZVE32F-NEXT: .LBB80_11: # %cond.store3
8277 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8278 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
8279 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
8280 ; RV32ZVE32F-NEXT: fsd fa2, 0(a1)
8281 ; RV32ZVE32F-NEXT: andi a1, a0, 8
8282 ; RV32ZVE32F-NEXT: beqz a1, .LBB80_4
8283 ; RV32ZVE32F-NEXT: .LBB80_12: # %cond.store5
8284 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8285 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
8286 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
8287 ; RV32ZVE32F-NEXT: fsd fa3, 0(a1)
8288 ; RV32ZVE32F-NEXT: andi a1, a0, 16
8289 ; RV32ZVE32F-NEXT: beqz a1, .LBB80_5
8290 ; RV32ZVE32F-NEXT: .LBB80_13: # %cond.store7
8291 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
8292 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
8293 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
8294 ; RV32ZVE32F-NEXT: fsd fa4, 0(a1)
8295 ; RV32ZVE32F-NEXT: andi a1, a0, 32
8296 ; RV32ZVE32F-NEXT: beqz a1, .LBB80_6
8297 ; RV32ZVE32F-NEXT: .LBB80_14: # %cond.store9
8298 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
8299 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
8300 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
8301 ; RV32ZVE32F-NEXT: fsd fa5, 0(a1)
8302 ; RV32ZVE32F-NEXT: andi a1, a0, 64
8303 ; RV32ZVE32F-NEXT: beqz a1, .LBB80_7
8304 ; RV32ZVE32F-NEXT: .LBB80_15: # %cond.store11
8305 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
8306 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
8307 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
8308 ; RV32ZVE32F-NEXT: fsd fa6, 0(a1)
8309 ; RV32ZVE32F-NEXT: andi a0, a0, -128
8310 ; RV32ZVE32F-NEXT: beqz a0, .LBB80_8
8311 ; RV32ZVE32F-NEXT: .LBB80_16: # %cond.store13
8312 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
8313 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
8314 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
8315 ; RV32ZVE32F-NEXT: fsd fa7, 0(a0)
8316 ; RV32ZVE32F-NEXT: ret
8318 ; RV64ZVE32F-LABEL: mscatter_v8f64:
8319 ; RV64ZVE32F: # %bb.0:
8320 ; RV64ZVE32F-NEXT: ld a1, 56(a0)
8321 ; RV64ZVE32F-NEXT: ld a2, 48(a0)
8322 ; RV64ZVE32F-NEXT: ld a4, 40(a0)
8323 ; RV64ZVE32F-NEXT: ld a5, 32(a0)
8324 ; RV64ZVE32F-NEXT: ld a6, 24(a0)
8325 ; RV64ZVE32F-NEXT: ld a7, 16(a0)
8326 ; RV64ZVE32F-NEXT: ld t0, 8(a0)
8327 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
8328 ; RV64ZVE32F-NEXT: vmv.x.s a3, v0
8329 ; RV64ZVE32F-NEXT: andi t1, a3, 1
8330 ; RV64ZVE32F-NEXT: bnez t1, .LBB80_9
8331 ; RV64ZVE32F-NEXT: # %bb.1: # %else
8332 ; RV64ZVE32F-NEXT: andi a0, a3, 2
8333 ; RV64ZVE32F-NEXT: bnez a0, .LBB80_10
8334 ; RV64ZVE32F-NEXT: .LBB80_2: # %else2
8335 ; RV64ZVE32F-NEXT: andi a0, a3, 4
8336 ; RV64ZVE32F-NEXT: bnez a0, .LBB80_11
8337 ; RV64ZVE32F-NEXT: .LBB80_3: # %else4
8338 ; RV64ZVE32F-NEXT: andi a0, a3, 8
8339 ; RV64ZVE32F-NEXT: bnez a0, .LBB80_12
8340 ; RV64ZVE32F-NEXT: .LBB80_4: # %else6
8341 ; RV64ZVE32F-NEXT: andi a0, a3, 16
8342 ; RV64ZVE32F-NEXT: bnez a0, .LBB80_13
8343 ; RV64ZVE32F-NEXT: .LBB80_5: # %else8
8344 ; RV64ZVE32F-NEXT: andi a0, a3, 32
8345 ; RV64ZVE32F-NEXT: bnez a0, .LBB80_14
8346 ; RV64ZVE32F-NEXT: .LBB80_6: # %else10
8347 ; RV64ZVE32F-NEXT: andi a0, a3, 64
8348 ; RV64ZVE32F-NEXT: bnez a0, .LBB80_15
8349 ; RV64ZVE32F-NEXT: .LBB80_7: # %else12
8350 ; RV64ZVE32F-NEXT: andi a0, a3, -128
8351 ; RV64ZVE32F-NEXT: bnez a0, .LBB80_16
8352 ; RV64ZVE32F-NEXT: .LBB80_8: # %else14
8353 ; RV64ZVE32F-NEXT: ret
8354 ; RV64ZVE32F-NEXT: .LBB80_9: # %cond.store
8355 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
8356 ; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
8357 ; RV64ZVE32F-NEXT: andi a0, a3, 2
8358 ; RV64ZVE32F-NEXT: beqz a0, .LBB80_2
8359 ; RV64ZVE32F-NEXT: .LBB80_10: # %cond.store1
8360 ; RV64ZVE32F-NEXT: fsd fa1, 0(t0)
8361 ; RV64ZVE32F-NEXT: andi a0, a3, 4
8362 ; RV64ZVE32F-NEXT: beqz a0, .LBB80_3
8363 ; RV64ZVE32F-NEXT: .LBB80_11: # %cond.store3
8364 ; RV64ZVE32F-NEXT: fsd fa2, 0(a7)
8365 ; RV64ZVE32F-NEXT: andi a0, a3, 8
8366 ; RV64ZVE32F-NEXT: beqz a0, .LBB80_4
8367 ; RV64ZVE32F-NEXT: .LBB80_12: # %cond.store5
8368 ; RV64ZVE32F-NEXT: fsd fa3, 0(a6)
8369 ; RV64ZVE32F-NEXT: andi a0, a3, 16
8370 ; RV64ZVE32F-NEXT: beqz a0, .LBB80_5
8371 ; RV64ZVE32F-NEXT: .LBB80_13: # %cond.store7
8372 ; RV64ZVE32F-NEXT: fsd fa4, 0(a5)
8373 ; RV64ZVE32F-NEXT: andi a0, a3, 32
8374 ; RV64ZVE32F-NEXT: beqz a0, .LBB80_6
8375 ; RV64ZVE32F-NEXT: .LBB80_14: # %cond.store9
8376 ; RV64ZVE32F-NEXT: fsd fa5, 0(a4)
8377 ; RV64ZVE32F-NEXT: andi a0, a3, 64
8378 ; RV64ZVE32F-NEXT: beqz a0, .LBB80_7
8379 ; RV64ZVE32F-NEXT: .LBB80_15: # %cond.store11
8380 ; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
8381 ; RV64ZVE32F-NEXT: andi a0, a3, -128
8382 ; RV64ZVE32F-NEXT: beqz a0, .LBB80_8
8383 ; RV64ZVE32F-NEXT: .LBB80_16: # %cond.store13
8384 ; RV64ZVE32F-NEXT: fsd fa7, 0(a1)
8385 ; RV64ZVE32F-NEXT: ret
8386 call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
8387 ret void
8388 }
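; i8 indices: the indexed-store targets sign-extend the indices and scale them
; by 8 (vsll.vi ..., 3); the RV64 Zve32f fallback extracts each index and forms
; the address with slli/add before the scalar fsd.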
8390 define void @mscatter_baseidx_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
8391 ; RV32V-LABEL: mscatter_baseidx_v8i8_v8f64:
8393 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
8394 ; RV32V-NEXT: vsext.vf4 v14, v12
8395 ; RV32V-NEXT: vsll.vi v12, v14, 3
8396 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
8397 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
8400 ; RV64-LABEL: mscatter_baseidx_v8i8_v8f64:
8402 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
8403 ; RV64-NEXT: vsext.vf8 v16, v12
8404 ; RV64-NEXT: vsll.vi v12, v16, 3
8405 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
8408 ; RV32ZVE32F-LABEL: mscatter_baseidx_v8i8_v8f64:
8409 ; RV32ZVE32F: # %bb.0:
8410 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
8411 ; RV32ZVE32F-NEXT: vsext.vf4 v10, v8
8412 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
8413 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
8414 ; RV32ZVE32F-NEXT: vmv.x.s a1, v0
8415 ; RV32ZVE32F-NEXT: andi a2, a1, 1
8416 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
8417 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
8418 ; RV32ZVE32F-NEXT: bnez a2, .LBB81_9
8419 ; RV32ZVE32F-NEXT: # %bb.1: # %else
8420 ; RV32ZVE32F-NEXT: andi a0, a1, 2
8421 ; RV32ZVE32F-NEXT: bnez a0, .LBB81_10
8422 ; RV32ZVE32F-NEXT: .LBB81_2: # %else2
8423 ; RV32ZVE32F-NEXT: andi a0, a1, 4
8424 ; RV32ZVE32F-NEXT: bnez a0, .LBB81_11
8425 ; RV32ZVE32F-NEXT: .LBB81_3: # %else4
8426 ; RV32ZVE32F-NEXT: andi a0, a1, 8
8427 ; RV32ZVE32F-NEXT: bnez a0, .LBB81_12
8428 ; RV32ZVE32F-NEXT: .LBB81_4: # %else6
8429 ; RV32ZVE32F-NEXT: andi a0, a1, 16
8430 ; RV32ZVE32F-NEXT: bnez a0, .LBB81_13
8431 ; RV32ZVE32F-NEXT: .LBB81_5: # %else8
8432 ; RV32ZVE32F-NEXT: andi a0, a1, 32
8433 ; RV32ZVE32F-NEXT: bnez a0, .LBB81_14
8434 ; RV32ZVE32F-NEXT: .LBB81_6: # %else10
8435 ; RV32ZVE32F-NEXT: andi a0, a1, 64
8436 ; RV32ZVE32F-NEXT: bnez a0, .LBB81_15
8437 ; RV32ZVE32F-NEXT: .LBB81_7: # %else12
8438 ; RV32ZVE32F-NEXT: andi a0, a1, -128
8439 ; RV32ZVE32F-NEXT: bnez a0, .LBB81_16
8440 ; RV32ZVE32F-NEXT: .LBB81_8: # %else14
8441 ; RV32ZVE32F-NEXT: ret
8442 ; RV32ZVE32F-NEXT: .LBB81_9: # %cond.store
8443 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
8444 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
8445 ; RV32ZVE32F-NEXT: andi a0, a1, 2
8446 ; RV32ZVE32F-NEXT: beqz a0, .LBB81_2
8447 ; RV32ZVE32F-NEXT: .LBB81_10: # %cond.store1
8448 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8449 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
8450 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
8451 ; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
8452 ; RV32ZVE32F-NEXT: andi a0, a1, 4
8453 ; RV32ZVE32F-NEXT: beqz a0, .LBB81_3
8454 ; RV32ZVE32F-NEXT: .LBB81_11: # %cond.store3
8455 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8456 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
8457 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
8458 ; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
8459 ; RV32ZVE32F-NEXT: andi a0, a1, 8
8460 ; RV32ZVE32F-NEXT: beqz a0, .LBB81_4
8461 ; RV32ZVE32F-NEXT: .LBB81_12: # %cond.store5
8462 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8463 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
8464 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
8465 ; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
8466 ; RV32ZVE32F-NEXT: andi a0, a1, 16
8467 ; RV32ZVE32F-NEXT: beqz a0, .LBB81_5
8468 ; RV32ZVE32F-NEXT: .LBB81_13: # %cond.store7
8469 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
8470 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
8471 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
8472 ; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
8473 ; RV32ZVE32F-NEXT: andi a0, a1, 32
8474 ; RV32ZVE32F-NEXT: beqz a0, .LBB81_6
8475 ; RV32ZVE32F-NEXT: .LBB81_14: # %cond.store9
8476 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
8477 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
8478 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
8479 ; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
8480 ; RV32ZVE32F-NEXT: andi a0, a1, 64
8481 ; RV32ZVE32F-NEXT: beqz a0, .LBB81_7
8482 ; RV32ZVE32F-NEXT: .LBB81_15: # %cond.store11
8483 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
8484 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
8485 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
8486 ; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
8487 ; RV32ZVE32F-NEXT: andi a0, a1, -128
8488 ; RV32ZVE32F-NEXT: beqz a0, .LBB81_8
8489 ; RV32ZVE32F-NEXT: .LBB81_16: # %cond.store13
8490 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
8491 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
8492 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
8493 ; RV32ZVE32F-NEXT: fsd fa7, 0(a0)
8494 ; RV32ZVE32F-NEXT: ret
8496 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8_v8f64:
8497 ; RV64ZVE32F: # %bb.0:
8498 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
8499 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
8500 ; RV64ZVE32F-NEXT: andi a2, a1, 1
8501 ; RV64ZVE32F-NEXT: beqz a2, .LBB81_2
8502 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
8503 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8504 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8505 ; RV64ZVE32F-NEXT: add a2, a0, a2
8506 ; RV64ZVE32F-NEXT: fsd fa0, 0(a2)
8507 ; RV64ZVE32F-NEXT: .LBB81_2: # %else
8508 ; RV64ZVE32F-NEXT: andi a2, a1, 2
8509 ; RV64ZVE32F-NEXT: beqz a2, .LBB81_4
8510 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
8511 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8512 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
8513 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
8514 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8515 ; RV64ZVE32F-NEXT: add a2, a0, a2
8516 ; RV64ZVE32F-NEXT: fsd fa1, 0(a2)
8517 ; RV64ZVE32F-NEXT: .LBB81_4: # %else2
8518 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
8519 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
8520 ; RV64ZVE32F-NEXT: andi a2, a1, 4
8521 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
8522 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
8523 ; RV64ZVE32F-NEXT: bnez a2, .LBB81_12
8524 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
8525 ; RV64ZVE32F-NEXT: andi a2, a1, 8
8526 ; RV64ZVE32F-NEXT: bnez a2, .LBB81_13
8527 ; RV64ZVE32F-NEXT: .LBB81_6: # %else6
8528 ; RV64ZVE32F-NEXT: andi a2, a1, 16
8529 ; RV64ZVE32F-NEXT: bnez a2, .LBB81_14
8530 ; RV64ZVE32F-NEXT: .LBB81_7: # %else8
8531 ; RV64ZVE32F-NEXT: andi a2, a1, 32
8532 ; RV64ZVE32F-NEXT: beqz a2, .LBB81_9
8533 ; RV64ZVE32F-NEXT: .LBB81_8: # %cond.store9
8534 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
8535 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8536 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8537 ; RV64ZVE32F-NEXT: add a2, a0, a2
8538 ; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
8539 ; RV64ZVE32F-NEXT: .LBB81_9: # %else10
8540 ; RV64ZVE32F-NEXT: andi a2, a1, 64
8541 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
8542 ; RV64ZVE32F-NEXT: bnez a2, .LBB81_15
8543 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
8544 ; RV64ZVE32F-NEXT: andi a1, a1, -128
8545 ; RV64ZVE32F-NEXT: bnez a1, .LBB81_16
8546 ; RV64ZVE32F-NEXT: .LBB81_11: # %else14
8547 ; RV64ZVE32F-NEXT: ret
8548 ; RV64ZVE32F-NEXT: .LBB81_12: # %cond.store3
8549 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8550 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8551 ; RV64ZVE32F-NEXT: add a2, a0, a2
8552 ; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
8553 ; RV64ZVE32F-NEXT: andi a2, a1, 8
8554 ; RV64ZVE32F-NEXT: beqz a2, .LBB81_6
8555 ; RV64ZVE32F-NEXT: .LBB81_13: # %cond.store5
8556 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
8557 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8558 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8559 ; RV64ZVE32F-NEXT: add a2, a0, a2
8560 ; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
8561 ; RV64ZVE32F-NEXT: andi a2, a1, 16
8562 ; RV64ZVE32F-NEXT: beqz a2, .LBB81_7
8563 ; RV64ZVE32F-NEXT: .LBB81_14: # %cond.store7
8564 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
8565 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8566 ; RV64ZVE32F-NEXT: add a2, a0, a2
8567 ; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
8568 ; RV64ZVE32F-NEXT: andi a2, a1, 32
8569 ; RV64ZVE32F-NEXT: bnez a2, .LBB81_8
8570 ; RV64ZVE32F-NEXT: j .LBB81_9
8571 ; RV64ZVE32F-NEXT: .LBB81_15: # %cond.store11
8572 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8573 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8574 ; RV64ZVE32F-NEXT: add a2, a0, a2
8575 ; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
8576 ; RV64ZVE32F-NEXT: andi a1, a1, -128
8577 ; RV64ZVE32F-NEXT: beqz a1, .LBB81_11
8578 ; RV64ZVE32F-NEXT: .LBB81_16: # %cond.store13
8579 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
8580 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
8581 ; RV64ZVE32F-NEXT: slli a1, a1, 3
8582 ; RV64ZVE32F-NEXT: add a0, a0, a1
8583 ; RV64ZVE32F-NEXT: fsd fa7, 0(a0)
8584 ; RV64ZVE32F-NEXT: ret
8585 %ptrs = getelementptr inbounds double, ptr %base, <8 x i8> %idxs
8586 call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
8587 ret void
8588 }
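; Explicitly sign-extended i8 indices; expected to lower the same way as the
; plain i8-index case above.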
8590 define void @mscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
8591 ; RV32V-LABEL: mscatter_baseidx_sext_v8i8_v8f64:
8593 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
8594 ; RV32V-NEXT: vsext.vf4 v14, v12
8595 ; RV32V-NEXT: vsll.vi v12, v14, 3
8596 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
8597 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
8600 ; RV64-LABEL: mscatter_baseidx_sext_v8i8_v8f64:
8602 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
8603 ; RV64-NEXT: vsext.vf8 v16, v12
8604 ; RV64-NEXT: vsll.vi v12, v16, 3
8605 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
8608 ; RV32ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8f64:
8609 ; RV32ZVE32F: # %bb.0:
8610 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
8611 ; RV32ZVE32F-NEXT: vsext.vf4 v10, v8
8612 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
8613 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
8614 ; RV32ZVE32F-NEXT: vmv.x.s a1, v0
8615 ; RV32ZVE32F-NEXT: andi a2, a1, 1
8616 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
8617 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
8618 ; RV32ZVE32F-NEXT: bnez a2, .LBB82_9
8619 ; RV32ZVE32F-NEXT: # %bb.1: # %else
8620 ; RV32ZVE32F-NEXT: andi a0, a1, 2
8621 ; RV32ZVE32F-NEXT: bnez a0, .LBB82_10
8622 ; RV32ZVE32F-NEXT: .LBB82_2: # %else2
8623 ; RV32ZVE32F-NEXT: andi a0, a1, 4
8624 ; RV32ZVE32F-NEXT: bnez a0, .LBB82_11
8625 ; RV32ZVE32F-NEXT: .LBB82_3: # %else4
8626 ; RV32ZVE32F-NEXT: andi a0, a1, 8
8627 ; RV32ZVE32F-NEXT: bnez a0, .LBB82_12
8628 ; RV32ZVE32F-NEXT: .LBB82_4: # %else6
8629 ; RV32ZVE32F-NEXT: andi a0, a1, 16
8630 ; RV32ZVE32F-NEXT: bnez a0, .LBB82_13
8631 ; RV32ZVE32F-NEXT: .LBB82_5: # %else8
8632 ; RV32ZVE32F-NEXT: andi a0, a1, 32
8633 ; RV32ZVE32F-NEXT: bnez a0, .LBB82_14
8634 ; RV32ZVE32F-NEXT: .LBB82_6: # %else10
8635 ; RV32ZVE32F-NEXT: andi a0, a1, 64
8636 ; RV32ZVE32F-NEXT: bnez a0, .LBB82_15
8637 ; RV32ZVE32F-NEXT: .LBB82_7: # %else12
8638 ; RV32ZVE32F-NEXT: andi a0, a1, -128
8639 ; RV32ZVE32F-NEXT: bnez a0, .LBB82_16
8640 ; RV32ZVE32F-NEXT: .LBB82_8: # %else14
8641 ; RV32ZVE32F-NEXT: ret
8642 ; RV32ZVE32F-NEXT: .LBB82_9: # %cond.store
8643 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
8644 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
8645 ; RV32ZVE32F-NEXT: andi a0, a1, 2
8646 ; RV32ZVE32F-NEXT: beqz a0, .LBB82_2
8647 ; RV32ZVE32F-NEXT: .LBB82_10: # %cond.store1
8648 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8649 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
8650 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
8651 ; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
8652 ; RV32ZVE32F-NEXT: andi a0, a1, 4
8653 ; RV32ZVE32F-NEXT: beqz a0, .LBB82_3
8654 ; RV32ZVE32F-NEXT: .LBB82_11: # %cond.store3
8655 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8656 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
8657 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
8658 ; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
8659 ; RV32ZVE32F-NEXT: andi a0, a1, 8
8660 ; RV32ZVE32F-NEXT: beqz a0, .LBB82_4
8661 ; RV32ZVE32F-NEXT: .LBB82_12: # %cond.store5
8662 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8663 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
8664 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
8665 ; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
8666 ; RV32ZVE32F-NEXT: andi a0, a1, 16
8667 ; RV32ZVE32F-NEXT: beqz a0, .LBB82_5
8668 ; RV32ZVE32F-NEXT: .LBB82_13: # %cond.store7
8669 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
8670 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
8671 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
8672 ; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
8673 ; RV32ZVE32F-NEXT: andi a0, a1, 32
8674 ; RV32ZVE32F-NEXT: beqz a0, .LBB82_6
8675 ; RV32ZVE32F-NEXT: .LBB82_14: # %cond.store9
8676 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
8677 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
8678 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
8679 ; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
8680 ; RV32ZVE32F-NEXT: andi a0, a1, 64
8681 ; RV32ZVE32F-NEXT: beqz a0, .LBB82_7
8682 ; RV32ZVE32F-NEXT: .LBB82_15: # %cond.store11
8683 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
8684 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
8685 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
8686 ; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
8687 ; RV32ZVE32F-NEXT: andi a0, a1, -128
8688 ; RV32ZVE32F-NEXT: beqz a0, .LBB82_8
8689 ; RV32ZVE32F-NEXT: .LBB82_16: # %cond.store13
8690 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
8691 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
8692 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
8693 ; RV32ZVE32F-NEXT: fsd fa7, 0(a0)
8694 ; RV32ZVE32F-NEXT: ret
8696 ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8f64:
8697 ; RV64ZVE32F: # %bb.0:
8698 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
8699 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
8700 ; RV64ZVE32F-NEXT: andi a2, a1, 1
8701 ; RV64ZVE32F-NEXT: beqz a2, .LBB82_2
8702 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
8703 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8704 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8705 ; RV64ZVE32F-NEXT: add a2, a0, a2
8706 ; RV64ZVE32F-NEXT: fsd fa0, 0(a2)
8707 ; RV64ZVE32F-NEXT: .LBB82_2: # %else
8708 ; RV64ZVE32F-NEXT: andi a2, a1, 2
8709 ; RV64ZVE32F-NEXT: beqz a2, .LBB82_4
8710 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
8711 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8712 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
8713 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
8714 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8715 ; RV64ZVE32F-NEXT: add a2, a0, a2
8716 ; RV64ZVE32F-NEXT: fsd fa1, 0(a2)
8717 ; RV64ZVE32F-NEXT: .LBB82_4: # %else2
8718 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
8719 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
8720 ; RV64ZVE32F-NEXT: andi a2, a1, 4
8721 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
8722 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
8723 ; RV64ZVE32F-NEXT: bnez a2, .LBB82_12
8724 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
8725 ; RV64ZVE32F-NEXT: andi a2, a1, 8
8726 ; RV64ZVE32F-NEXT: bnez a2, .LBB82_13
8727 ; RV64ZVE32F-NEXT: .LBB82_6: # %else6
8728 ; RV64ZVE32F-NEXT: andi a2, a1, 16
8729 ; RV64ZVE32F-NEXT: bnez a2, .LBB82_14
8730 ; RV64ZVE32F-NEXT: .LBB82_7: # %else8
8731 ; RV64ZVE32F-NEXT: andi a2, a1, 32
8732 ; RV64ZVE32F-NEXT: beqz a2, .LBB82_9
8733 ; RV64ZVE32F-NEXT: .LBB82_8: # %cond.store9
8734 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
8735 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8736 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8737 ; RV64ZVE32F-NEXT: add a2, a0, a2
8738 ; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
8739 ; RV64ZVE32F-NEXT: .LBB82_9: # %else10
8740 ; RV64ZVE32F-NEXT: andi a2, a1, 64
8741 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
8742 ; RV64ZVE32F-NEXT: bnez a2, .LBB82_15
8743 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
8744 ; RV64ZVE32F-NEXT: andi a1, a1, -128
8745 ; RV64ZVE32F-NEXT: bnez a1, .LBB82_16
8746 ; RV64ZVE32F-NEXT: .LBB82_11: # %else14
8747 ; RV64ZVE32F-NEXT: ret
8748 ; RV64ZVE32F-NEXT: .LBB82_12: # %cond.store3
8749 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8750 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8751 ; RV64ZVE32F-NEXT: add a2, a0, a2
8752 ; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
8753 ; RV64ZVE32F-NEXT: andi a2, a1, 8
8754 ; RV64ZVE32F-NEXT: beqz a2, .LBB82_6
8755 ; RV64ZVE32F-NEXT: .LBB82_13: # %cond.store5
8756 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
8757 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8758 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8759 ; RV64ZVE32F-NEXT: add a2, a0, a2
8760 ; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
8761 ; RV64ZVE32F-NEXT: andi a2, a1, 16
8762 ; RV64ZVE32F-NEXT: beqz a2, .LBB82_7
8763 ; RV64ZVE32F-NEXT: .LBB82_14: # %cond.store7
8764 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
8765 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8766 ; RV64ZVE32F-NEXT: add a2, a0, a2
8767 ; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
8768 ; RV64ZVE32F-NEXT: andi a2, a1, 32
8769 ; RV64ZVE32F-NEXT: bnez a2, .LBB82_8
8770 ; RV64ZVE32F-NEXT: j .LBB82_9
8771 ; RV64ZVE32F-NEXT: .LBB82_15: # %cond.store11
8772 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8773 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8774 ; RV64ZVE32F-NEXT: add a2, a0, a2
8775 ; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
8776 ; RV64ZVE32F-NEXT: andi a1, a1, -128
8777 ; RV64ZVE32F-NEXT: beqz a1, .LBB82_11
8778 ; RV64ZVE32F-NEXT: .LBB82_16: # %cond.store13
8779 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
8780 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
8781 ; RV64ZVE32F-NEXT: slli a1, a1, 3
8782 ; RV64ZVE32F-NEXT: add a0, a0, a1
8783 ; RV64ZVE32F-NEXT: fsd fa7, 0(a0)
8784 ; RV64ZVE32F-NEXT: ret
8785 %eidxs = sext <8 x i8> %idxs to <8 x i64>
8786 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
8787 call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
8788 ret void
8789 }
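; Zero-extended i8 indices fit in 16 bits, so RV32V/RV64 can use the narrower
; vsoxei16 index operand; the RV64 Zve32f fallback masks each extracted index
; with 255 before scaling.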
8791 define void @mscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
8792 ; RV32V-LABEL: mscatter_baseidx_zext_v8i8_v8f64:
8794 ; RV32V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
8795 ; RV32V-NEXT: vzext.vf2 v13, v12
8796 ; RV32V-NEXT: vsll.vi v12, v13, 3
8797 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
8798 ; RV32V-NEXT: vsoxei16.v v8, (a0), v12, v0.t
8801 ; RV64-LABEL: mscatter_baseidx_zext_v8i8_v8f64:
8803 ; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
8804 ; RV64-NEXT: vzext.vf2 v13, v12
8805 ; RV64-NEXT: vsll.vi v12, v13, 3
8806 ; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, ma
8807 ; RV64-NEXT: vsoxei16.v v8, (a0), v12, v0.t
8810 ; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8f64:
8811 ; RV32ZVE32F: # %bb.0:
8812 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
8813 ; RV32ZVE32F-NEXT: vzext.vf4 v10, v8
8814 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
8815 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
8816 ; RV32ZVE32F-NEXT: vmv.x.s a1, v0
8817 ; RV32ZVE32F-NEXT: andi a2, a1, 1
8818 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
8819 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
8820 ; RV32ZVE32F-NEXT: bnez a2, .LBB83_9
8821 ; RV32ZVE32F-NEXT: # %bb.1: # %else
8822 ; RV32ZVE32F-NEXT: andi a0, a1, 2
8823 ; RV32ZVE32F-NEXT: bnez a0, .LBB83_10
8824 ; RV32ZVE32F-NEXT: .LBB83_2: # %else2
8825 ; RV32ZVE32F-NEXT: andi a0, a1, 4
8826 ; RV32ZVE32F-NEXT: bnez a0, .LBB83_11
8827 ; RV32ZVE32F-NEXT: .LBB83_3: # %else4
8828 ; RV32ZVE32F-NEXT: andi a0, a1, 8
8829 ; RV32ZVE32F-NEXT: bnez a0, .LBB83_12
8830 ; RV32ZVE32F-NEXT: .LBB83_4: # %else6
8831 ; RV32ZVE32F-NEXT: andi a0, a1, 16
8832 ; RV32ZVE32F-NEXT: bnez a0, .LBB83_13
8833 ; RV32ZVE32F-NEXT: .LBB83_5: # %else8
8834 ; RV32ZVE32F-NEXT: andi a0, a1, 32
8835 ; RV32ZVE32F-NEXT: bnez a0, .LBB83_14
8836 ; RV32ZVE32F-NEXT: .LBB83_6: # %else10
8837 ; RV32ZVE32F-NEXT: andi a0, a1, 64
8838 ; RV32ZVE32F-NEXT: bnez a0, .LBB83_15
8839 ; RV32ZVE32F-NEXT: .LBB83_7: # %else12
8840 ; RV32ZVE32F-NEXT: andi a0, a1, -128
8841 ; RV32ZVE32F-NEXT: bnez a0, .LBB83_16
8842 ; RV32ZVE32F-NEXT: .LBB83_8: # %else14
8843 ; RV32ZVE32F-NEXT: ret
8844 ; RV32ZVE32F-NEXT: .LBB83_9: # %cond.store
8845 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
8846 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
8847 ; RV32ZVE32F-NEXT: andi a0, a1, 2
8848 ; RV32ZVE32F-NEXT: beqz a0, .LBB83_2
8849 ; RV32ZVE32F-NEXT: .LBB83_10: # %cond.store1
8850 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8851 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
8852 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
8853 ; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
8854 ; RV32ZVE32F-NEXT: andi a0, a1, 4
8855 ; RV32ZVE32F-NEXT: beqz a0, .LBB83_3
8856 ; RV32ZVE32F-NEXT: .LBB83_11: # %cond.store3
8857 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8858 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
8859 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
8860 ; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
8861 ; RV32ZVE32F-NEXT: andi a0, a1, 8
8862 ; RV32ZVE32F-NEXT: beqz a0, .LBB83_4
8863 ; RV32ZVE32F-NEXT: .LBB83_12: # %cond.store5
8864 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8865 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
8866 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
8867 ; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
8868 ; RV32ZVE32F-NEXT: andi a0, a1, 16
8869 ; RV32ZVE32F-NEXT: beqz a0, .LBB83_5
8870 ; RV32ZVE32F-NEXT: .LBB83_13: # %cond.store7
8871 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
8872 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
8873 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
8874 ; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
8875 ; RV32ZVE32F-NEXT: andi a0, a1, 32
8876 ; RV32ZVE32F-NEXT: beqz a0, .LBB83_6
8877 ; RV32ZVE32F-NEXT: .LBB83_14: # %cond.store9
8878 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
8879 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
8880 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
8881 ; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
8882 ; RV32ZVE32F-NEXT: andi a0, a1, 64
8883 ; RV32ZVE32F-NEXT: beqz a0, .LBB83_7
8884 ; RV32ZVE32F-NEXT: .LBB83_15: # %cond.store11
8885 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
8886 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
8887 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
8888 ; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
8889 ; RV32ZVE32F-NEXT: andi a0, a1, -128
8890 ; RV32ZVE32F-NEXT: beqz a0, .LBB83_8
8891 ; RV32ZVE32F-NEXT: .LBB83_16: # %cond.store13
8892 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
8893 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
8894 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
8895 ; RV32ZVE32F-NEXT: fsd fa7, 0(a0)
8896 ; RV32ZVE32F-NEXT: ret
8898 ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8f64:
8899 ; RV64ZVE32F: # %bb.0:
8900 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
8901 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
8902 ; RV64ZVE32F-NEXT: andi a2, a1, 1
8903 ; RV64ZVE32F-NEXT: beqz a2, .LBB83_2
8904 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
8905 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8906 ; RV64ZVE32F-NEXT: andi a2, a2, 255
8907 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8908 ; RV64ZVE32F-NEXT: add a2, a0, a2
8909 ; RV64ZVE32F-NEXT: fsd fa0, 0(a2)
8910 ; RV64ZVE32F-NEXT: .LBB83_2: # %else
8911 ; RV64ZVE32F-NEXT: andi a2, a1, 2
8912 ; RV64ZVE32F-NEXT: beqz a2, .LBB83_4
8913 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
8914 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8915 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
8916 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
8917 ; RV64ZVE32F-NEXT: andi a2, a2, 255
8918 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8919 ; RV64ZVE32F-NEXT: add a2, a0, a2
8920 ; RV64ZVE32F-NEXT: fsd fa1, 0(a2)
8921 ; RV64ZVE32F-NEXT: .LBB83_4: # %else2
8922 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
8923 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
8924 ; RV64ZVE32F-NEXT: andi a2, a1, 4
8925 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
8926 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
8927 ; RV64ZVE32F-NEXT: bnez a2, .LBB83_12
8928 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
8929 ; RV64ZVE32F-NEXT: andi a2, a1, 8
8930 ; RV64ZVE32F-NEXT: bnez a2, .LBB83_13
8931 ; RV64ZVE32F-NEXT: .LBB83_6: # %else6
8932 ; RV64ZVE32F-NEXT: andi a2, a1, 16
8933 ; RV64ZVE32F-NEXT: bnez a2, .LBB83_14
8934 ; RV64ZVE32F-NEXT: .LBB83_7: # %else8
8935 ; RV64ZVE32F-NEXT: andi a2, a1, 32
8936 ; RV64ZVE32F-NEXT: beqz a2, .LBB83_9
8937 ; RV64ZVE32F-NEXT: .LBB83_8: # %cond.store9
8938 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
8939 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8940 ; RV64ZVE32F-NEXT: andi a2, a2, 255
8941 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8942 ; RV64ZVE32F-NEXT: add a2, a0, a2
8943 ; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
8944 ; RV64ZVE32F-NEXT: .LBB83_9: # %else10
8945 ; RV64ZVE32F-NEXT: andi a2, a1, 64
8946 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
8947 ; RV64ZVE32F-NEXT: bnez a2, .LBB83_15
8948 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
8949 ; RV64ZVE32F-NEXT: andi a1, a1, -128
8950 ; RV64ZVE32F-NEXT: bnez a1, .LBB83_16
8951 ; RV64ZVE32F-NEXT: .LBB83_11: # %else14
8952 ; RV64ZVE32F-NEXT: ret
8953 ; RV64ZVE32F-NEXT: .LBB83_12: # %cond.store3
8954 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8955 ; RV64ZVE32F-NEXT: andi a2, a2, 255
8956 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8957 ; RV64ZVE32F-NEXT: add a2, a0, a2
8958 ; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
8959 ; RV64ZVE32F-NEXT: andi a2, a1, 8
8960 ; RV64ZVE32F-NEXT: beqz a2, .LBB83_6
8961 ; RV64ZVE32F-NEXT: .LBB83_13: # %cond.store5
8962 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
8963 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8964 ; RV64ZVE32F-NEXT: andi a2, a2, 255
8965 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8966 ; RV64ZVE32F-NEXT: add a2, a0, a2
8967 ; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
8968 ; RV64ZVE32F-NEXT: andi a2, a1, 16
8969 ; RV64ZVE32F-NEXT: beqz a2, .LBB83_7
8970 ; RV64ZVE32F-NEXT: .LBB83_14: # %cond.store7
8971 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
8972 ; RV64ZVE32F-NEXT: andi a2, a2, 255
8973 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8974 ; RV64ZVE32F-NEXT: add a2, a0, a2
8975 ; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
8976 ; RV64ZVE32F-NEXT: andi a2, a1, 32
8977 ; RV64ZVE32F-NEXT: bnez a2, .LBB83_8
8978 ; RV64ZVE32F-NEXT: j .LBB83_9
8979 ; RV64ZVE32F-NEXT: .LBB83_15: # %cond.store11
8980 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8981 ; RV64ZVE32F-NEXT: andi a2, a2, 255
8982 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8983 ; RV64ZVE32F-NEXT: add a2, a0, a2
8984 ; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
8985 ; RV64ZVE32F-NEXT: andi a1, a1, -128
8986 ; RV64ZVE32F-NEXT: beqz a1, .LBB83_11
8987 ; RV64ZVE32F-NEXT: .LBB83_16: # %cond.store13
8988 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
8989 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
8990 ; RV64ZVE32F-NEXT: andi a1, a1, 255
8991 ; RV64ZVE32F-NEXT: slli a1, a1, 3
8992 ; RV64ZVE32F-NEXT: add a0, a0, a1
8993 ; RV64ZVE32F-NEXT: fsd fa7, 0(a0)
8994 ; RV64ZVE32F-NEXT: ret
8995 %eidxs = zext <8 x i8> %idxs to <8 x i64>
8996 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
8997 call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
8998 ret void
8999 }
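; i16 indices are sign-extended and scaled by 8 to form the byte offsets.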
9001 define void @mscatter_baseidx_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
9002 ; RV32V-LABEL: mscatter_baseidx_v8i16_v8f64:
9004 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
9005 ; RV32V-NEXT: vsext.vf2 v14, v12
9006 ; RV32V-NEXT: vsll.vi v12, v14, 3
9007 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
9008 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
9011 ; RV64-LABEL: mscatter_baseidx_v8i16_v8f64:
9013 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
9014 ; RV64-NEXT: vsext.vf4 v16, v12
9015 ; RV64-NEXT: vsll.vi v12, v16, 3
9016 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
9019 ; RV32ZVE32F-LABEL: mscatter_baseidx_v8i16_v8f64:
9020 ; RV32ZVE32F: # %bb.0:
9021 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
9022 ; RV32ZVE32F-NEXT: vsext.vf2 v10, v8
9023 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
9024 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
9025 ; RV32ZVE32F-NEXT: vmv.x.s a1, v0
9026 ; RV32ZVE32F-NEXT: andi a2, a1, 1
9027 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
9028 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
9029 ; RV32ZVE32F-NEXT: bnez a2, .LBB84_9
9030 ; RV32ZVE32F-NEXT: # %bb.1: # %else
9031 ; RV32ZVE32F-NEXT: andi a0, a1, 2
9032 ; RV32ZVE32F-NEXT: bnez a0, .LBB84_10
9033 ; RV32ZVE32F-NEXT: .LBB84_2: # %else2
9034 ; RV32ZVE32F-NEXT: andi a0, a1, 4
9035 ; RV32ZVE32F-NEXT: bnez a0, .LBB84_11
9036 ; RV32ZVE32F-NEXT: .LBB84_3: # %else4
9037 ; RV32ZVE32F-NEXT: andi a0, a1, 8
9038 ; RV32ZVE32F-NEXT: bnez a0, .LBB84_12
9039 ; RV32ZVE32F-NEXT: .LBB84_4: # %else6
9040 ; RV32ZVE32F-NEXT: andi a0, a1, 16
9041 ; RV32ZVE32F-NEXT: bnez a0, .LBB84_13
9042 ; RV32ZVE32F-NEXT: .LBB84_5: # %else8
9043 ; RV32ZVE32F-NEXT: andi a0, a1, 32
9044 ; RV32ZVE32F-NEXT: bnez a0, .LBB84_14
9045 ; RV32ZVE32F-NEXT: .LBB84_6: # %else10
9046 ; RV32ZVE32F-NEXT: andi a0, a1, 64
9047 ; RV32ZVE32F-NEXT: bnez a0, .LBB84_15
9048 ; RV32ZVE32F-NEXT: .LBB84_7: # %else12
9049 ; RV32ZVE32F-NEXT: andi a0, a1, -128
9050 ; RV32ZVE32F-NEXT: bnez a0, .LBB84_16
9051 ; RV32ZVE32F-NEXT: .LBB84_8: # %else14
9052 ; RV32ZVE32F-NEXT: ret
9053 ; RV32ZVE32F-NEXT: .LBB84_9: # %cond.store
9054 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
9055 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
9056 ; RV32ZVE32F-NEXT: andi a0, a1, 2
9057 ; RV32ZVE32F-NEXT: beqz a0, .LBB84_2
9058 ; RV32ZVE32F-NEXT: .LBB84_10: # %cond.store1
9059 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9060 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
9061 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9062 ; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
9063 ; RV32ZVE32F-NEXT: andi a0, a1, 4
9064 ; RV32ZVE32F-NEXT: beqz a0, .LBB84_3
9065 ; RV32ZVE32F-NEXT: .LBB84_11: # %cond.store3
9066 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9067 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
9068 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9069 ; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
9070 ; RV32ZVE32F-NEXT: andi a0, a1, 8
9071 ; RV32ZVE32F-NEXT: beqz a0, .LBB84_4
9072 ; RV32ZVE32F-NEXT: .LBB84_12: # %cond.store5
9073 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9074 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
9075 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9076 ; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
9077 ; RV32ZVE32F-NEXT: andi a0, a1, 16
9078 ; RV32ZVE32F-NEXT: beqz a0, .LBB84_5
9079 ; RV32ZVE32F-NEXT: .LBB84_13: # %cond.store7
9080 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9081 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
9082 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9083 ; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
9084 ; RV32ZVE32F-NEXT: andi a0, a1, 32
9085 ; RV32ZVE32F-NEXT: beqz a0, .LBB84_6
9086 ; RV32ZVE32F-NEXT: .LBB84_14: # %cond.store9
9087 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9088 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
9089 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9090 ; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
9091 ; RV32ZVE32F-NEXT: andi a0, a1, 64
9092 ; RV32ZVE32F-NEXT: beqz a0, .LBB84_7
9093 ; RV32ZVE32F-NEXT: .LBB84_15: # %cond.store11
9094 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9095 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
9096 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9097 ; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
9098 ; RV32ZVE32F-NEXT: andi a0, a1, -128
9099 ; RV32ZVE32F-NEXT: beqz a0, .LBB84_8
9100 ; RV32ZVE32F-NEXT: .LBB84_16: # %cond.store13
9101 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9102 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
9103 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
9104 ; RV32ZVE32F-NEXT: fsd fa7, 0(a0)
9105 ; RV32ZVE32F-NEXT: ret
9107 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i16_v8f64:
9108 ; RV64ZVE32F: # %bb.0:
9109 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
9110 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
9111 ; RV64ZVE32F-NEXT: andi a2, a1, 1
9112 ; RV64ZVE32F-NEXT: beqz a2, .LBB84_2
9113 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
9114 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
9115 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9116 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9117 ; RV64ZVE32F-NEXT: add a2, a0, a2
9118 ; RV64ZVE32F-NEXT: fsd fa0, 0(a2)
9119 ; RV64ZVE32F-NEXT: .LBB84_2: # %else
9120 ; RV64ZVE32F-NEXT: andi a2, a1, 2
9121 ; RV64ZVE32F-NEXT: beqz a2, .LBB84_4
9122 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
9123 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
9124 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
9125 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
9126 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9127 ; RV64ZVE32F-NEXT: add a2, a0, a2
9128 ; RV64ZVE32F-NEXT: fsd fa1, 0(a2)
9129 ; RV64ZVE32F-NEXT: .LBB84_4: # %else2
9130 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
9131 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
9132 ; RV64ZVE32F-NEXT: andi a2, a1, 4
9133 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
9134 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
9135 ; RV64ZVE32F-NEXT: bnez a2, .LBB84_12
9136 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
9137 ; RV64ZVE32F-NEXT: andi a2, a1, 8
9138 ; RV64ZVE32F-NEXT: bnez a2, .LBB84_13
9139 ; RV64ZVE32F-NEXT: .LBB84_6: # %else6
9140 ; RV64ZVE32F-NEXT: andi a2, a1, 16
9141 ; RV64ZVE32F-NEXT: bnez a2, .LBB84_14
9142 ; RV64ZVE32F-NEXT: .LBB84_7: # %else8
9143 ; RV64ZVE32F-NEXT: andi a2, a1, 32
9144 ; RV64ZVE32F-NEXT: beqz a2, .LBB84_9
9145 ; RV64ZVE32F-NEXT: .LBB84_8: # %cond.store9
9146 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
9147 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9148 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9149 ; RV64ZVE32F-NEXT: add a2, a0, a2
9150 ; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
9151 ; RV64ZVE32F-NEXT: .LBB84_9: # %else10
9152 ; RV64ZVE32F-NEXT: andi a2, a1, 64
9153 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
9154 ; RV64ZVE32F-NEXT: bnez a2, .LBB84_15
9155 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
9156 ; RV64ZVE32F-NEXT: andi a1, a1, -128
9157 ; RV64ZVE32F-NEXT: bnez a1, .LBB84_16
9158 ; RV64ZVE32F-NEXT: .LBB84_11: # %else14
9159 ; RV64ZVE32F-NEXT: ret
9160 ; RV64ZVE32F-NEXT: .LBB84_12: # %cond.store3
9161 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9162 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9163 ; RV64ZVE32F-NEXT: add a2, a0, a2
9164 ; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
9165 ; RV64ZVE32F-NEXT: andi a2, a1, 8
9166 ; RV64ZVE32F-NEXT: beqz a2, .LBB84_6
9167 ; RV64ZVE32F-NEXT: .LBB84_13: # %cond.store5
9168 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
9169 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9170 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9171 ; RV64ZVE32F-NEXT: add a2, a0, a2
9172 ; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
9173 ; RV64ZVE32F-NEXT: andi a2, a1, 16
9174 ; RV64ZVE32F-NEXT: beqz a2, .LBB84_7
9175 ; RV64ZVE32F-NEXT: .LBB84_14: # %cond.store7
9176 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
9177 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9178 ; RV64ZVE32F-NEXT: add a2, a0, a2
9179 ; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
9180 ; RV64ZVE32F-NEXT: andi a2, a1, 32
9181 ; RV64ZVE32F-NEXT: bnez a2, .LBB84_8
9182 ; RV64ZVE32F-NEXT: j .LBB84_9
9183 ; RV64ZVE32F-NEXT: .LBB84_15: # %cond.store11
9184 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9185 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9186 ; RV64ZVE32F-NEXT: add a2, a0, a2
9187 ; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
9188 ; RV64ZVE32F-NEXT: andi a1, a1, -128
9189 ; RV64ZVE32F-NEXT: beqz a1, .LBB84_11
9190 ; RV64ZVE32F-NEXT: .LBB84_16: # %cond.store13
9191 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
9192 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
9193 ; RV64ZVE32F-NEXT: slli a1, a1, 3
9194 ; RV64ZVE32F-NEXT: add a0, a0, a1
9195 ; RV64ZVE32F-NEXT: fsd fa7, 0(a0)
9196 ; RV64ZVE32F-NEXT: ret
9197 %ptrs = getelementptr inbounds double, ptr %base, <8 x i16> %idxs
9198 call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
9199 ret void
9200 }
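; Explicitly sign-extended i16 indices; same lowering as the plain i16-index
; case above.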
9202 define void @mscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
9203 ; RV32V-LABEL: mscatter_baseidx_sext_v8i16_v8f64:
9205 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
9206 ; RV32V-NEXT: vsext.vf2 v14, v12
9207 ; RV32V-NEXT: vsll.vi v12, v14, 3
9208 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
9209 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
9212 ; RV64-LABEL: mscatter_baseidx_sext_v8i16_v8f64:
9214 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
9215 ; RV64-NEXT: vsext.vf4 v16, v12
9216 ; RV64-NEXT: vsll.vi v12, v16, 3
9217 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
9220 ; RV32ZVE32F-LABEL: mscatter_baseidx_sext_v8i16_v8f64:
9221 ; RV32ZVE32F: # %bb.0:
9222 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
9223 ; RV32ZVE32F-NEXT: vsext.vf2 v10, v8
9224 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
9225 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
9226 ; RV32ZVE32F-NEXT: vmv.x.s a1, v0
9227 ; RV32ZVE32F-NEXT: andi a2, a1, 1
9228 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
9229 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
9230 ; RV32ZVE32F-NEXT: bnez a2, .LBB85_9
9231 ; RV32ZVE32F-NEXT: # %bb.1: # %else
9232 ; RV32ZVE32F-NEXT: andi a0, a1, 2
9233 ; RV32ZVE32F-NEXT: bnez a0, .LBB85_10
9234 ; RV32ZVE32F-NEXT: .LBB85_2: # %else2
9235 ; RV32ZVE32F-NEXT: andi a0, a1, 4
9236 ; RV32ZVE32F-NEXT: bnez a0, .LBB85_11
9237 ; RV32ZVE32F-NEXT: .LBB85_3: # %else4
9238 ; RV32ZVE32F-NEXT: andi a0, a1, 8
9239 ; RV32ZVE32F-NEXT: bnez a0, .LBB85_12
9240 ; RV32ZVE32F-NEXT: .LBB85_4: # %else6
9241 ; RV32ZVE32F-NEXT: andi a0, a1, 16
9242 ; RV32ZVE32F-NEXT: bnez a0, .LBB85_13
9243 ; RV32ZVE32F-NEXT: .LBB85_5: # %else8
9244 ; RV32ZVE32F-NEXT: andi a0, a1, 32
9245 ; RV32ZVE32F-NEXT: bnez a0, .LBB85_14
9246 ; RV32ZVE32F-NEXT: .LBB85_6: # %else10
9247 ; RV32ZVE32F-NEXT: andi a0, a1, 64
9248 ; RV32ZVE32F-NEXT: bnez a0, .LBB85_15
9249 ; RV32ZVE32F-NEXT: .LBB85_7: # %else12
9250 ; RV32ZVE32F-NEXT: andi a0, a1, -128
9251 ; RV32ZVE32F-NEXT: bnez a0, .LBB85_16
9252 ; RV32ZVE32F-NEXT: .LBB85_8: # %else14
9253 ; RV32ZVE32F-NEXT: ret
9254 ; RV32ZVE32F-NEXT: .LBB85_9: # %cond.store
9255 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
9256 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
9257 ; RV32ZVE32F-NEXT: andi a0, a1, 2
9258 ; RV32ZVE32F-NEXT: beqz a0, .LBB85_2
9259 ; RV32ZVE32F-NEXT: .LBB85_10: # %cond.store1
9260 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9261 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
9262 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9263 ; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
9264 ; RV32ZVE32F-NEXT: andi a0, a1, 4
9265 ; RV32ZVE32F-NEXT: beqz a0, .LBB85_3
9266 ; RV32ZVE32F-NEXT: .LBB85_11: # %cond.store3
9267 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9268 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
9269 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9270 ; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
9271 ; RV32ZVE32F-NEXT: andi a0, a1, 8
9272 ; RV32ZVE32F-NEXT: beqz a0, .LBB85_4
9273 ; RV32ZVE32F-NEXT: .LBB85_12: # %cond.store5
9274 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9275 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
9276 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9277 ; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
9278 ; RV32ZVE32F-NEXT: andi a0, a1, 16
9279 ; RV32ZVE32F-NEXT: beqz a0, .LBB85_5
9280 ; RV32ZVE32F-NEXT: .LBB85_13: # %cond.store7
9281 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9282 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
9283 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9284 ; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
9285 ; RV32ZVE32F-NEXT: andi a0, a1, 32
9286 ; RV32ZVE32F-NEXT: beqz a0, .LBB85_6
9287 ; RV32ZVE32F-NEXT: .LBB85_14: # %cond.store9
9288 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9289 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
9290 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9291 ; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
9292 ; RV32ZVE32F-NEXT: andi a0, a1, 64
9293 ; RV32ZVE32F-NEXT: beqz a0, .LBB85_7
9294 ; RV32ZVE32F-NEXT: .LBB85_15: # %cond.store11
9295 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9296 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
9297 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9298 ; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
9299 ; RV32ZVE32F-NEXT: andi a0, a1, -128
9300 ; RV32ZVE32F-NEXT: beqz a0, .LBB85_8
9301 ; RV32ZVE32F-NEXT: .LBB85_16: # %cond.store13
9302 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9303 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
9304 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
9305 ; RV32ZVE32F-NEXT: fsd fa7, 0(a0)
9306 ; RV32ZVE32F-NEXT: ret
9308 ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i16_v8f64:
9309 ; RV64ZVE32F: # %bb.0:
9310 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
9311 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
9312 ; RV64ZVE32F-NEXT: andi a2, a1, 1
9313 ; RV64ZVE32F-NEXT: beqz a2, .LBB85_2
9314 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
9315 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
9316 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9317 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9318 ; RV64ZVE32F-NEXT: add a2, a0, a2
9319 ; RV64ZVE32F-NEXT: fsd fa0, 0(a2)
9320 ; RV64ZVE32F-NEXT: .LBB85_2: # %else
9321 ; RV64ZVE32F-NEXT: andi a2, a1, 2
9322 ; RV64ZVE32F-NEXT: beqz a2, .LBB85_4
9323 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
9324 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
9325 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
9326 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
9327 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9328 ; RV64ZVE32F-NEXT: add a2, a0, a2
9329 ; RV64ZVE32F-NEXT: fsd fa1, 0(a2)
9330 ; RV64ZVE32F-NEXT: .LBB85_4: # %else2
9331 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
9332 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
9333 ; RV64ZVE32F-NEXT: andi a2, a1, 4
9334 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
9335 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
9336 ; RV64ZVE32F-NEXT: bnez a2, .LBB85_12
9337 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
9338 ; RV64ZVE32F-NEXT: andi a2, a1, 8
9339 ; RV64ZVE32F-NEXT: bnez a2, .LBB85_13
9340 ; RV64ZVE32F-NEXT: .LBB85_6: # %else6
9341 ; RV64ZVE32F-NEXT: andi a2, a1, 16
9342 ; RV64ZVE32F-NEXT: bnez a2, .LBB85_14
9343 ; RV64ZVE32F-NEXT: .LBB85_7: # %else8
9344 ; RV64ZVE32F-NEXT: andi a2, a1, 32
9345 ; RV64ZVE32F-NEXT: beqz a2, .LBB85_9
9346 ; RV64ZVE32F-NEXT: .LBB85_8: # %cond.store9
9347 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
9348 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9349 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9350 ; RV64ZVE32F-NEXT: add a2, a0, a2
9351 ; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
9352 ; RV64ZVE32F-NEXT: .LBB85_9: # %else10
9353 ; RV64ZVE32F-NEXT: andi a2, a1, 64
9354 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
9355 ; RV64ZVE32F-NEXT: bnez a2, .LBB85_15
9356 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
9357 ; RV64ZVE32F-NEXT: andi a1, a1, -128
9358 ; RV64ZVE32F-NEXT: bnez a1, .LBB85_16
9359 ; RV64ZVE32F-NEXT: .LBB85_11: # %else14
9360 ; RV64ZVE32F-NEXT: ret
9361 ; RV64ZVE32F-NEXT: .LBB85_12: # %cond.store3
9362 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9363 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9364 ; RV64ZVE32F-NEXT: add a2, a0, a2
9365 ; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
9366 ; RV64ZVE32F-NEXT: andi a2, a1, 8
9367 ; RV64ZVE32F-NEXT: beqz a2, .LBB85_6
9368 ; RV64ZVE32F-NEXT: .LBB85_13: # %cond.store5
9369 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
9370 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9371 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9372 ; RV64ZVE32F-NEXT: add a2, a0, a2
9373 ; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
9374 ; RV64ZVE32F-NEXT: andi a2, a1, 16
9375 ; RV64ZVE32F-NEXT: beqz a2, .LBB85_7
9376 ; RV64ZVE32F-NEXT: .LBB85_14: # %cond.store7
9377 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
9378 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9379 ; RV64ZVE32F-NEXT: add a2, a0, a2
9380 ; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
9381 ; RV64ZVE32F-NEXT: andi a2, a1, 32
9382 ; RV64ZVE32F-NEXT: bnez a2, .LBB85_8
9383 ; RV64ZVE32F-NEXT: j .LBB85_9
9384 ; RV64ZVE32F-NEXT: .LBB85_15: # %cond.store11
9385 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9386 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9387 ; RV64ZVE32F-NEXT: add a2, a0, a2
9388 ; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
9389 ; RV64ZVE32F-NEXT: andi a1, a1, -128
9390 ; RV64ZVE32F-NEXT: beqz a1, .LBB85_11
9391 ; RV64ZVE32F-NEXT: .LBB85_16: # %cond.store13
9392 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
9393 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
9394 ; RV64ZVE32F-NEXT: slli a1, a1, 3
9395 ; RV64ZVE32F-NEXT: add a0, a0, a1
9396 ; RV64ZVE32F-NEXT: fsd fa7, 0(a0)
9397 ; RV64ZVE32F-NEXT: ret
9398 %eidxs = sext <8 x i16> %idxs to <8 x i64>
9399 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
9400 call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
9401 ret void
9402 }
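; Zero-extended i16 indices: the indexed-store targets widen to e32 and use
; vsoxei32; the RV64 Zve32f fallback masks each index with 0xffff (lui/addiw)
; before scaling.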
9404 define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
9405 ; RV32V-LABEL: mscatter_baseidx_zext_v8i16_v8f64:
9407 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
9408 ; RV32V-NEXT: vzext.vf2 v14, v12
9409 ; RV32V-NEXT: vsll.vi v12, v14, 3
9410 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
9411 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
9414 ; RV64-LABEL: mscatter_baseidx_zext_v8i16_v8f64:
9416 ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
9417 ; RV64-NEXT: vzext.vf2 v14, v12
9418 ; RV64-NEXT: vsll.vi v12, v14, 3
9419 ; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, ma
9420 ; RV64-NEXT: vsoxei32.v v8, (a0), v12, v0.t
9423 ; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8f64:
9424 ; RV32ZVE32F: # %bb.0:
9425 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
9426 ; RV32ZVE32F-NEXT: vzext.vf2 v10, v8
9427 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
9428 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
9429 ; RV32ZVE32F-NEXT: vmv.x.s a1, v0
9430 ; RV32ZVE32F-NEXT: andi a2, a1, 1
9431 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
9432 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
9433 ; RV32ZVE32F-NEXT: bnez a2, .LBB86_9
9434 ; RV32ZVE32F-NEXT: # %bb.1: # %else
9435 ; RV32ZVE32F-NEXT: andi a0, a1, 2
9436 ; RV32ZVE32F-NEXT: bnez a0, .LBB86_10
9437 ; RV32ZVE32F-NEXT: .LBB86_2: # %else2
9438 ; RV32ZVE32F-NEXT: andi a0, a1, 4
9439 ; RV32ZVE32F-NEXT: bnez a0, .LBB86_11
9440 ; RV32ZVE32F-NEXT: .LBB86_3: # %else4
9441 ; RV32ZVE32F-NEXT: andi a0, a1, 8
9442 ; RV32ZVE32F-NEXT: bnez a0, .LBB86_12
9443 ; RV32ZVE32F-NEXT: .LBB86_4: # %else6
9444 ; RV32ZVE32F-NEXT: andi a0, a1, 16
9445 ; RV32ZVE32F-NEXT: bnez a0, .LBB86_13
9446 ; RV32ZVE32F-NEXT: .LBB86_5: # %else8
9447 ; RV32ZVE32F-NEXT: andi a0, a1, 32
9448 ; RV32ZVE32F-NEXT: bnez a0, .LBB86_14
9449 ; RV32ZVE32F-NEXT: .LBB86_6: # %else10
9450 ; RV32ZVE32F-NEXT: andi a0, a1, 64
9451 ; RV32ZVE32F-NEXT: bnez a0, .LBB86_15
9452 ; RV32ZVE32F-NEXT: .LBB86_7: # %else12
9453 ; RV32ZVE32F-NEXT: andi a0, a1, -128
9454 ; RV32ZVE32F-NEXT: bnez a0, .LBB86_16
9455 ; RV32ZVE32F-NEXT: .LBB86_8: # %else14
9456 ; RV32ZVE32F-NEXT: ret
9457 ; RV32ZVE32F-NEXT: .LBB86_9: # %cond.store
9458 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
9459 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
9460 ; RV32ZVE32F-NEXT: andi a0, a1, 2
9461 ; RV32ZVE32F-NEXT: beqz a0, .LBB86_2
9462 ; RV32ZVE32F-NEXT: .LBB86_10: # %cond.store1
9463 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9464 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
9465 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9466 ; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
9467 ; RV32ZVE32F-NEXT: andi a0, a1, 4
9468 ; RV32ZVE32F-NEXT: beqz a0, .LBB86_3
9469 ; RV32ZVE32F-NEXT: .LBB86_11: # %cond.store3
9470 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9471 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
9472 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9473 ; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
9474 ; RV32ZVE32F-NEXT: andi a0, a1, 8
9475 ; RV32ZVE32F-NEXT: beqz a0, .LBB86_4
9476 ; RV32ZVE32F-NEXT: .LBB86_12: # %cond.store5
9477 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9478 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
9479 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9480 ; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
9481 ; RV32ZVE32F-NEXT: andi a0, a1, 16
9482 ; RV32ZVE32F-NEXT: beqz a0, .LBB86_5
9483 ; RV32ZVE32F-NEXT: .LBB86_13: # %cond.store7
9484 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9485 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
9486 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9487 ; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
9488 ; RV32ZVE32F-NEXT: andi a0, a1, 32
9489 ; RV32ZVE32F-NEXT: beqz a0, .LBB86_6
9490 ; RV32ZVE32F-NEXT: .LBB86_14: # %cond.store9
9491 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9492 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
9493 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9494 ; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
9495 ; RV32ZVE32F-NEXT: andi a0, a1, 64
9496 ; RV32ZVE32F-NEXT: beqz a0, .LBB86_7
9497 ; RV32ZVE32F-NEXT: .LBB86_15: # %cond.store11
9498 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9499 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
9500 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9501 ; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
9502 ; RV32ZVE32F-NEXT: andi a0, a1, -128
9503 ; RV32ZVE32F-NEXT: beqz a0, .LBB86_8
9504 ; RV32ZVE32F-NEXT: .LBB86_16: # %cond.store13
9505 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9506 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
9507 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
9508 ; RV32ZVE32F-NEXT: fsd fa7, 0(a0)
9509 ; RV32ZVE32F-NEXT: ret
9511 ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8f64:
9512 ; RV64ZVE32F: # %bb.0:
9513 ; RV64ZVE32F-NEXT: lui a1, 16
9514 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
9515 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
9516 ; RV64ZVE32F-NEXT: andi a3, a2, 1
9517 ; RV64ZVE32F-NEXT: addiw a1, a1, -1
9518 ; RV64ZVE32F-NEXT: beqz a3, .LBB86_2
9519 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
9520 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
9521 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
9522 ; RV64ZVE32F-NEXT: and a3, a3, a1
9523 ; RV64ZVE32F-NEXT: slli a3, a3, 3
9524 ; RV64ZVE32F-NEXT: add a3, a0, a3
9525 ; RV64ZVE32F-NEXT: fsd fa0, 0(a3)
9526 ; RV64ZVE32F-NEXT: .LBB86_2: # %else
9527 ; RV64ZVE32F-NEXT: andi a3, a2, 2
9528 ; RV64ZVE32F-NEXT: beqz a3, .LBB86_4
9529 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
9530 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
9531 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
9532 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9
9533 ; RV64ZVE32F-NEXT: and a3, a3, a1
9534 ; RV64ZVE32F-NEXT: slli a3, a3, 3
9535 ; RV64ZVE32F-NEXT: add a3, a0, a3
9536 ; RV64ZVE32F-NEXT: fsd fa1, 0(a3)
9537 ; RV64ZVE32F-NEXT: .LBB86_4: # %else2
9538 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
9539 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
9540 ; RV64ZVE32F-NEXT: andi a3, a2, 4
9541 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
9542 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
9543 ; RV64ZVE32F-NEXT: bnez a3, .LBB86_12
9544 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
9545 ; RV64ZVE32F-NEXT: andi a3, a2, 8
9546 ; RV64ZVE32F-NEXT: bnez a3, .LBB86_13
9547 ; RV64ZVE32F-NEXT: .LBB86_6: # %else6
9548 ; RV64ZVE32F-NEXT: andi a3, a2, 16
9549 ; RV64ZVE32F-NEXT: bnez a3, .LBB86_14
9550 ; RV64ZVE32F-NEXT: .LBB86_7: # %else8
9551 ; RV64ZVE32F-NEXT: andi a3, a2, 32
9552 ; RV64ZVE32F-NEXT: beqz a3, .LBB86_9
9553 ; RV64ZVE32F-NEXT: .LBB86_8: # %cond.store9
9554 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
9555 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
9556 ; RV64ZVE32F-NEXT: and a3, a3, a1
9557 ; RV64ZVE32F-NEXT: slli a3, a3, 3
9558 ; RV64ZVE32F-NEXT: add a3, a0, a3
9559 ; RV64ZVE32F-NEXT: fsd fa5, 0(a3)
9560 ; RV64ZVE32F-NEXT: .LBB86_9: # %else10
9561 ; RV64ZVE32F-NEXT: andi a3, a2, 64
9562 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
9563 ; RV64ZVE32F-NEXT: bnez a3, .LBB86_15
9564 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
9565 ; RV64ZVE32F-NEXT: andi a2, a2, -128
9566 ; RV64ZVE32F-NEXT: bnez a2, .LBB86_16
9567 ; RV64ZVE32F-NEXT: .LBB86_11: # %else14
9568 ; RV64ZVE32F-NEXT: ret
9569 ; RV64ZVE32F-NEXT: .LBB86_12: # %cond.store3
9570 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
9571 ; RV64ZVE32F-NEXT: and a3, a3, a1
9572 ; RV64ZVE32F-NEXT: slli a3, a3, 3
9573 ; RV64ZVE32F-NEXT: add a3, a0, a3
9574 ; RV64ZVE32F-NEXT: fsd fa2, 0(a3)
9575 ; RV64ZVE32F-NEXT: andi a3, a2, 8
9576 ; RV64ZVE32F-NEXT: beqz a3, .LBB86_6
9577 ; RV64ZVE32F-NEXT: .LBB86_13: # %cond.store5
9578 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
9579 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
9580 ; RV64ZVE32F-NEXT: and a3, a3, a1
9581 ; RV64ZVE32F-NEXT: slli a3, a3, 3
9582 ; RV64ZVE32F-NEXT: add a3, a0, a3
9583 ; RV64ZVE32F-NEXT: fsd fa3, 0(a3)
9584 ; RV64ZVE32F-NEXT: andi a3, a2, 16
9585 ; RV64ZVE32F-NEXT: beqz a3, .LBB86_7
9586 ; RV64ZVE32F-NEXT: .LBB86_14: # %cond.store7
9587 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9
9588 ; RV64ZVE32F-NEXT: and a3, a3, a1
9589 ; RV64ZVE32F-NEXT: slli a3, a3, 3
9590 ; RV64ZVE32F-NEXT: add a3, a0, a3
9591 ; RV64ZVE32F-NEXT: fsd fa4, 0(a3)
9592 ; RV64ZVE32F-NEXT: andi a3, a2, 32
9593 ; RV64ZVE32F-NEXT: bnez a3, .LBB86_8
9594 ; RV64ZVE32F-NEXT: j .LBB86_9
9595 ; RV64ZVE32F-NEXT: .LBB86_15: # %cond.store11
9596 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
9597 ; RV64ZVE32F-NEXT: and a3, a3, a1
9598 ; RV64ZVE32F-NEXT: slli a3, a3, 3
9599 ; RV64ZVE32F-NEXT: add a3, a0, a3
9600 ; RV64ZVE32F-NEXT: fsd fa6, 0(a3)
9601 ; RV64ZVE32F-NEXT: andi a2, a2, -128
9602 ; RV64ZVE32F-NEXT: beqz a2, .LBB86_11
9603 ; RV64ZVE32F-NEXT: .LBB86_16: # %cond.store13
9604 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
9605 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9606 ; RV64ZVE32F-NEXT: and a1, a2, a1
9607 ; RV64ZVE32F-NEXT: slli a1, a1, 3
9608 ; RV64ZVE32F-NEXT: add a0, a0, a1
9609 ; RV64ZVE32F-NEXT: fsd fa7, 0(a0)
9610 ; RV64ZVE32F-NEXT: ret
9611 %eidxs = zext <8 x i16> %idxs to <8 x i64>
9612 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
9613 call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
9614 ret void
9615 }
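; i32 indices: on RV32 they are already pointer-sized and are only scaled;
; RV64 sign-extends them to 64 bits first.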
9617 define void @mscatter_baseidx_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m) {
9618 ; RV32V-LABEL: mscatter_baseidx_v8i32_v8f64:
9620 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
9621 ; RV32V-NEXT: vsll.vi v12, v12, 3
9622 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
9623 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
9626 ; RV64-LABEL: mscatter_baseidx_v8i32_v8f64:
9628 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
9629 ; RV64-NEXT: vsext.vf2 v16, v12
9630 ; RV64-NEXT: vsll.vi v12, v16, 3
9631 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
9634 ; RV32ZVE32F-LABEL: mscatter_baseidx_v8i32_v8f64:
9635 ; RV32ZVE32F: # %bb.0:
9636 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
9637 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
9638 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
9639 ; RV32ZVE32F-NEXT: vmv.x.s a1, v0
9640 ; RV32ZVE32F-NEXT: andi a2, a1, 1
9641 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
9642 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
9643 ; RV32ZVE32F-NEXT: bnez a2, .LBB87_9
9644 ; RV32ZVE32F-NEXT: # %bb.1: # %else
9645 ; RV32ZVE32F-NEXT: andi a0, a1, 2
9646 ; RV32ZVE32F-NEXT: bnez a0, .LBB87_10
9647 ; RV32ZVE32F-NEXT: .LBB87_2: # %else2
9648 ; RV32ZVE32F-NEXT: andi a0, a1, 4
9649 ; RV32ZVE32F-NEXT: bnez a0, .LBB87_11
9650 ; RV32ZVE32F-NEXT: .LBB87_3: # %else4
9651 ; RV32ZVE32F-NEXT: andi a0, a1, 8
9652 ; RV32ZVE32F-NEXT: bnez a0, .LBB87_12
9653 ; RV32ZVE32F-NEXT: .LBB87_4: # %else6
9654 ; RV32ZVE32F-NEXT: andi a0, a1, 16
9655 ; RV32ZVE32F-NEXT: bnez a0, .LBB87_13
9656 ; RV32ZVE32F-NEXT: .LBB87_5: # %else8
9657 ; RV32ZVE32F-NEXT: andi a0, a1, 32
9658 ; RV32ZVE32F-NEXT: bnez a0, .LBB87_14
9659 ; RV32ZVE32F-NEXT: .LBB87_6: # %else10
9660 ; RV32ZVE32F-NEXT: andi a0, a1, 64
9661 ; RV32ZVE32F-NEXT: bnez a0, .LBB87_15
9662 ; RV32ZVE32F-NEXT: .LBB87_7: # %else12
9663 ; RV32ZVE32F-NEXT: andi a0, a1, -128
9664 ; RV32ZVE32F-NEXT: bnez a0, .LBB87_16
9665 ; RV32ZVE32F-NEXT: .LBB87_8: # %else14
9666 ; RV32ZVE32F-NEXT: ret
9667 ; RV32ZVE32F-NEXT: .LBB87_9: # %cond.store
9668 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
9669 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
9670 ; RV32ZVE32F-NEXT: andi a0, a1, 2
9671 ; RV32ZVE32F-NEXT: beqz a0, .LBB87_2
9672 ; RV32ZVE32F-NEXT: .LBB87_10: # %cond.store1
9673 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9674 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
9675 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9676 ; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
9677 ; RV32ZVE32F-NEXT: andi a0, a1, 4
9678 ; RV32ZVE32F-NEXT: beqz a0, .LBB87_3
9679 ; RV32ZVE32F-NEXT: .LBB87_11: # %cond.store3
9680 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9681 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
9682 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9683 ; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
9684 ; RV32ZVE32F-NEXT: andi a0, a1, 8
9685 ; RV32ZVE32F-NEXT: beqz a0, .LBB87_4
9686 ; RV32ZVE32F-NEXT: .LBB87_12: # %cond.store5
9687 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9688 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
9689 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9690 ; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
9691 ; RV32ZVE32F-NEXT: andi a0, a1, 16
9692 ; RV32ZVE32F-NEXT: beqz a0, .LBB87_5
9693 ; RV32ZVE32F-NEXT: .LBB87_13: # %cond.store7
9694 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9695 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
9696 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9697 ; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
9698 ; RV32ZVE32F-NEXT: andi a0, a1, 32
9699 ; RV32ZVE32F-NEXT: beqz a0, .LBB87_6
9700 ; RV32ZVE32F-NEXT: .LBB87_14: # %cond.store9
9701 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9702 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
9703 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9704 ; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
9705 ; RV32ZVE32F-NEXT: andi a0, a1, 64
9706 ; RV32ZVE32F-NEXT: beqz a0, .LBB87_7
9707 ; RV32ZVE32F-NEXT: .LBB87_15: # %cond.store11
9708 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9709 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
9710 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9711 ; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
9712 ; RV32ZVE32F-NEXT: andi a0, a1, -128
9713 ; RV32ZVE32F-NEXT: beqz a0, .LBB87_8
9714 ; RV32ZVE32F-NEXT: .LBB87_16: # %cond.store13
9715 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9716 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
9717 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
9718 ; RV32ZVE32F-NEXT: fsd fa7, 0(a0)
9719 ; RV32ZVE32F-NEXT: ret
9721 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i32_v8f64:
9722 ; RV64ZVE32F: # %bb.0:
9723 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
9724 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
9725 ; RV64ZVE32F-NEXT: andi a2, a1, 1
9726 ; RV64ZVE32F-NEXT: beqz a2, .LBB87_2
9727 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
9728 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
9729 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9730 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9731 ; RV64ZVE32F-NEXT: add a2, a0, a2
9732 ; RV64ZVE32F-NEXT: fsd fa0, 0(a2)
9733 ; RV64ZVE32F-NEXT: .LBB87_2: # %else
9734 ; RV64ZVE32F-NEXT: andi a2, a1, 2
9735 ; RV64ZVE32F-NEXT: beqz a2, .LBB87_4
9736 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
9737 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9738 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
9739 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
9740 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9741 ; RV64ZVE32F-NEXT: add a2, a0, a2
9742 ; RV64ZVE32F-NEXT: fsd fa1, 0(a2)
9743 ; RV64ZVE32F-NEXT: .LBB87_4: # %else2
9744 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
9745 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
9746 ; RV64ZVE32F-NEXT: andi a2, a1, 4
9747 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
9748 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
9749 ; RV64ZVE32F-NEXT: bnez a2, .LBB87_12
9750 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
9751 ; RV64ZVE32F-NEXT: andi a2, a1, 8
9752 ; RV64ZVE32F-NEXT: bnez a2, .LBB87_13
9753 ; RV64ZVE32F-NEXT: .LBB87_6: # %else6
9754 ; RV64ZVE32F-NEXT: andi a2, a1, 16
9755 ; RV64ZVE32F-NEXT: bnez a2, .LBB87_14
9756 ; RV64ZVE32F-NEXT: .LBB87_7: # %else8
9757 ; RV64ZVE32F-NEXT: andi a2, a1, 32
9758 ; RV64ZVE32F-NEXT: beqz a2, .LBB87_9
9759 ; RV64ZVE32F-NEXT: .LBB87_8: # %cond.store9
9760 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
9761 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9762 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9763 ; RV64ZVE32F-NEXT: add a2, a0, a2
9764 ; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
9765 ; RV64ZVE32F-NEXT: .LBB87_9: # %else10
9766 ; RV64ZVE32F-NEXT: andi a2, a1, 64
9767 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
9768 ; RV64ZVE32F-NEXT: bnez a2, .LBB87_15
9769 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
9770 ; RV64ZVE32F-NEXT: andi a1, a1, -128
9771 ; RV64ZVE32F-NEXT: bnez a1, .LBB87_16
9772 ; RV64ZVE32F-NEXT: .LBB87_11: # %else14
9773 ; RV64ZVE32F-NEXT: ret
9774 ; RV64ZVE32F-NEXT: .LBB87_12: # %cond.store3
9775 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9776 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9777 ; RV64ZVE32F-NEXT: add a2, a0, a2
9778 ; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
9779 ; RV64ZVE32F-NEXT: andi a2, a1, 8
9780 ; RV64ZVE32F-NEXT: beqz a2, .LBB87_6
9781 ; RV64ZVE32F-NEXT: .LBB87_13: # %cond.store5
9782 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
9783 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9784 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9785 ; RV64ZVE32F-NEXT: add a2, a0, a2
9786 ; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
9787 ; RV64ZVE32F-NEXT: andi a2, a1, 16
9788 ; RV64ZVE32F-NEXT: beqz a2, .LBB87_7
9789 ; RV64ZVE32F-NEXT: .LBB87_14: # %cond.store7
9790 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
9791 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9792 ; RV64ZVE32F-NEXT: add a2, a0, a2
9793 ; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
9794 ; RV64ZVE32F-NEXT: andi a2, a1, 32
9795 ; RV64ZVE32F-NEXT: bnez a2, .LBB87_8
9796 ; RV64ZVE32F-NEXT: j .LBB87_9
9797 ; RV64ZVE32F-NEXT: .LBB87_15: # %cond.store11
9798 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9799 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9800 ; RV64ZVE32F-NEXT: add a2, a0, a2
9801 ; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
9802 ; RV64ZVE32F-NEXT: andi a1, a1, -128
9803 ; RV64ZVE32F-NEXT: beqz a1, .LBB87_11
9804 ; RV64ZVE32F-NEXT: .LBB87_16: # %cond.store13
9805 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
9806 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
9807 ; RV64ZVE32F-NEXT: slli a1, a1, 3
9808 ; RV64ZVE32F-NEXT: add a0, a0, a1
9809 ; RV64ZVE32F-NEXT: fsd fa7, 0(a0)
9810 ; RV64ZVE32F-NEXT: ret
9811 %ptrs = getelementptr inbounds double, ptr %base, <8 x i32> %idxs
9812 call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
9813 ret void
9814 }
9816 define void @mscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m) {
9817 ; RV32V-LABEL: mscatter_baseidx_sext_v8i32_v8f64:
9818 ; RV32V: # %bb.0:
9819 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
9820 ; RV32V-NEXT: vsll.vi v12, v12, 3
9821 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
9822 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
9823 ; RV32V-NEXT: ret
9825 ; RV64-LABEL: mscatter_baseidx_sext_v8i32_v8f64:
9826 ; RV64: # %bb.0:
9827 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
9828 ; RV64-NEXT: vsext.vf2 v16, v12
9829 ; RV64-NEXT: vsll.vi v12, v16, 3
9830 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
9831 ; RV64-NEXT: ret
9833 ; RV32ZVE32F-LABEL: mscatter_baseidx_sext_v8i32_v8f64:
9834 ; RV32ZVE32F: # %bb.0:
9835 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
9836 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
9837 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
9838 ; RV32ZVE32F-NEXT: vmv.x.s a1, v0
9839 ; RV32ZVE32F-NEXT: andi a2, a1, 1
9840 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
9841 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
9842 ; RV32ZVE32F-NEXT: bnez a2, .LBB88_9
9843 ; RV32ZVE32F-NEXT: # %bb.1: # %else
9844 ; RV32ZVE32F-NEXT: andi a0, a1, 2
9845 ; RV32ZVE32F-NEXT: bnez a0, .LBB88_10
9846 ; RV32ZVE32F-NEXT: .LBB88_2: # %else2
9847 ; RV32ZVE32F-NEXT: andi a0, a1, 4
9848 ; RV32ZVE32F-NEXT: bnez a0, .LBB88_11
9849 ; RV32ZVE32F-NEXT: .LBB88_3: # %else4
9850 ; RV32ZVE32F-NEXT: andi a0, a1, 8
9851 ; RV32ZVE32F-NEXT: bnez a0, .LBB88_12
9852 ; RV32ZVE32F-NEXT: .LBB88_4: # %else6
9853 ; RV32ZVE32F-NEXT: andi a0, a1, 16
9854 ; RV32ZVE32F-NEXT: bnez a0, .LBB88_13
9855 ; RV32ZVE32F-NEXT: .LBB88_5: # %else8
9856 ; RV32ZVE32F-NEXT: andi a0, a1, 32
9857 ; RV32ZVE32F-NEXT: bnez a0, .LBB88_14
9858 ; RV32ZVE32F-NEXT: .LBB88_6: # %else10
9859 ; RV32ZVE32F-NEXT: andi a0, a1, 64
9860 ; RV32ZVE32F-NEXT: bnez a0, .LBB88_15
9861 ; RV32ZVE32F-NEXT: .LBB88_7: # %else12
9862 ; RV32ZVE32F-NEXT: andi a0, a1, -128
9863 ; RV32ZVE32F-NEXT: bnez a0, .LBB88_16
9864 ; RV32ZVE32F-NEXT: .LBB88_8: # %else14
9865 ; RV32ZVE32F-NEXT: ret
9866 ; RV32ZVE32F-NEXT: .LBB88_9: # %cond.store
9867 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
9868 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
9869 ; RV32ZVE32F-NEXT: andi a0, a1, 2
9870 ; RV32ZVE32F-NEXT: beqz a0, .LBB88_2
9871 ; RV32ZVE32F-NEXT: .LBB88_10: # %cond.store1
9872 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9873 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
9874 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9875 ; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
9876 ; RV32ZVE32F-NEXT: andi a0, a1, 4
9877 ; RV32ZVE32F-NEXT: beqz a0, .LBB88_3
9878 ; RV32ZVE32F-NEXT: .LBB88_11: # %cond.store3
9879 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9880 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
9881 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9882 ; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
9883 ; RV32ZVE32F-NEXT: andi a0, a1, 8
9884 ; RV32ZVE32F-NEXT: beqz a0, .LBB88_4
9885 ; RV32ZVE32F-NEXT: .LBB88_12: # %cond.store5
9886 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9887 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
9888 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9889 ; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
9890 ; RV32ZVE32F-NEXT: andi a0, a1, 16
9891 ; RV32ZVE32F-NEXT: beqz a0, .LBB88_5
9892 ; RV32ZVE32F-NEXT: .LBB88_13: # %cond.store7
9893 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9894 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
9895 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9896 ; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
9897 ; RV32ZVE32F-NEXT: andi a0, a1, 32
9898 ; RV32ZVE32F-NEXT: beqz a0, .LBB88_6
9899 ; RV32ZVE32F-NEXT: .LBB88_14: # %cond.store9
9900 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9901 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
9902 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9903 ; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
9904 ; RV32ZVE32F-NEXT: andi a0, a1, 64
9905 ; RV32ZVE32F-NEXT: beqz a0, .LBB88_7
9906 ; RV32ZVE32F-NEXT: .LBB88_15: # %cond.store11
9907 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9908 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
9909 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9910 ; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
9911 ; RV32ZVE32F-NEXT: andi a0, a1, -128
9912 ; RV32ZVE32F-NEXT: beqz a0, .LBB88_8
9913 ; RV32ZVE32F-NEXT: .LBB88_16: # %cond.store13
9914 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9915 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
9916 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
9917 ; RV32ZVE32F-NEXT: fsd fa7, 0(a0)
9918 ; RV32ZVE32F-NEXT: ret
9920 ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i32_v8f64:
9921 ; RV64ZVE32F: # %bb.0:
9922 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
9923 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
9924 ; RV64ZVE32F-NEXT: andi a2, a1, 1
9925 ; RV64ZVE32F-NEXT: beqz a2, .LBB88_2
9926 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
9927 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
9928 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9929 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9930 ; RV64ZVE32F-NEXT: add a2, a0, a2
9931 ; RV64ZVE32F-NEXT: fsd fa0, 0(a2)
9932 ; RV64ZVE32F-NEXT: .LBB88_2: # %else
9933 ; RV64ZVE32F-NEXT: andi a2, a1, 2
9934 ; RV64ZVE32F-NEXT: beqz a2, .LBB88_4
9935 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
9936 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9937 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
9938 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
9939 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9940 ; RV64ZVE32F-NEXT: add a2, a0, a2
9941 ; RV64ZVE32F-NEXT: fsd fa1, 0(a2)
9942 ; RV64ZVE32F-NEXT: .LBB88_4: # %else2
9943 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
9944 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
9945 ; RV64ZVE32F-NEXT: andi a2, a1, 4
9946 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
9947 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
9948 ; RV64ZVE32F-NEXT: bnez a2, .LBB88_12
9949 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
9950 ; RV64ZVE32F-NEXT: andi a2, a1, 8
9951 ; RV64ZVE32F-NEXT: bnez a2, .LBB88_13
9952 ; RV64ZVE32F-NEXT: .LBB88_6: # %else6
9953 ; RV64ZVE32F-NEXT: andi a2, a1, 16
9954 ; RV64ZVE32F-NEXT: bnez a2, .LBB88_14
9955 ; RV64ZVE32F-NEXT: .LBB88_7: # %else8
9956 ; RV64ZVE32F-NEXT: andi a2, a1, 32
9957 ; RV64ZVE32F-NEXT: beqz a2, .LBB88_9
9958 ; RV64ZVE32F-NEXT: .LBB88_8: # %cond.store9
9959 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
9960 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9961 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9962 ; RV64ZVE32F-NEXT: add a2, a0, a2
9963 ; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
9964 ; RV64ZVE32F-NEXT: .LBB88_9: # %else10
9965 ; RV64ZVE32F-NEXT: andi a2, a1, 64
9966 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
9967 ; RV64ZVE32F-NEXT: bnez a2, .LBB88_15
9968 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
9969 ; RV64ZVE32F-NEXT: andi a1, a1, -128
9970 ; RV64ZVE32F-NEXT: bnez a1, .LBB88_16
9971 ; RV64ZVE32F-NEXT: .LBB88_11: # %else14
9972 ; RV64ZVE32F-NEXT: ret
9973 ; RV64ZVE32F-NEXT: .LBB88_12: # %cond.store3
9974 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9975 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9976 ; RV64ZVE32F-NEXT: add a2, a0, a2
9977 ; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
9978 ; RV64ZVE32F-NEXT: andi a2, a1, 8
9979 ; RV64ZVE32F-NEXT: beqz a2, .LBB88_6
9980 ; RV64ZVE32F-NEXT: .LBB88_13: # %cond.store5
9981 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
9982 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9983 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9984 ; RV64ZVE32F-NEXT: add a2, a0, a2
9985 ; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
9986 ; RV64ZVE32F-NEXT: andi a2, a1, 16
9987 ; RV64ZVE32F-NEXT: beqz a2, .LBB88_7
9988 ; RV64ZVE32F-NEXT: .LBB88_14: # %cond.store7
9989 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
9990 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9991 ; RV64ZVE32F-NEXT: add a2, a0, a2
9992 ; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
9993 ; RV64ZVE32F-NEXT: andi a2, a1, 32
9994 ; RV64ZVE32F-NEXT: bnez a2, .LBB88_8
9995 ; RV64ZVE32F-NEXT: j .LBB88_9
9996 ; RV64ZVE32F-NEXT: .LBB88_15: # %cond.store11
9997 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9998 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9999 ; RV64ZVE32F-NEXT: add a2, a0, a2
10000 ; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
10001 ; RV64ZVE32F-NEXT: andi a1, a1, -128
10002 ; RV64ZVE32F-NEXT: beqz a1, .LBB88_11
10003 ; RV64ZVE32F-NEXT: .LBB88_16: # %cond.store13
10004 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
10005 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
10006 ; RV64ZVE32F-NEXT: slli a1, a1, 3
10007 ; RV64ZVE32F-NEXT: add a0, a0, a1
10008 ; RV64ZVE32F-NEXT: fsd fa7, 0(a0)
10009 ; RV64ZVE32F-NEXT: ret
10010 %eidxs = sext <8 x i32> %idxs to <8 x i64>
10011 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
10012 call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
10013 ret void
10014 }
10016 define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m) {
10017 ; RV32V-LABEL: mscatter_baseidx_zext_v8i32_v8f64:
10018 ; RV32V: # %bb.0:
10019 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
10020 ; RV32V-NEXT: vsll.vi v12, v12, 3
10021 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
10022 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
10023 ; RV32V-NEXT: ret
10025 ; RV64-LABEL: mscatter_baseidx_zext_v8i32_v8f64:
10026 ; RV64: # %bb.0:
10027 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
10028 ; RV64-NEXT: vzext.vf2 v16, v12
10029 ; RV64-NEXT: vsll.vi v12, v16, 3
10030 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
10031 ; RV64-NEXT: ret
10033 ; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i32_v8f64:
10034 ; RV32ZVE32F: # %bb.0:
10035 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
10036 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
10037 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
10038 ; RV32ZVE32F-NEXT: vmv.x.s a1, v0
10039 ; RV32ZVE32F-NEXT: andi a2, a1, 1
10040 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
10041 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
10042 ; RV32ZVE32F-NEXT: bnez a2, .LBB89_9
10043 ; RV32ZVE32F-NEXT: # %bb.1: # %else
10044 ; RV32ZVE32F-NEXT: andi a0, a1, 2
10045 ; RV32ZVE32F-NEXT: bnez a0, .LBB89_10
10046 ; RV32ZVE32F-NEXT: .LBB89_2: # %else2
10047 ; RV32ZVE32F-NEXT: andi a0, a1, 4
10048 ; RV32ZVE32F-NEXT: bnez a0, .LBB89_11
10049 ; RV32ZVE32F-NEXT: .LBB89_3: # %else4
10050 ; RV32ZVE32F-NEXT: andi a0, a1, 8
10051 ; RV32ZVE32F-NEXT: bnez a0, .LBB89_12
10052 ; RV32ZVE32F-NEXT: .LBB89_4: # %else6
10053 ; RV32ZVE32F-NEXT: andi a0, a1, 16
10054 ; RV32ZVE32F-NEXT: bnez a0, .LBB89_13
10055 ; RV32ZVE32F-NEXT: .LBB89_5: # %else8
10056 ; RV32ZVE32F-NEXT: andi a0, a1, 32
10057 ; RV32ZVE32F-NEXT: bnez a0, .LBB89_14
10058 ; RV32ZVE32F-NEXT: .LBB89_6: # %else10
10059 ; RV32ZVE32F-NEXT: andi a0, a1, 64
10060 ; RV32ZVE32F-NEXT: bnez a0, .LBB89_15
10061 ; RV32ZVE32F-NEXT: .LBB89_7: # %else12
10062 ; RV32ZVE32F-NEXT: andi a0, a1, -128
10063 ; RV32ZVE32F-NEXT: bnez a0, .LBB89_16
10064 ; RV32ZVE32F-NEXT: .LBB89_8: # %else14
10065 ; RV32ZVE32F-NEXT: ret
10066 ; RV32ZVE32F-NEXT: .LBB89_9: # %cond.store
10067 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
10068 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
10069 ; RV32ZVE32F-NEXT: andi a0, a1, 2
10070 ; RV32ZVE32F-NEXT: beqz a0, .LBB89_2
10071 ; RV32ZVE32F-NEXT: .LBB89_10: # %cond.store1
10072 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
10073 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
10074 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
10075 ; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
10076 ; RV32ZVE32F-NEXT: andi a0, a1, 4
10077 ; RV32ZVE32F-NEXT: beqz a0, .LBB89_3
10078 ; RV32ZVE32F-NEXT: .LBB89_11: # %cond.store3
10079 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
10080 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
10081 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
10082 ; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
10083 ; RV32ZVE32F-NEXT: andi a0, a1, 8
10084 ; RV32ZVE32F-NEXT: beqz a0, .LBB89_4
10085 ; RV32ZVE32F-NEXT: .LBB89_12: # %cond.store5
10086 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
10087 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
10088 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
10089 ; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
10090 ; RV32ZVE32F-NEXT: andi a0, a1, 16
10091 ; RV32ZVE32F-NEXT: beqz a0, .LBB89_5
10092 ; RV32ZVE32F-NEXT: .LBB89_13: # %cond.store7
10093 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
10094 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
10095 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
10096 ; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
10097 ; RV32ZVE32F-NEXT: andi a0, a1, 32
10098 ; RV32ZVE32F-NEXT: beqz a0, .LBB89_6
10099 ; RV32ZVE32F-NEXT: .LBB89_14: # %cond.store9
10100 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
10101 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
10102 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
10103 ; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
10104 ; RV32ZVE32F-NEXT: andi a0, a1, 64
10105 ; RV32ZVE32F-NEXT: beqz a0, .LBB89_7
10106 ; RV32ZVE32F-NEXT: .LBB89_15: # %cond.store11
10107 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
10108 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
10109 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
10110 ; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
10111 ; RV32ZVE32F-NEXT: andi a0, a1, -128
10112 ; RV32ZVE32F-NEXT: beqz a0, .LBB89_8
10113 ; RV32ZVE32F-NEXT: .LBB89_16: # %cond.store13
10114 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
10115 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
10116 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
10117 ; RV32ZVE32F-NEXT: fsd fa7, 0(a0)
10118 ; RV32ZVE32F-NEXT: ret
10120 ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i32_v8f64:
10121 ; RV64ZVE32F: # %bb.0:
10122 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10123 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
10124 ; RV64ZVE32F-NEXT: andi a2, a1, 1
10125 ; RV64ZVE32F-NEXT: beqz a2, .LBB89_2
10126 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
10127 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
10128 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
10129 ; RV64ZVE32F-NEXT: slli a2, a2, 32
10130 ; RV64ZVE32F-NEXT: srli a2, a2, 29
10131 ; RV64ZVE32F-NEXT: add a2, a0, a2
10132 ; RV64ZVE32F-NEXT: fsd fa0, 0(a2)
10133 ; RV64ZVE32F-NEXT: .LBB89_2: # %else
10134 ; RV64ZVE32F-NEXT: andi a2, a1, 2
10135 ; RV64ZVE32F-NEXT: beqz a2, .LBB89_4
10136 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
10137 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
10138 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
10139 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
10140 ; RV64ZVE32F-NEXT: slli a2, a2, 32
10141 ; RV64ZVE32F-NEXT: srli a2, a2, 29
10142 ; RV64ZVE32F-NEXT: add a2, a0, a2
10143 ; RV64ZVE32F-NEXT: fsd fa1, 0(a2)
10144 ; RV64ZVE32F-NEXT: .LBB89_4: # %else2
10145 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
10146 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
10147 ; RV64ZVE32F-NEXT: andi a2, a1, 4
10148 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
10149 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
10150 ; RV64ZVE32F-NEXT: bnez a2, .LBB89_12
10151 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
10152 ; RV64ZVE32F-NEXT: andi a2, a1, 8
10153 ; RV64ZVE32F-NEXT: bnez a2, .LBB89_13
10154 ; RV64ZVE32F-NEXT: .LBB89_6: # %else6
10155 ; RV64ZVE32F-NEXT: andi a2, a1, 16
10156 ; RV64ZVE32F-NEXT: bnez a2, .LBB89_14
10157 ; RV64ZVE32F-NEXT: .LBB89_7: # %else8
10158 ; RV64ZVE32F-NEXT: andi a2, a1, 32
10159 ; RV64ZVE32F-NEXT: beqz a2, .LBB89_9
10160 ; RV64ZVE32F-NEXT: .LBB89_8: # %cond.store9
10161 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
10162 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
10163 ; RV64ZVE32F-NEXT: slli a2, a2, 32
10164 ; RV64ZVE32F-NEXT: srli a2, a2, 29
10165 ; RV64ZVE32F-NEXT: add a2, a0, a2
10166 ; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
10167 ; RV64ZVE32F-NEXT: .LBB89_9: # %else10
10168 ; RV64ZVE32F-NEXT: andi a2, a1, 64
10169 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
10170 ; RV64ZVE32F-NEXT: bnez a2, .LBB89_15
10171 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
10172 ; RV64ZVE32F-NEXT: andi a1, a1, -128
10173 ; RV64ZVE32F-NEXT: bnez a1, .LBB89_16
10174 ; RV64ZVE32F-NEXT: .LBB89_11: # %else14
10175 ; RV64ZVE32F-NEXT: ret
10176 ; RV64ZVE32F-NEXT: .LBB89_12: # %cond.store3
10177 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
10178 ; RV64ZVE32F-NEXT: slli a2, a2, 32
10179 ; RV64ZVE32F-NEXT: srli a2, a2, 29
10180 ; RV64ZVE32F-NEXT: add a2, a0, a2
10181 ; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
10182 ; RV64ZVE32F-NEXT: andi a2, a1, 8
10183 ; RV64ZVE32F-NEXT: beqz a2, .LBB89_6
10184 ; RV64ZVE32F-NEXT: .LBB89_13: # %cond.store5
10185 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
10186 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
10187 ; RV64ZVE32F-NEXT: slli a2, a2, 32
10188 ; RV64ZVE32F-NEXT: srli a2, a2, 29
10189 ; RV64ZVE32F-NEXT: add a2, a0, a2
10190 ; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
10191 ; RV64ZVE32F-NEXT: andi a2, a1, 16
10192 ; RV64ZVE32F-NEXT: beqz a2, .LBB89_7
10193 ; RV64ZVE32F-NEXT: .LBB89_14: # %cond.store7
10194 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
10195 ; RV64ZVE32F-NEXT: slli a2, a2, 32
10196 ; RV64ZVE32F-NEXT: srli a2, a2, 29
10197 ; RV64ZVE32F-NEXT: add a2, a0, a2
10198 ; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
10199 ; RV64ZVE32F-NEXT: andi a2, a1, 32
10200 ; RV64ZVE32F-NEXT: bnez a2, .LBB89_8
10201 ; RV64ZVE32F-NEXT: j .LBB89_9
10202 ; RV64ZVE32F-NEXT: .LBB89_15: # %cond.store11
10203 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
10204 ; RV64ZVE32F-NEXT: slli a2, a2, 32
10205 ; RV64ZVE32F-NEXT: srli a2, a2, 29
10206 ; RV64ZVE32F-NEXT: add a2, a0, a2
10207 ; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
10208 ; RV64ZVE32F-NEXT: andi a1, a1, -128
10209 ; RV64ZVE32F-NEXT: beqz a1, .LBB89_11
10210 ; RV64ZVE32F-NEXT: .LBB89_16: # %cond.store13
10211 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
10212 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
10213 ; RV64ZVE32F-NEXT: slli a1, a1, 32
10214 ; RV64ZVE32F-NEXT: srli a1, a1, 29
10215 ; RV64ZVE32F-NEXT: add a0, a0, a1
10216 ; RV64ZVE32F-NEXT: fsd fa7, 0(a0)
10217 ; RV64ZVE32F-NEXT: ret
10218 %eidxs = zext <8 x i32> %idxs to <8 x i64>
10219 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
10220 call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
10221 ret void
10222 }
10224 define void @mscatter_baseidx_v8f64(<8 x double> %val, ptr %base, <8 x i64> %idxs, <8 x i1> %m) {
10225 ; RV32V-LABEL: mscatter_baseidx_v8f64:
10226 ; RV32V: # %bb.0:
10227 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
10228 ; RV32V-NEXT: vnsrl.wi v16, v12, 0
10229 ; RV32V-NEXT: vsll.vi v12, v16, 3
10230 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
10231 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
10232 ; RV32V-NEXT: ret
10234 ; RV64-LABEL: mscatter_baseidx_v8f64:
10235 ; RV64: # %bb.0:
10236 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
10237 ; RV64-NEXT: vsll.vi v12, v12, 3
10238 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
10239 ; RV64-NEXT: ret
10241 ; RV32ZVE32F-LABEL: mscatter_baseidx_v8f64:
10242 ; RV32ZVE32F: # %bb.0:
10243 ; RV32ZVE32F-NEXT: lw a2, 56(a1)
10244 ; RV32ZVE32F-NEXT: lw a3, 48(a1)
10245 ; RV32ZVE32F-NEXT: lw a4, 40(a1)
10246 ; RV32ZVE32F-NEXT: lw a5, 32(a1)
10247 ; RV32ZVE32F-NEXT: lw a6, 0(a1)
10248 ; RV32ZVE32F-NEXT: lw a7, 8(a1)
10249 ; RV32ZVE32F-NEXT: lw t0, 16(a1)
10250 ; RV32ZVE32F-NEXT: lw a1, 24(a1)
10251 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
10252 ; RV32ZVE32F-NEXT: vmv.v.x v8, a6
10253 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a7
10254 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t0
10255 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a1
10256 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a5
10257 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a4
10258 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a3
10259 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a2
10260 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
10261 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
10262 ; RV32ZVE32F-NEXT: vmv.x.s a1, v0
10263 ; RV32ZVE32F-NEXT: andi a2, a1, 1
10264 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
10265 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
10266 ; RV32ZVE32F-NEXT: bnez a2, .LBB90_9
10267 ; RV32ZVE32F-NEXT: # %bb.1: # %else
10268 ; RV32ZVE32F-NEXT: andi a0, a1, 2
10269 ; RV32ZVE32F-NEXT: bnez a0, .LBB90_10
10270 ; RV32ZVE32F-NEXT: .LBB90_2: # %else2
10271 ; RV32ZVE32F-NEXT: andi a0, a1, 4
10272 ; RV32ZVE32F-NEXT: bnez a0, .LBB90_11
10273 ; RV32ZVE32F-NEXT: .LBB90_3: # %else4
10274 ; RV32ZVE32F-NEXT: andi a0, a1, 8
10275 ; RV32ZVE32F-NEXT: bnez a0, .LBB90_12
10276 ; RV32ZVE32F-NEXT: .LBB90_4: # %else6
10277 ; RV32ZVE32F-NEXT: andi a0, a1, 16
10278 ; RV32ZVE32F-NEXT: bnez a0, .LBB90_13
10279 ; RV32ZVE32F-NEXT: .LBB90_5: # %else8
10280 ; RV32ZVE32F-NEXT: andi a0, a1, 32
10281 ; RV32ZVE32F-NEXT: bnez a0, .LBB90_14
10282 ; RV32ZVE32F-NEXT: .LBB90_6: # %else10
10283 ; RV32ZVE32F-NEXT: andi a0, a1, 64
10284 ; RV32ZVE32F-NEXT: bnez a0, .LBB90_15
10285 ; RV32ZVE32F-NEXT: .LBB90_7: # %else12
10286 ; RV32ZVE32F-NEXT: andi a0, a1, -128
10287 ; RV32ZVE32F-NEXT: bnez a0, .LBB90_16
10288 ; RV32ZVE32F-NEXT: .LBB90_8: # %else14
10289 ; RV32ZVE32F-NEXT: ret
10290 ; RV32ZVE32F-NEXT: .LBB90_9: # %cond.store
10291 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
10292 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
10293 ; RV32ZVE32F-NEXT: andi a0, a1, 2
10294 ; RV32ZVE32F-NEXT: beqz a0, .LBB90_2
10295 ; RV32ZVE32F-NEXT: .LBB90_10: # %cond.store1
10296 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
10297 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
10298 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
10299 ; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
10300 ; RV32ZVE32F-NEXT: andi a0, a1, 4
10301 ; RV32ZVE32F-NEXT: beqz a0, .LBB90_3
10302 ; RV32ZVE32F-NEXT: .LBB90_11: # %cond.store3
10303 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
10304 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
10305 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
10306 ; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
10307 ; RV32ZVE32F-NEXT: andi a0, a1, 8
10308 ; RV32ZVE32F-NEXT: beqz a0, .LBB90_4
10309 ; RV32ZVE32F-NEXT: .LBB90_12: # %cond.store5
10310 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
10311 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
10312 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
10313 ; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
10314 ; RV32ZVE32F-NEXT: andi a0, a1, 16
10315 ; RV32ZVE32F-NEXT: beqz a0, .LBB90_5
10316 ; RV32ZVE32F-NEXT: .LBB90_13: # %cond.store7
10317 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
10318 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
10319 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
10320 ; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
10321 ; RV32ZVE32F-NEXT: andi a0, a1, 32
10322 ; RV32ZVE32F-NEXT: beqz a0, .LBB90_6
10323 ; RV32ZVE32F-NEXT: .LBB90_14: # %cond.store9
10324 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
10325 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
10326 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
10327 ; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
10328 ; RV32ZVE32F-NEXT: andi a0, a1, 64
10329 ; RV32ZVE32F-NEXT: beqz a0, .LBB90_7
10330 ; RV32ZVE32F-NEXT: .LBB90_15: # %cond.store11
10331 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
10332 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
10333 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
10334 ; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
10335 ; RV32ZVE32F-NEXT: andi a0, a1, -128
10336 ; RV32ZVE32F-NEXT: beqz a0, .LBB90_8
10337 ; RV32ZVE32F-NEXT: .LBB90_16: # %cond.store13
10338 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
10339 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
10340 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
10341 ; RV32ZVE32F-NEXT: fsd fa7, 0(a0)
10342 ; RV32ZVE32F-NEXT: ret
10344 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8f64:
10345 ; RV64ZVE32F: # %bb.0:
10346 ; RV64ZVE32F-NEXT: ld t1, 8(a1)
10347 ; RV64ZVE32F-NEXT: ld t0, 16(a1)
10348 ; RV64ZVE32F-NEXT: ld a7, 24(a1)
10349 ; RV64ZVE32F-NEXT: ld a6, 32(a1)
10350 ; RV64ZVE32F-NEXT: ld a5, 40(a1)
10351 ; RV64ZVE32F-NEXT: ld a4, 48(a1)
10352 ; RV64ZVE32F-NEXT: ld a2, 56(a1)
10353 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10354 ; RV64ZVE32F-NEXT: vmv.x.s a3, v0
10355 ; RV64ZVE32F-NEXT: andi t2, a3, 1
10356 ; RV64ZVE32F-NEXT: bnez t2, .LBB90_9
10357 ; RV64ZVE32F-NEXT: # %bb.1: # %else
10358 ; RV64ZVE32F-NEXT: andi a1, a3, 2
10359 ; RV64ZVE32F-NEXT: bnez a1, .LBB90_10
10360 ; RV64ZVE32F-NEXT: .LBB90_2: # %else2
10361 ; RV64ZVE32F-NEXT: andi a1, a3, 4
10362 ; RV64ZVE32F-NEXT: bnez a1, .LBB90_11
10363 ; RV64ZVE32F-NEXT: .LBB90_3: # %else4
10364 ; RV64ZVE32F-NEXT: andi a1, a3, 8
10365 ; RV64ZVE32F-NEXT: bnez a1, .LBB90_12
10366 ; RV64ZVE32F-NEXT: .LBB90_4: # %else6
10367 ; RV64ZVE32F-NEXT: andi a1, a3, 16
10368 ; RV64ZVE32F-NEXT: bnez a1, .LBB90_13
10369 ; RV64ZVE32F-NEXT: .LBB90_5: # %else8
10370 ; RV64ZVE32F-NEXT: andi a1, a3, 32
10371 ; RV64ZVE32F-NEXT: bnez a1, .LBB90_14
10372 ; RV64ZVE32F-NEXT: .LBB90_6: # %else10
10373 ; RV64ZVE32F-NEXT: andi a1, a3, 64
10374 ; RV64ZVE32F-NEXT: bnez a1, .LBB90_15
10375 ; RV64ZVE32F-NEXT: .LBB90_7: # %else12
10376 ; RV64ZVE32F-NEXT: andi a1, a3, -128
10377 ; RV64ZVE32F-NEXT: bnez a1, .LBB90_16
10378 ; RV64ZVE32F-NEXT: .LBB90_8: # %else14
10379 ; RV64ZVE32F-NEXT: ret
10380 ; RV64ZVE32F-NEXT: .LBB90_9: # %cond.store
10381 ; RV64ZVE32F-NEXT: ld a1, 0(a1)
10382 ; RV64ZVE32F-NEXT: slli a1, a1, 3
10383 ; RV64ZVE32F-NEXT: add a1, a0, a1
10384 ; RV64ZVE32F-NEXT: fsd fa0, 0(a1)
10385 ; RV64ZVE32F-NEXT: andi a1, a3, 2
10386 ; RV64ZVE32F-NEXT: beqz a1, .LBB90_2
10387 ; RV64ZVE32F-NEXT: .LBB90_10: # %cond.store1
10388 ; RV64ZVE32F-NEXT: slli t1, t1, 3
10389 ; RV64ZVE32F-NEXT: add t1, a0, t1
10390 ; RV64ZVE32F-NEXT: fsd fa1, 0(t1)
10391 ; RV64ZVE32F-NEXT: andi a1, a3, 4
10392 ; RV64ZVE32F-NEXT: beqz a1, .LBB90_3
10393 ; RV64ZVE32F-NEXT: .LBB90_11: # %cond.store3
10394 ; RV64ZVE32F-NEXT: slli t0, t0, 3
10395 ; RV64ZVE32F-NEXT: add t0, a0, t0
10396 ; RV64ZVE32F-NEXT: fsd fa2, 0(t0)
10397 ; RV64ZVE32F-NEXT: andi a1, a3, 8
10398 ; RV64ZVE32F-NEXT: beqz a1, .LBB90_4
10399 ; RV64ZVE32F-NEXT: .LBB90_12: # %cond.store5
10400 ; RV64ZVE32F-NEXT: slli a7, a7, 3
10401 ; RV64ZVE32F-NEXT: add a7, a0, a7
10402 ; RV64ZVE32F-NEXT: fsd fa3, 0(a7)
10403 ; RV64ZVE32F-NEXT: andi a1, a3, 16
10404 ; RV64ZVE32F-NEXT: beqz a1, .LBB90_5
10405 ; RV64ZVE32F-NEXT: .LBB90_13: # %cond.store7
10406 ; RV64ZVE32F-NEXT: slli a6, a6, 3
10407 ; RV64ZVE32F-NEXT: add a6, a0, a6
10408 ; RV64ZVE32F-NEXT: fsd fa4, 0(a6)
10409 ; RV64ZVE32F-NEXT: andi a1, a3, 32
10410 ; RV64ZVE32F-NEXT: beqz a1, .LBB90_6
10411 ; RV64ZVE32F-NEXT: .LBB90_14: # %cond.store9
10412 ; RV64ZVE32F-NEXT: slli a5, a5, 3
10413 ; RV64ZVE32F-NEXT: add a5, a0, a5
10414 ; RV64ZVE32F-NEXT: fsd fa5, 0(a5)
10415 ; RV64ZVE32F-NEXT: andi a1, a3, 64
10416 ; RV64ZVE32F-NEXT: beqz a1, .LBB90_7
10417 ; RV64ZVE32F-NEXT: .LBB90_15: # %cond.store11
10418 ; RV64ZVE32F-NEXT: slli a4, a4, 3
10419 ; RV64ZVE32F-NEXT: add a4, a0, a4
10420 ; RV64ZVE32F-NEXT: fsd fa6, 0(a4)
10421 ; RV64ZVE32F-NEXT: andi a1, a3, -128
10422 ; RV64ZVE32F-NEXT: beqz a1, .LBB90_8
10423 ; RV64ZVE32F-NEXT: .LBB90_16: # %cond.store13
10424 ; RV64ZVE32F-NEXT: slli a2, a2, 3
10425 ; RV64ZVE32F-NEXT: add a0, a0, a2
10426 ; RV64ZVE32F-NEXT: fsd fa7, 0(a0)
10427 ; RV64ZVE32F-NEXT: ret
10428 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %idxs
10429 call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
10430 ret void
10431 }
10433 declare void @llvm.masked.scatter.v16i8.v16p0(<16 x i8>, <16 x ptr>, i32, <16 x i1>)
10435 define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs, <16 x i1> %m) {
10436 ; RV32-LABEL: mscatter_baseidx_v16i8:
10437 ; RV32: # %bb.0:
10438 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
10439 ; RV32-NEXT: vsext.vf4 v12, v9
10440 ; RV32-NEXT: vsetvli zero, zero, e8, m1, ta, ma
10441 ; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
10442 ; RV32-NEXT: ret
10444 ; RV64-LABEL: mscatter_baseidx_v16i8:
10445 ; RV64: # %bb.0:
10446 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
10447 ; RV64-NEXT: vsext.vf8 v16, v9
10448 ; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, ma
10449 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
10450 ; RV64-NEXT: ret
10452 ; RV64ZVE32F-LABEL: mscatter_baseidx_v16i8:
10453 ; RV64ZVE32F: # %bb.0:
10454 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
10455 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
10456 ; RV64ZVE32F-NEXT: andi a2, a1, 1
10457 ; RV64ZVE32F-NEXT: beqz a2, .LBB91_2
10458 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
10459 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10460 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
10461 ; RV64ZVE32F-NEXT: add a2, a0, a2
10462 ; RV64ZVE32F-NEXT: vse8.v v8, (a2)
10463 ; RV64ZVE32F-NEXT: .LBB91_2: # %else
10464 ; RV64ZVE32F-NEXT: andi a2, a1, 2
10465 ; RV64ZVE32F-NEXT: beqz a2, .LBB91_4
10466 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
10467 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10468 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
10469 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
10470 ; RV64ZVE32F-NEXT: add a2, a0, a2
10471 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10472 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
10473 ; RV64ZVE32F-NEXT: vse8.v v10, (a2)
10474 ; RV64ZVE32F-NEXT: .LBB91_4: # %else2
10475 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
10476 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
10477 ; RV64ZVE32F-NEXT: andi a2, a1, 4
10478 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
10479 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 2
10480 ; RV64ZVE32F-NEXT: bnez a2, .LBB91_25
10481 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
10482 ; RV64ZVE32F-NEXT: andi a2, a1, 8
10483 ; RV64ZVE32F-NEXT: bnez a2, .LBB91_26
10484 ; RV64ZVE32F-NEXT: .LBB91_6: # %else6
10485 ; RV64ZVE32F-NEXT: andi a2, a1, 16
10486 ; RV64ZVE32F-NEXT: beqz a2, .LBB91_8
10487 ; RV64ZVE32F-NEXT: .LBB91_7: # %cond.store7
10488 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10489 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
10490 ; RV64ZVE32F-NEXT: add a2, a0, a2
10491 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 4
10492 ; RV64ZVE32F-NEXT: vse8.v v11, (a2)
10493 ; RV64ZVE32F-NEXT: .LBB91_8: # %else8
10494 ; RV64ZVE32F-NEXT: andi a2, a1, 32
10495 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma
10496 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 8
10497 ; RV64ZVE32F-NEXT: beqz a2, .LBB91_10
10498 ; RV64ZVE32F-NEXT: # %bb.9: # %cond.store9
10499 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10500 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
10501 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
10502 ; RV64ZVE32F-NEXT: add a2, a0, a2
10503 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10504 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 5
10505 ; RV64ZVE32F-NEXT: vse8.v v11, (a2)
10506 ; RV64ZVE32F-NEXT: .LBB91_10: # %else10
10507 ; RV64ZVE32F-NEXT: andi a2, a1, 64
10508 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
10509 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
10510 ; RV64ZVE32F-NEXT: bnez a2, .LBB91_27
10511 ; RV64ZVE32F-NEXT: # %bb.11: # %else12
10512 ; RV64ZVE32F-NEXT: andi a2, a1, 128
10513 ; RV64ZVE32F-NEXT: bnez a2, .LBB91_28
10514 ; RV64ZVE32F-NEXT: .LBB91_12: # %else14
10515 ; RV64ZVE32F-NEXT: andi a2, a1, 256
10516 ; RV64ZVE32F-NEXT: bnez a2, .LBB91_29
10517 ; RV64ZVE32F-NEXT: .LBB91_13: # %else16
10518 ; RV64ZVE32F-NEXT: andi a2, a1, 512
10519 ; RV64ZVE32F-NEXT: beqz a2, .LBB91_15
10520 ; RV64ZVE32F-NEXT: .LBB91_14: # %cond.store17
10521 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10522 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
10523 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
10524 ; RV64ZVE32F-NEXT: add a2, a0, a2
10525 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10526 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 9
10527 ; RV64ZVE32F-NEXT: vse8.v v10, (a2)
10528 ; RV64ZVE32F-NEXT: .LBB91_15: # %else18
10529 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
10530 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
10531 ; RV64ZVE32F-NEXT: andi a2, a1, 1024
10532 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
10533 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
10534 ; RV64ZVE32F-NEXT: bnez a2, .LBB91_30
10535 ; RV64ZVE32F-NEXT: # %bb.16: # %else20
10536 ; RV64ZVE32F-NEXT: slli a2, a1, 52
10537 ; RV64ZVE32F-NEXT: bltz a2, .LBB91_31
10538 ; RV64ZVE32F-NEXT: .LBB91_17: # %else22
10539 ; RV64ZVE32F-NEXT: slli a2, a1, 51
10540 ; RV64ZVE32F-NEXT: bltz a2, .LBB91_32
10541 ; RV64ZVE32F-NEXT: .LBB91_18: # %else24
10542 ; RV64ZVE32F-NEXT: slli a2, a1, 50
10543 ; RV64ZVE32F-NEXT: bgez a2, .LBB91_20
10544 ; RV64ZVE32F-NEXT: .LBB91_19: # %cond.store25
10545 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10546 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
10547 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
10548 ; RV64ZVE32F-NEXT: add a2, a0, a2
10549 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10550 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 13
10551 ; RV64ZVE32F-NEXT: vse8.v v9, (a2)
10552 ; RV64ZVE32F-NEXT: .LBB91_20: # %else26
10553 ; RV64ZVE32F-NEXT: slli a2, a1, 49
10554 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
10555 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
10556 ; RV64ZVE32F-NEXT: bgez a2, .LBB91_22
10557 ; RV64ZVE32F-NEXT: # %bb.21: # %cond.store27
10558 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
10559 ; RV64ZVE32F-NEXT: add a2, a0, a2
10560 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10561 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 14
10562 ; RV64ZVE32F-NEXT: vse8.v v10, (a2)
10563 ; RV64ZVE32F-NEXT: .LBB91_22: # %else28
10564 ; RV64ZVE32F-NEXT: lui a2, 1048568
10565 ; RV64ZVE32F-NEXT: and a1, a1, a2
10566 ; RV64ZVE32F-NEXT: beqz a1, .LBB91_24
10567 ; RV64ZVE32F-NEXT: # %bb.23: # %cond.store29
10568 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10569 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
10570 ; RV64ZVE32F-NEXT: vmv.x.s a1, v9
10571 ; RV64ZVE32F-NEXT: add a0, a0, a1
10572 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10573 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 15
10574 ; RV64ZVE32F-NEXT: vse8.v v8, (a0)
10575 ; RV64ZVE32F-NEXT: .LBB91_24: # %else30
10576 ; RV64ZVE32F-NEXT: ret
10577 ; RV64ZVE32F-NEXT: .LBB91_25: # %cond.store3
10578 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
10579 ; RV64ZVE32F-NEXT: add a2, a0, a2
10580 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10581 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
10582 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
10583 ; RV64ZVE32F-NEXT: andi a2, a1, 8
10584 ; RV64ZVE32F-NEXT: beqz a2, .LBB91_6
10585 ; RV64ZVE32F-NEXT: .LBB91_26: # %cond.store5
10586 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10587 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
10588 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
10589 ; RV64ZVE32F-NEXT: add a2, a0, a2
10590 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10591 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 3
10592 ; RV64ZVE32F-NEXT: vse8.v v11, (a2)
10593 ; RV64ZVE32F-NEXT: andi a2, a1, 16
10594 ; RV64ZVE32F-NEXT: bnez a2, .LBB91_7
10595 ; RV64ZVE32F-NEXT: j .LBB91_8
10596 ; RV64ZVE32F-NEXT: .LBB91_27: # %cond.store11
10597 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
10598 ; RV64ZVE32F-NEXT: add a2, a0, a2
10599 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10600 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 6
10601 ; RV64ZVE32F-NEXT: vse8.v v11, (a2)
10602 ; RV64ZVE32F-NEXT: andi a2, a1, 128
10603 ; RV64ZVE32F-NEXT: beqz a2, .LBB91_12
10604 ; RV64ZVE32F-NEXT: .LBB91_28: # %cond.store13
10605 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10606 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
10607 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
10608 ; RV64ZVE32F-NEXT: add a2, a0, a2
10609 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10610 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 7
10611 ; RV64ZVE32F-NEXT: vse8.v v10, (a2)
10612 ; RV64ZVE32F-NEXT: andi a2, a1, 256
10613 ; RV64ZVE32F-NEXT: beqz a2, .LBB91_13
10614 ; RV64ZVE32F-NEXT: .LBB91_29: # %cond.store15
10615 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10616 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
10617 ; RV64ZVE32F-NEXT: add a2, a0, a2
10618 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 8
10619 ; RV64ZVE32F-NEXT: vse8.v v10, (a2)
10620 ; RV64ZVE32F-NEXT: andi a2, a1, 512
10621 ; RV64ZVE32F-NEXT: bnez a2, .LBB91_14
10622 ; RV64ZVE32F-NEXT: j .LBB91_15
10623 ; RV64ZVE32F-NEXT: .LBB91_30: # %cond.store19
10624 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
10625 ; RV64ZVE32F-NEXT: add a2, a0, a2
10626 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10627 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 10
10628 ; RV64ZVE32F-NEXT: vse8.v v11, (a2)
10629 ; RV64ZVE32F-NEXT: slli a2, a1, 52
10630 ; RV64ZVE32F-NEXT: bgez a2, .LBB91_17
10631 ; RV64ZVE32F-NEXT: .LBB91_31: # %cond.store21
10632 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10633 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
10634 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
10635 ; RV64ZVE32F-NEXT: add a2, a0, a2
10636 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10637 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 11
10638 ; RV64ZVE32F-NEXT: vse8.v v9, (a2)
10639 ; RV64ZVE32F-NEXT: slli a2, a1, 51
10640 ; RV64ZVE32F-NEXT: bgez a2, .LBB91_18
10641 ; RV64ZVE32F-NEXT: .LBB91_32: # %cond.store23
10642 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10643 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
10644 ; RV64ZVE32F-NEXT: add a2, a0, a2
10645 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 12
10646 ; RV64ZVE32F-NEXT: vse8.v v9, (a2)
10647 ; RV64ZVE32F-NEXT: slli a2, a1, 50
10648 ; RV64ZVE32F-NEXT: bltz a2, .LBB91_19
10649 ; RV64ZVE32F-NEXT: j .LBB91_20
10650 %ptrs = getelementptr inbounds i8, ptr %base, <16 x i8> %idxs
10651 call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> %val, <16 x ptr> %ptrs, i32 1, <16 x i1> %m)
10652 ret void
10653 }
10655 declare void @llvm.masked.scatter.v32i8.v32p0(<32 x i8>, <32 x ptr>, i32, <32 x i1>)
10657 define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, <32 x i1> %m) {
10658 ; RV32-LABEL: mscatter_baseidx_v32i8:
10659 ; RV32: # %bb.0:
10660 ; RV32-NEXT: li a1, 32
10661 ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
10662 ; RV32-NEXT: vsext.vf4 v16, v10
10663 ; RV32-NEXT: vsetvli zero, zero, e8, m2, ta, ma
10664 ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
10665 ; RV32-NEXT: ret
10667 ; RV64-LABEL: mscatter_baseidx_v32i8:
10668 ; RV64: # %bb.0:
10669 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
10670 ; RV64-NEXT: vsext.vf8 v16, v10
10671 ; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, ma
10672 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
10673 ; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma
10674 ; RV64-NEXT: vslidedown.vi v8, v8, 16
10675 ; RV64-NEXT: vslidedown.vi v10, v10, 16
10676 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
10677 ; RV64-NEXT: vslidedown.vi v0, v0, 2
10678 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
10679 ; RV64-NEXT: vsext.vf8 v16, v10
10680 ; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, ma
10681 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
10682 ; RV64-NEXT: ret
10684 ; RV64ZVE32F-LABEL: mscatter_baseidx_v32i8:
10685 ; RV64ZVE32F: # %bb.0:
10686 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
10687 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
10688 ; RV64ZVE32F-NEXT: andi a2, a1, 1
10689 ; RV64ZVE32F-NEXT: beqz a2, .LBB92_2
10690 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
10691 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10692 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
10693 ; RV64ZVE32F-NEXT: add a2, a0, a2
10694 ; RV64ZVE32F-NEXT: vse8.v v8, (a2)
10695 ; RV64ZVE32F-NEXT: .LBB92_2: # %else
10696 ; RV64ZVE32F-NEXT: andi a2, a1, 2
10697 ; RV64ZVE32F-NEXT: beqz a2, .LBB92_4
10698 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
10699 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10700 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 1
10701 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
10702 ; RV64ZVE32F-NEXT: add a2, a0, a2
10703 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10704 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1
10705 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
10706 ; RV64ZVE32F-NEXT: .LBB92_4: # %else2
10707 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
10708 ; RV64ZVE32F-NEXT: vslidedown.vi v13, v10, 4
10709 ; RV64ZVE32F-NEXT: andi a2, a1, 4
10710 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
10711 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 2
10712 ; RV64ZVE32F-NEXT: bnez a2, .LBB92_49
10713 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
10714 ; RV64ZVE32F-NEXT: andi a2, a1, 8
10715 ; RV64ZVE32F-NEXT: bnez a2, .LBB92_50
10716 ; RV64ZVE32F-NEXT: .LBB92_6: # %else6
10717 ; RV64ZVE32F-NEXT: andi a2, a1, 16
10718 ; RV64ZVE32F-NEXT: beqz a2, .LBB92_8
10719 ; RV64ZVE32F-NEXT: .LBB92_7: # %cond.store7
10720 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10721 ; RV64ZVE32F-NEXT: vmv.x.s a2, v13
10722 ; RV64ZVE32F-NEXT: add a2, a0, a2
10723 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
10724 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
10725 ; RV64ZVE32F-NEXT: .LBB92_8: # %else8
10726 ; RV64ZVE32F-NEXT: andi a2, a1, 32
10727 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma
10728 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 8
10729 ; RV64ZVE32F-NEXT: beqz a2, .LBB92_10
10730 ; RV64ZVE32F-NEXT: # %bb.9: # %cond.store9
10731 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10732 ; RV64ZVE32F-NEXT: vslidedown.vi v14, v13, 1
10733 ; RV64ZVE32F-NEXT: vmv.x.s a2, v14
10734 ; RV64ZVE32F-NEXT: add a2, a0, a2
10735 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10736 ; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 5
10737 ; RV64ZVE32F-NEXT: vse8.v v14, (a2)
10738 ; RV64ZVE32F-NEXT: .LBB92_10: # %else10
10739 ; RV64ZVE32F-NEXT: andi a2, a1, 64
10740 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
10741 ; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 2
10742 ; RV64ZVE32F-NEXT: bnez a2, .LBB92_51
10743 ; RV64ZVE32F-NEXT: # %bb.11: # %else12
10744 ; RV64ZVE32F-NEXT: andi a2, a1, 128
10745 ; RV64ZVE32F-NEXT: bnez a2, .LBB92_52
10746 ; RV64ZVE32F-NEXT: .LBB92_12: # %else14
10747 ; RV64ZVE32F-NEXT: andi a2, a1, 256
10748 ; RV64ZVE32F-NEXT: bnez a2, .LBB92_53
10749 ; RV64ZVE32F-NEXT: .LBB92_13: # %else16
10750 ; RV64ZVE32F-NEXT: andi a2, a1, 512
10751 ; RV64ZVE32F-NEXT: beqz a2, .LBB92_15
10752 ; RV64ZVE32F-NEXT: .LBB92_14: # %cond.store17
10753 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10754 ; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 1
10755 ; RV64ZVE32F-NEXT: vmv.x.s a2, v13
10756 ; RV64ZVE32F-NEXT: add a2, a0, a2
10757 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10758 ; RV64ZVE32F-NEXT: vslidedown.vi v13, v8, 9
10759 ; RV64ZVE32F-NEXT: vse8.v v13, (a2)
10760 ; RV64ZVE32F-NEXT: .LBB92_15: # %else18
10761 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
10762 ; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 4
10763 ; RV64ZVE32F-NEXT: andi a2, a1, 1024
10764 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
10765 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 2
10766 ; RV64ZVE32F-NEXT: beqz a2, .LBB92_17
10767 ; RV64ZVE32F-NEXT: # %bb.16: # %cond.store19
10768 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
10769 ; RV64ZVE32F-NEXT: add a2, a0, a2
10770 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10771 ; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 10
10772 ; RV64ZVE32F-NEXT: vse8.v v14, (a2)
10773 ; RV64ZVE32F-NEXT: .LBB92_17: # %else20
10774 ; RV64ZVE32F-NEXT: slli a2, a1, 52
10775 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_19
10776 ; RV64ZVE32F-NEXT: # %bb.18: # %cond.store21
10777 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10778 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
10779 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
10780 ; RV64ZVE32F-NEXT: add a2, a0, a2
10781 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10782 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 11
10783 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
10784 ; RV64ZVE32F-NEXT: .LBB92_19: # %else22
10785 ; RV64ZVE32F-NEXT: slli a2, a1, 51
10786 ; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m2, ta, ma
10787 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 16
10788 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_21
10789 ; RV64ZVE32F-NEXT: # %bb.20: # %cond.store23
10790 ; RV64ZVE32F-NEXT: vmv.x.s a2, v13
10791 ; RV64ZVE32F-NEXT: add a2, a0, a2
10792 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10793 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 12
10794 ; RV64ZVE32F-NEXT: vse8.v v11, (a2)
10795 ; RV64ZVE32F-NEXT: .LBB92_21: # %else24
10796 ; RV64ZVE32F-NEXT: slli a2, a1, 50
10797 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_23
10798 ; RV64ZVE32F-NEXT: # %bb.22: # %cond.store25
10799 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10800 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v13, 1
10801 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
10802 ; RV64ZVE32F-NEXT: add a2, a0, a2
10803 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10804 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 13
10805 ; RV64ZVE32F-NEXT: vse8.v v11, (a2)
10806 ; RV64ZVE32F-NEXT: .LBB92_23: # %else26
10807 ; RV64ZVE32F-NEXT: slli a2, a1, 49
10808 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
10809 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v13, 2
10810 ; RV64ZVE32F-NEXT: bltz a2, .LBB92_54
10811 ; RV64ZVE32F-NEXT: # %bb.24: # %else28
10812 ; RV64ZVE32F-NEXT: slli a2, a1, 48
10813 ; RV64ZVE32F-NEXT: bltz a2, .LBB92_55
10814 ; RV64ZVE32F-NEXT: .LBB92_25: # %else30
10815 ; RV64ZVE32F-NEXT: slli a2, a1, 47
10816 ; RV64ZVE32F-NEXT: bltz a2, .LBB92_56
10817 ; RV64ZVE32F-NEXT: .LBB92_26: # %else32
10818 ; RV64ZVE32F-NEXT: slli a2, a1, 46
10819 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_28
10820 ; RV64ZVE32F-NEXT: .LBB92_27: # %cond.store33
10821 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10822 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
10823 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
10824 ; RV64ZVE32F-NEXT: add a2, a0, a2
10825 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
10826 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 17
10827 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10828 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
10829 ; RV64ZVE32F-NEXT: .LBB92_28: # %else34
10830 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
10831 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
10832 ; RV64ZVE32F-NEXT: slli a2, a1, 45
10833 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
10834 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 2
10835 ; RV64ZVE32F-NEXT: bltz a2, .LBB92_57
10836 ; RV64ZVE32F-NEXT: # %bb.29: # %else36
10837 ; RV64ZVE32F-NEXT: slli a2, a1, 44
10838 ; RV64ZVE32F-NEXT: bltz a2, .LBB92_58
10839 ; RV64ZVE32F-NEXT: .LBB92_30: # %else38
10840 ; RV64ZVE32F-NEXT: slli a2, a1, 43
10841 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_32
10842 ; RV64ZVE32F-NEXT: .LBB92_31: # %cond.store39
10843 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
10844 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
10845 ; RV64ZVE32F-NEXT: add a2, a0, a2
10846 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 20
10847 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10848 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
10849 ; RV64ZVE32F-NEXT: .LBB92_32: # %else40
10850 ; RV64ZVE32F-NEXT: slli a2, a1, 42
10851 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma
10852 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 8
10853 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_34
10854 ; RV64ZVE32F-NEXT: # %bb.33: # %cond.store41
10855 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10856 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v11, 1
10857 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
10858 ; RV64ZVE32F-NEXT: add a2, a0, a2
10859 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
10860 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 21
10861 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10862 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
10863 ; RV64ZVE32F-NEXT: .LBB92_34: # %else42
10864 ; RV64ZVE32F-NEXT: slli a2, a1, 41
10865 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
10866 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 2
10867 ; RV64ZVE32F-NEXT: bltz a2, .LBB92_59
; RV64ZVE32F-NEXT: # %bb.35: # %else44
; RV64ZVE32F-NEXT: slli a2, a1, 40
; RV64ZVE32F-NEXT: bltz a2, .LBB92_60
; RV64ZVE32F-NEXT: .LBB92_36: # %else46
; RV64ZVE32F-NEXT: slli a2, a1, 39
; RV64ZVE32F-NEXT: bltz a2, .LBB92_61
; RV64ZVE32F-NEXT: .LBB92_37: # %else48
; RV64ZVE32F-NEXT: slli a2, a1, 38
; RV64ZVE32F-NEXT: bgez a2, .LBB92_39
; RV64ZVE32F-NEXT: .LBB92_38: # %cond.store49
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 25
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB92_39: # %else50
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
; RV64ZVE32F-NEXT: slli a2, a1, 37
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bltz a2, .LBB92_62
; RV64ZVE32F-NEXT: # %bb.40: # %else52
; RV64ZVE32F-NEXT: slli a2, a1, 36
; RV64ZVE32F-NEXT: bltz a2, .LBB92_63
; RV64ZVE32F-NEXT: .LBB92_41: # %else54
; RV64ZVE32F-NEXT: slli a2, a1, 35
; RV64ZVE32F-NEXT: bltz a2, .LBB92_64
; RV64ZVE32F-NEXT: .LBB92_42: # %else56
; RV64ZVE32F-NEXT: slli a2, a1, 34
; RV64ZVE32F-NEXT: bgez a2, .LBB92_44
; RV64ZVE32F-NEXT: .LBB92_43: # %cond.store57
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 29
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB92_44: # %else58
; RV64ZVE32F-NEXT: slli a2, a1, 33
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
; RV64ZVE32F-NEXT: bgez a2, .LBB92_46
; RV64ZVE32F-NEXT: # %bb.45: # %cond.store59
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 30
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB92_46: # %else60
; RV64ZVE32F-NEXT: lui a2, 524288
; RV64ZVE32F-NEXT: and a1, a1, a2
; RV64ZVE32F-NEXT: beqz a1, .LBB92_48
; RV64ZVE32F-NEXT: # %bb.47: # %cond.store61
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v10
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 31
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v8, (a0)
; RV64ZVE32F-NEXT: .LBB92_48: # %else62
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB92_49: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 2
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB92_6
; RV64ZVE32F-NEXT: .LBB92_50: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 3
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB92_7
; RV64ZVE32F-NEXT: j .LBB92_8
; RV64ZVE32F-NEXT: .LBB92_51: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 6
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 128
; RV64ZVE32F-NEXT: beqz a2, .LBB92_12
; RV64ZVE32F-NEXT: .LBB92_52: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v13, v8, 7
; RV64ZVE32F-NEXT: vse8.v v13, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 256
; RV64ZVE32F-NEXT: beqz a2, .LBB92_13
; RV64ZVE32F-NEXT: .LBB92_53: # %cond.store15
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vslidedown.vi v13, v8, 8
; RV64ZVE32F-NEXT: vse8.v v13, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 512
; RV64ZVE32F-NEXT: bnez a2, .LBB92_14
; RV64ZVE32F-NEXT: j .LBB92_15
; RV64ZVE32F-NEXT: .LBB92_54: # %cond.store27
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 14
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 48
; RV64ZVE32F-NEXT: bgez a2, .LBB92_25
; RV64ZVE32F-NEXT: .LBB92_55: # %cond.store29
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 15
; RV64ZVE32F-NEXT: vse8.v v11, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 47
; RV64ZVE32F-NEXT: bgez a2, .LBB92_26
; RV64ZVE32F-NEXT: .LBB92_56: # %cond.store31
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 16
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 46
; RV64ZVE32F-NEXT: bltz a2, .LBB92_27
; RV64ZVE32F-NEXT: j .LBB92_28
; RV64ZVE32F-NEXT: .LBB92_57: # %cond.store35
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 18
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 44
; RV64ZVE32F-NEXT: bgez a2, .LBB92_30
; RV64ZVE32F-NEXT: .LBB92_58: # %cond.store37
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 43
; RV64ZVE32F-NEXT: bltz a2, .LBB92_31
; RV64ZVE32F-NEXT: j .LBB92_32
; RV64ZVE32F-NEXT: .LBB92_59: # %cond.store43
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 22
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 40
; RV64ZVE32F-NEXT: bgez a2, .LBB92_36
; RV64ZVE32F-NEXT: .LBB92_60: # %cond.store45
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 23
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 39
; RV64ZVE32F-NEXT: bgez a2, .LBB92_37
; RV64ZVE32F-NEXT: .LBB92_61: # %cond.store47
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 24
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 38
; RV64ZVE32F-NEXT: bltz a2, .LBB92_38
; RV64ZVE32F-NEXT: j .LBB92_39
; RV64ZVE32F-NEXT: .LBB92_62: # %cond.store51
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 26
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 36
; RV64ZVE32F-NEXT: bgez a2, .LBB92_41
; RV64ZVE32F-NEXT: .LBB92_63: # %cond.store53
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 27
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 35
; RV64ZVE32F-NEXT: bgez a2, .LBB92_42
; RV64ZVE32F-NEXT: .LBB92_64: # %cond.store55
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 28
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 34
; RV64ZVE32F-NEXT: bltz a2, .LBB92_43
; RV64ZVE32F-NEXT: j .LBB92_44
  %ptrs = getelementptr inbounds i8, ptr %base, <32 x i8> %idxs
  call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> %val, <32 x ptr> %ptrs, i32 1, <32 x i1> %m)
  ret void
}
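
; The RV64ZVE32F checks above show the fully scalarized form of the v32i8
; scatter: the mask is read into a scalar register, each bit is tested with
; andi or slli+bltz/bgez, and every active lane extracts its index and value,
; adds the index to the base, and issues a single vse8.v. As an illustrative
; sketch only (not part of the autogenerated assertions; the function name is
; hypothetical), lane 0 of that expansion corresponds to IR like this:
define void @scatter_lane0_expansion_sketch(<32 x i8> %val, ptr %base, <32 x i8> %idxs, <32 x i1> %m) {
  ; Test mask bit 0 and skip the store when the lane is inactive.
  %m0 = extractelement <32 x i1> %m, i64 0
  br i1 %m0, label %cond.store, label %else
cond.store:
  ; Sign-extend the i8 index into the address computation and store the lane.
  %idx0 = extractelement <32 x i8> %idxs, i64 0
  %p0 = getelementptr inbounds i8, ptr %base, i8 %idx0
  %v0 = extractelement <32 x i8> %val, i64 0
  store i8 %v0, ptr %p0, align 1
  br label %else
else:
  ret void
}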

define void @mscatter_unit_stride(<8 x i16> %val, ptr %base) {
; CHECK-LABEL: mscatter_unit_stride:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
  call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> splat (i1 true))
  ret void
}
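
; Because the indices above are simply 0..7 and the mask is all ones, the
; scatter is recognized as unit-stride and emitted as one contiguous vse16.v in
; every configuration. A rough IR equivalent (illustrative only; the function
; name is hypothetical) is a plain vector store:
define void @unit_stride_as_plain_store(<8 x i16> %val, ptr %base) {
  store <8 x i16> %val, ptr %base, align 2
  ret void
}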

define void @mscatter_unit_stride_with_offset(<8 x i16> %val, ptr %base) {
; CHECK-LABEL: mscatter_unit_stride_with_offset:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a0, a0, 10
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12>
  call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> splat (i1 true))
  ret void
}
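
; The indices 5..12 are still consecutive, so the only extra work is folding
; the starting offset (5 x 2 bytes = 10, hence the addi) before the same
; contiguous store. An illustrative IR equivalent (hypothetical function name):
define void @offset_unit_stride_as_plain_store(<8 x i16> %val, ptr %base) {
  %start = getelementptr inbounds i16, ptr %base, i64 5
  store <8 x i16> %val, ptr %start, align 2
  ret void
}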

define void @mscatter_shuffle_reverse(<8 x i16> %val, ptr %base) {
; CHECK-LABEL: mscatter_shuffle_reverse:
; CHECK: # %bb.0:
; CHECK-NEXT: addi a0, a0, 14
; CHECK-NEXT: li a1, -2
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vsse16.v v8, (a0), a1
; CHECK-NEXT: ret
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>
  call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> splat (i1 true))
  ret void
}
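
; Decreasing indices 7..0 become a strided store: the base is advanced to the
; last element (7 x 2 bytes = 14, hence the addi) and vsse16.v is used with a
; byte stride of -2, so the value vector itself is never shuffled. For
; reference, an illustrative shuffle-based equivalent (hypothetical function
; name) that writes the same bytes is:
define void @reverse_then_store(<8 x i16> %val, ptr %base) {
  %rev = shufflevector <8 x i16> %val, <8 x i16> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  store <8 x i16> %rev, ptr %base, align 2
  ret void
}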

define void @mscatter_shuffle_rotate(<8 x i16> %val, ptr %base) {
; RV32-LABEL: mscatter_shuffle_rotate:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vslidedown.vi v9, v8, 4
; RV32-NEXT: vslideup.vi v9, v8, 4
; RV32-NEXT: vse16.v v9, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_shuffle_rotate:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64-NEXT: vslidedown.vi v9, v8, 4
; RV64-NEXT: vslideup.vi v9, v8, 4
; RV64-NEXT: vse16.v v9, (a0)
; RV64-NEXT: ret
;
; RV64ZVE32F-LABEL: mscatter_shuffle_rotate:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: addi a1, a0, 6
; RV64ZVE32F-NEXT: addi a2, a0, 4
; RV64ZVE32F-NEXT: addi a3, a0, 2
; RV64ZVE32F-NEXT: addi a4, a0, 14
; RV64ZVE32F-NEXT: addi a5, a0, 12
; RV64ZVE32F-NEXT: addi a6, a0, 10
; RV64ZVE32F-NEXT: addi a7, a0, 8
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vse16.v v8, (a7)
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vse16.v v9, (a6)
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV64ZVE32F-NEXT: vse16.v v9, (a5)
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
; RV64ZVE32F-NEXT: vse16.v v9, (a4)
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vse16.v v9, (a0)
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
; RV64ZVE32F-NEXT: vse16.v v9, (a3)
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 6
; RV64ZVE32F-NEXT: vse16.v v9, (a2)
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV64ZVE32F-NEXT: vse16.v v8, (a1)
; RV64ZVE32F-NEXT: ret
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 4, i64 5, i64 6, i64 7, i64 0, i64 1, i64 2, i64 3>
  call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> splat (i1 true))
  ret void
}
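
; With full V support (the RV32 and RV64 configurations), the rotate-by-4 index
; pattern is handled by rotating the value with vslidedown/vslideup and doing a
; single contiguous store; the RV64ZVE32F configuration, whose vectors cannot
; hold 64-bit addresses, instead stores each element to its computed offset.
; An illustrative IR equivalent of the rotated form (hypothetical function
; name):
define void @rotate_then_store(<8 x i16> %val, ptr %base) {
  %rot = shufflevector <8 x i16> %val, <8 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
  store <8 x i16> %rot, ptr %base, align 2
  ret void
}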