1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d \
3 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32V
4 ; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d \
5 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
6 ; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+zve32f,+zvl128b -target-abi=ilp32d \
7 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVE32F
8 ; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+zve32f,+zvl128b -target-abi=lp64d \
9 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64ZVE32F
11 declare void @llvm.masked.scatter.v1i8.v1p0(<1 x i8>, <1 x ptr>, i32, <1 x i1>)
; Masked scatter of one i8 element: <1 x i8> value, <1 x ptr> addresses,
; <1 x i1> mask %m, alignment operand 1.
; The +v runs and the riscv32 +zve32f run expect a single masked indexed store
; (vsoxei32.v or vsoxei64.v with v0.t). The riscv64 +zve32f run instead expects
; a vfirst.m test of the mask and a conditional vse8.v through a0.
13 define void @mscatter_v1i8(<1 x i8> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
14 ; RV32V-LABEL: mscatter_v1i8:
16 ; RV32V-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
17 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
20 ; RV64-LABEL: mscatter_v1i8:
22 ; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
23 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
26 ; RV32ZVE32F-LABEL: mscatter_v1i8:
27 ; RV32ZVE32F: # %bb.0:
28 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
29 ; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
30 ; RV32ZVE32F-NEXT: ret
32 ; RV64ZVE32F-LABEL: mscatter_v1i8:
33 ; RV64ZVE32F: # %bb.0:
34 ; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
35 ; RV64ZVE32F-NEXT: vfirst.m a1, v0
36 ; RV64ZVE32F-NEXT: bnez a1, .LBB0_2
37 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
38 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
39 ; RV64ZVE32F-NEXT: vse8.v v8, (a0)
40 ; RV64ZVE32F-NEXT: .LBB0_2: # %else
41 ; RV64ZVE32F-NEXT: ret
42 call void @llvm.masked.scatter.v1i8.v1p0(<1 x i8> %val, <1 x ptr> %ptrs, i32 1, <1 x i1> %m)
46 declare void @llvm.masked.scatter.v2i8.v2p0(<2 x i8>, <2 x ptr>, i32, <2 x i1>)
; Masked scatter of <2 x i8> through <2 x ptr> with mask %m, alignment operand 1.
; The +v runs and the riscv32 +zve32f run expect one masked indexed store.
; The riscv64 +zve32f run expects the mask moved to a scalar (vmv.x.s), bits 1
; and 2 tested separately, and one vse8.v per enabled lane through a0 and a1;
; lane 1 is first brought to element 0 with vslidedown.vi.
48 define void @mscatter_v2i8(<2 x i8> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
49 ; RV32V-LABEL: mscatter_v2i8:
51 ; RV32V-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
52 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
55 ; RV64-LABEL: mscatter_v2i8:
57 ; RV64-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
58 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
61 ; RV32ZVE32F-LABEL: mscatter_v2i8:
62 ; RV32ZVE32F: # %bb.0:
63 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
64 ; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
65 ; RV32ZVE32F-NEXT: ret
67 ; RV64ZVE32F-LABEL: mscatter_v2i8:
68 ; RV64ZVE32F: # %bb.0:
69 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
70 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
71 ; RV64ZVE32F-NEXT: andi a3, a2, 1
72 ; RV64ZVE32F-NEXT: bnez a3, .LBB1_3
73 ; RV64ZVE32F-NEXT: # %bb.1: # %else
74 ; RV64ZVE32F-NEXT: andi a2, a2, 2
75 ; RV64ZVE32F-NEXT: bnez a2, .LBB1_4
76 ; RV64ZVE32F-NEXT: .LBB1_2: # %else2
77 ; RV64ZVE32F-NEXT: ret
78 ; RV64ZVE32F-NEXT: .LBB1_3: # %cond.store
79 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
80 ; RV64ZVE32F-NEXT: vse8.v v8, (a0)
81 ; RV64ZVE32F-NEXT: andi a2, a2, 2
82 ; RV64ZVE32F-NEXT: beqz a2, .LBB1_2
83 ; RV64ZVE32F-NEXT: .LBB1_4: # %cond.store1
84 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
85 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
86 ; RV64ZVE32F-NEXT: vse8.v v8, (a1)
87 ; RV64ZVE32F-NEXT: ret
88 call void @llvm.masked.scatter.v2i8.v2p0(<2 x i8> %val, <2 x ptr> %ptrs, i32 1, <2 x i1> %m)
; Truncating scatter: %val is truncated from <2 x i16> to <2 x i8> and the
; narrowed vector is scattered with mask %m, alignment operand 1.
; Every run expects one vnsrl.wi (shift amount 0) at e8 to narrow the value.
; The riscv64 +zve32f run then stores lane by lane with vse8.v under scalar
; tests of the mask bits; the other runs use one masked indexed store.
92 define void @mscatter_v2i16_truncstore_v2i8(<2 x i16> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
93 ; RV32V-LABEL: mscatter_v2i16_truncstore_v2i8:
95 ; RV32V-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
96 ; RV32V-NEXT: vnsrl.wi v8, v8, 0
97 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
100 ; RV64-LABEL: mscatter_v2i16_truncstore_v2i8:
102 ; RV64-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
103 ; RV64-NEXT: vnsrl.wi v8, v8, 0
104 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
107 ; RV32ZVE32F-LABEL: mscatter_v2i16_truncstore_v2i8:
108 ; RV32ZVE32F: # %bb.0:
109 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
110 ; RV32ZVE32F-NEXT: vnsrl.wi v8, v8, 0
111 ; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
112 ; RV32ZVE32F-NEXT: ret
114 ; RV64ZVE32F-LABEL: mscatter_v2i16_truncstore_v2i8:
115 ; RV64ZVE32F: # %bb.0:
116 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
117 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
118 ; RV64ZVE32F-NEXT: andi a3, a2, 1
119 ; RV64ZVE32F-NEXT: vnsrl.wi v8, v8, 0
120 ; RV64ZVE32F-NEXT: bnez a3, .LBB2_3
121 ; RV64ZVE32F-NEXT: # %bb.1: # %else
122 ; RV64ZVE32F-NEXT: andi a2, a2, 2
123 ; RV64ZVE32F-NEXT: bnez a2, .LBB2_4
124 ; RV64ZVE32F-NEXT: .LBB2_2: # %else2
125 ; RV64ZVE32F-NEXT: ret
126 ; RV64ZVE32F-NEXT: .LBB2_3: # %cond.store
127 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
128 ; RV64ZVE32F-NEXT: vse8.v v8, (a0)
129 ; RV64ZVE32F-NEXT: andi a2, a2, 2
130 ; RV64ZVE32F-NEXT: beqz a2, .LBB2_2
131 ; RV64ZVE32F-NEXT: .LBB2_4: # %cond.store1
132 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
133 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
134 ; RV64ZVE32F-NEXT: vse8.v v8, (a1)
135 ; RV64ZVE32F-NEXT: ret
136 %tval = trunc <2 x i16> %val to <2 x i8>
137 call void @llvm.masked.scatter.v2i8.v2p0(<2 x i8> %tval, <2 x ptr> %ptrs, i32 1, <2 x i1> %m)
; Truncating scatter: %val is truncated from <2 x i32> to <2 x i8> and the
; narrowed vector is scattered with mask %m, alignment operand 1.
; Every run expects two vnsrl.wi narrowing steps (first at e16, then at e8).
; The riscv64 +zve32f run stores lane by lane with vse8.v under scalar tests of
; the mask bits; the other runs use one masked indexed store.
141 define void @mscatter_v2i32_truncstore_v2i8(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
142 ; RV32V-LABEL: mscatter_v2i32_truncstore_v2i8:
144 ; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
145 ; RV32V-NEXT: vnsrl.wi v8, v8, 0
146 ; RV32V-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
147 ; RV32V-NEXT: vnsrl.wi v8, v8, 0
148 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
151 ; RV64-LABEL: mscatter_v2i32_truncstore_v2i8:
153 ; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
154 ; RV64-NEXT: vnsrl.wi v8, v8, 0
155 ; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
156 ; RV64-NEXT: vnsrl.wi v8, v8, 0
157 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
160 ; RV32ZVE32F-LABEL: mscatter_v2i32_truncstore_v2i8:
161 ; RV32ZVE32F: # %bb.0:
162 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
163 ; RV32ZVE32F-NEXT: vnsrl.wi v8, v8, 0
164 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
165 ; RV32ZVE32F-NEXT: vnsrl.wi v8, v8, 0
166 ; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
167 ; RV32ZVE32F-NEXT: ret
169 ; RV64ZVE32F-LABEL: mscatter_v2i32_truncstore_v2i8:
170 ; RV64ZVE32F: # %bb.0:
171 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
172 ; RV64ZVE32F-NEXT: vnsrl.wi v8, v8, 0
173 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
174 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
175 ; RV64ZVE32F-NEXT: andi a3, a2, 1
176 ; RV64ZVE32F-NEXT: vnsrl.wi v8, v8, 0
177 ; RV64ZVE32F-NEXT: bnez a3, .LBB3_3
178 ; RV64ZVE32F-NEXT: # %bb.1: # %else
179 ; RV64ZVE32F-NEXT: andi a2, a2, 2
180 ; RV64ZVE32F-NEXT: bnez a2, .LBB3_4
181 ; RV64ZVE32F-NEXT: .LBB3_2: # %else2
182 ; RV64ZVE32F-NEXT: ret
183 ; RV64ZVE32F-NEXT: .LBB3_3: # %cond.store
184 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
185 ; RV64ZVE32F-NEXT: vse8.v v8, (a0)
186 ; RV64ZVE32F-NEXT: andi a2, a2, 2
187 ; RV64ZVE32F-NEXT: beqz a2, .LBB3_2
188 ; RV64ZVE32F-NEXT: .LBB3_4: # %cond.store1
189 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
190 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
191 ; RV64ZVE32F-NEXT: vse8.v v8, (a1)
192 ; RV64ZVE32F-NEXT: ret
193 %tval = trunc <2 x i32> %val to <2 x i8>
194 call void @llvm.masked.scatter.v2i8.v2p0(<2 x i8> %tval, <2 x ptr> %ptrs, i32 1, <2 x i1> %m)
; Truncating scatter: %val is truncated from <2 x i64> to <2 x i8> and the
; narrowed vector is scattered with mask %m, alignment operand 1.
; The +v runs expect three vnsrl.wi narrowing steps (e32, e16, e8) before one
; masked indexed store.
; Both +zve32f runs instead expect the e8 value vector to be assembled from
; scalars with vmv.s.x and vslideup.vi; the riscv32 run first loads the two
; scalars with lw from offsets 0 and 8 of a0. The riscv64 +zve32f run then
; stores lane by lane with vse8.v through a2 and a3.
198 define void @mscatter_v2i64_truncstore_v2i8(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
199 ; RV32V-LABEL: mscatter_v2i64_truncstore_v2i8:
201 ; RV32V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
202 ; RV32V-NEXT: vnsrl.wi v8, v8, 0
203 ; RV32V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
204 ; RV32V-NEXT: vnsrl.wi v8, v8, 0
205 ; RV32V-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
206 ; RV32V-NEXT: vnsrl.wi v8, v8, 0
207 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
210 ; RV64-LABEL: mscatter_v2i64_truncstore_v2i8:
212 ; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
213 ; RV64-NEXT: vnsrl.wi v8, v8, 0
214 ; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
215 ; RV64-NEXT: vnsrl.wi v8, v8, 0
216 ; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
217 ; RV64-NEXT: vnsrl.wi v8, v8, 0
218 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
221 ; RV32ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i8:
222 ; RV32ZVE32F: # %bb.0:
223 ; RV32ZVE32F-NEXT: lw a1, 8(a0)
224 ; RV32ZVE32F-NEXT: lw a0, 0(a0)
225 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
226 ; RV32ZVE32F-NEXT: vmv.s.x v9, a1
227 ; RV32ZVE32F-NEXT: vmv.s.x v10, a0
228 ; RV32ZVE32F-NEXT: vslideup.vi v10, v9, 1
229 ; RV32ZVE32F-NEXT: vsoxei32.v v10, (zero), v8, v0.t
230 ; RV32ZVE32F-NEXT: ret
232 ; RV64ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i8:
233 ; RV64ZVE32F: # %bb.0:
234 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
235 ; RV64ZVE32F-NEXT: vmv.s.x v9, a1
236 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
237 ; RV64ZVE32F-NEXT: vmv.x.s a0, v0
238 ; RV64ZVE32F-NEXT: andi a1, a0, 1
239 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
240 ; RV64ZVE32F-NEXT: bnez a1, .LBB4_3
241 ; RV64ZVE32F-NEXT: # %bb.1: # %else
242 ; RV64ZVE32F-NEXT: andi a0, a0, 2
243 ; RV64ZVE32F-NEXT: bnez a0, .LBB4_4
244 ; RV64ZVE32F-NEXT: .LBB4_2: # %else2
245 ; RV64ZVE32F-NEXT: ret
246 ; RV64ZVE32F-NEXT: .LBB4_3: # %cond.store
247 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
248 ; RV64ZVE32F-NEXT: vse8.v v8, (a2)
249 ; RV64ZVE32F-NEXT: andi a0, a0, 2
250 ; RV64ZVE32F-NEXT: beqz a0, .LBB4_2
251 ; RV64ZVE32F-NEXT: .LBB4_4: # %cond.store1
252 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
253 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
254 ; RV64ZVE32F-NEXT: vse8.v v8, (a3)
255 ; RV64ZVE32F-NEXT: ret
256 %tval = trunc <2 x i64> %val to <2 x i8>
257 call void @llvm.masked.scatter.v2i8.v2p0(<2 x i8> %tval, <2 x ptr> %ptrs, i32 1, <2 x i1> %m)
261 declare void @llvm.masked.scatter.v4i8.v4p0(<4 x i8>, <4 x ptr>, i32, <4 x i1>)
; Masked scatter of <4 x i8> through <4 x ptr> with mask %m, alignment operand 1.
; Here both riscv32 runs share one set of checks (a single masked vsoxei32.v),
; and the riscv64 +v run expects a masked vsoxei64.v.
; The riscv64 +zve32f run expects the four pointers to be read with ld from
; offsets 0, 8, 16 and 24 of a0, the mask moved to a3, and one vse8.v per
; enabled lane (lanes 1..3 after a vslidedown.vi by the lane number).
263 define void @mscatter_v4i8(<4 x i8> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
264 ; RV32-LABEL: mscatter_v4i8:
266 ; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
267 ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
270 ; RV64-LABEL: mscatter_v4i8:
272 ; RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
273 ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
276 ; RV64ZVE32F-LABEL: mscatter_v4i8:
277 ; RV64ZVE32F: # %bb.0:
278 ; RV64ZVE32F-NEXT: ld a1, 24(a0)
279 ; RV64ZVE32F-NEXT: ld a2, 16(a0)
280 ; RV64ZVE32F-NEXT: ld a4, 8(a0)
281 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
282 ; RV64ZVE32F-NEXT: vmv.x.s a3, v0
283 ; RV64ZVE32F-NEXT: andi a5, a3, 1
284 ; RV64ZVE32F-NEXT: bnez a5, .LBB5_5
285 ; RV64ZVE32F-NEXT: # %bb.1: # %else
286 ; RV64ZVE32F-NEXT: andi a0, a3, 2
287 ; RV64ZVE32F-NEXT: bnez a0, .LBB5_6
288 ; RV64ZVE32F-NEXT: .LBB5_2: # %else2
289 ; RV64ZVE32F-NEXT: andi a0, a3, 4
290 ; RV64ZVE32F-NEXT: bnez a0, .LBB5_7
291 ; RV64ZVE32F-NEXT: .LBB5_3: # %else4
292 ; RV64ZVE32F-NEXT: andi a3, a3, 8
293 ; RV64ZVE32F-NEXT: bnez a3, .LBB5_8
294 ; RV64ZVE32F-NEXT: .LBB5_4: # %else6
295 ; RV64ZVE32F-NEXT: ret
296 ; RV64ZVE32F-NEXT: .LBB5_5: # %cond.store
297 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
298 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
299 ; RV64ZVE32F-NEXT: vse8.v v8, (a0)
300 ; RV64ZVE32F-NEXT: andi a0, a3, 2
301 ; RV64ZVE32F-NEXT: beqz a0, .LBB5_2
302 ; RV64ZVE32F-NEXT: .LBB5_6: # %cond.store1
303 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
304 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
305 ; RV64ZVE32F-NEXT: vse8.v v9, (a4)
306 ; RV64ZVE32F-NEXT: andi a0, a3, 4
307 ; RV64ZVE32F-NEXT: beqz a0, .LBB5_3
308 ; RV64ZVE32F-NEXT: .LBB5_7: # %cond.store3
309 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
310 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
311 ; RV64ZVE32F-NEXT: vse8.v v9, (a2)
312 ; RV64ZVE32F-NEXT: andi a3, a3, 8
313 ; RV64ZVE32F-NEXT: beqz a3, .LBB5_4
314 ; RV64ZVE32F-NEXT: .LBB5_8: # %cond.store5
315 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
316 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
317 ; RV64ZVE32F-NEXT: vse8.v v8, (a1)
318 ; RV64ZVE32F-NEXT: ret
319 call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> %m)
; Scatter of <4 x i8> with an all-true mask: %mtrue is a splat of i1 1 built
; with insertelement + shufflevector, so the function takes no mask argument.
; The riscv32 runs and the riscv64 +v run expect an unmasked indexed store
; (no v0.t operand).
; The riscv64 +zve32f checks still contain the full per-lane branch sequence:
; the mask is materialised with vmset.m, copied to a3, and the first branch is
; an always-taken `beqz zero`.
323 define void @mscatter_truemask_v4i8(<4 x i8> %val, <4 x ptr> %ptrs) {
324 ; RV32-LABEL: mscatter_truemask_v4i8:
326 ; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
327 ; RV32-NEXT: vsoxei32.v v8, (zero), v9
330 ; RV64-LABEL: mscatter_truemask_v4i8:
332 ; RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
333 ; RV64-NEXT: vsoxei64.v v8, (zero), v10
336 ; RV64ZVE32F-LABEL: mscatter_truemask_v4i8:
337 ; RV64ZVE32F: # %bb.0:
338 ; RV64ZVE32F-NEXT: ld a1, 24(a0)
339 ; RV64ZVE32F-NEXT: ld a2, 16(a0)
340 ; RV64ZVE32F-NEXT: ld a4, 8(a0)
341 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
342 ; RV64ZVE32F-NEXT: vmset.m v9
343 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9
344 ; RV64ZVE32F-NEXT: beqz zero, .LBB6_5
345 ; RV64ZVE32F-NEXT: # %bb.1: # %else
346 ; RV64ZVE32F-NEXT: andi a0, a3, 2
347 ; RV64ZVE32F-NEXT: bnez a0, .LBB6_6
348 ; RV64ZVE32F-NEXT: .LBB6_2: # %else2
349 ; RV64ZVE32F-NEXT: andi a0, a3, 4
350 ; RV64ZVE32F-NEXT: bnez a0, .LBB6_7
351 ; RV64ZVE32F-NEXT: .LBB6_3: # %else4
352 ; RV64ZVE32F-NEXT: andi a3, a3, 8
353 ; RV64ZVE32F-NEXT: bnez a3, .LBB6_8
354 ; RV64ZVE32F-NEXT: .LBB6_4: # %else6
355 ; RV64ZVE32F-NEXT: ret
356 ; RV64ZVE32F-NEXT: .LBB6_5: # %cond.store
357 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
358 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
359 ; RV64ZVE32F-NEXT: vse8.v v8, (a0)
360 ; RV64ZVE32F-NEXT: andi a0, a3, 2
361 ; RV64ZVE32F-NEXT: beqz a0, .LBB6_2
362 ; RV64ZVE32F-NEXT: .LBB6_6: # %cond.store1
363 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
364 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
365 ; RV64ZVE32F-NEXT: vse8.v v9, (a4)
366 ; RV64ZVE32F-NEXT: andi a0, a3, 4
367 ; RV64ZVE32F-NEXT: beqz a0, .LBB6_3
368 ; RV64ZVE32F-NEXT: .LBB6_7: # %cond.store3
369 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
370 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
371 ; RV64ZVE32F-NEXT: vse8.v v9, (a2)
372 ; RV64ZVE32F-NEXT: andi a3, a3, 8
373 ; RV64ZVE32F-NEXT: beqz a3, .LBB6_4
374 ; RV64ZVE32F-NEXT: .LBB6_8: # %cond.store5
375 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
376 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
377 ; RV64ZVE32F-NEXT: vse8.v v8, (a1)
378 ; RV64ZVE32F-NEXT: ret
379 %mhead = insertelement <4 x i1> poison, i1 1, i32 0
380 %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer
381 call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> %mtrue)
; Scatter of <4 x i8> with a constant all-false mask (zeroinitializer).
; All four runs share one set of checks through the common prefix; only the
; label check is visible in this excerpt.
; NOTE(review): presumably the scatter folds away to a bare return - confirm
; against the full check block.
385 define void @mscatter_falsemask_v4i8(<4 x i8> %val, <4 x ptr> %ptrs) {
386 ; CHECK-LABEL: mscatter_falsemask_v4i8:
389 call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> zeroinitializer)
393 declare void @llvm.masked.scatter.v8i8.v8p0(<8 x i8>, <8 x ptr>, i32, <8 x i1>)
; Masked scatter of <8 x i8> through <8 x ptr> with mask %m, alignment operand 1.
; The riscv32 runs expect a masked vsoxei32.v and the riscv64 +v run a masked
; vsoxei64.v.
; The riscv64 +zve32f run expects the eight pointers to be read with ld from
; offsets 0..56 of a0, the mask moved to a3, and one vse8.v per enabled lane.
; The last lane's bit is tested with `andi ..., -128`.
395 define void @mscatter_v8i8(<8 x i8> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
396 ; RV32-LABEL: mscatter_v8i8:
398 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
399 ; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
402 ; RV64-LABEL: mscatter_v8i8:
404 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
405 ; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
408 ; RV64ZVE32F-LABEL: mscatter_v8i8:
409 ; RV64ZVE32F: # %bb.0:
410 ; RV64ZVE32F-NEXT: ld a1, 56(a0)
411 ; RV64ZVE32F-NEXT: ld a2, 48(a0)
412 ; RV64ZVE32F-NEXT: ld a4, 40(a0)
413 ; RV64ZVE32F-NEXT: ld a5, 32(a0)
414 ; RV64ZVE32F-NEXT: ld a6, 24(a0)
415 ; RV64ZVE32F-NEXT: ld a7, 16(a0)
416 ; RV64ZVE32F-NEXT: ld t0, 8(a0)
417 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
418 ; RV64ZVE32F-NEXT: vmv.x.s a3, v0
419 ; RV64ZVE32F-NEXT: andi t1, a3, 1
420 ; RV64ZVE32F-NEXT: bnez t1, .LBB8_9
421 ; RV64ZVE32F-NEXT: # %bb.1: # %else
422 ; RV64ZVE32F-NEXT: andi a0, a3, 2
423 ; RV64ZVE32F-NEXT: bnez a0, .LBB8_10
424 ; RV64ZVE32F-NEXT: .LBB8_2: # %else2
425 ; RV64ZVE32F-NEXT: andi a0, a3, 4
426 ; RV64ZVE32F-NEXT: bnez a0, .LBB8_11
427 ; RV64ZVE32F-NEXT: .LBB8_3: # %else4
428 ; RV64ZVE32F-NEXT: andi a0, a3, 8
429 ; RV64ZVE32F-NEXT: bnez a0, .LBB8_12
430 ; RV64ZVE32F-NEXT: .LBB8_4: # %else6
431 ; RV64ZVE32F-NEXT: andi a0, a3, 16
432 ; RV64ZVE32F-NEXT: bnez a0, .LBB8_13
433 ; RV64ZVE32F-NEXT: .LBB8_5: # %else8
434 ; RV64ZVE32F-NEXT: andi a0, a3, 32
435 ; RV64ZVE32F-NEXT: bnez a0, .LBB8_14
436 ; RV64ZVE32F-NEXT: .LBB8_6: # %else10
437 ; RV64ZVE32F-NEXT: andi a0, a3, 64
438 ; RV64ZVE32F-NEXT: bnez a0, .LBB8_15
439 ; RV64ZVE32F-NEXT: .LBB8_7: # %else12
440 ; RV64ZVE32F-NEXT: andi a0, a3, -128
441 ; RV64ZVE32F-NEXT: bnez a0, .LBB8_16
442 ; RV64ZVE32F-NEXT: .LBB8_8: # %else14
443 ; RV64ZVE32F-NEXT: ret
444 ; RV64ZVE32F-NEXT: .LBB8_9: # %cond.store
445 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
446 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
447 ; RV64ZVE32F-NEXT: vse8.v v8, (a0)
448 ; RV64ZVE32F-NEXT: andi a0, a3, 2
449 ; RV64ZVE32F-NEXT: beqz a0, .LBB8_2
450 ; RV64ZVE32F-NEXT: .LBB8_10: # %cond.store1
451 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
452 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
453 ; RV64ZVE32F-NEXT: vse8.v v9, (t0)
454 ; RV64ZVE32F-NEXT: andi a0, a3, 4
455 ; RV64ZVE32F-NEXT: beqz a0, .LBB8_3
456 ; RV64ZVE32F-NEXT: .LBB8_11: # %cond.store3
457 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
458 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
459 ; RV64ZVE32F-NEXT: vse8.v v9, (a7)
460 ; RV64ZVE32F-NEXT: andi a0, a3, 8
461 ; RV64ZVE32F-NEXT: beqz a0, .LBB8_4
462 ; RV64ZVE32F-NEXT: .LBB8_12: # %cond.store5
463 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
464 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
465 ; RV64ZVE32F-NEXT: vse8.v v9, (a6)
466 ; RV64ZVE32F-NEXT: andi a0, a3, 16
467 ; RV64ZVE32F-NEXT: beqz a0, .LBB8_5
468 ; RV64ZVE32F-NEXT: .LBB8_13: # %cond.store7
469 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
470 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
471 ; RV64ZVE32F-NEXT: vse8.v v9, (a5)
472 ; RV64ZVE32F-NEXT: andi a0, a3, 32
473 ; RV64ZVE32F-NEXT: beqz a0, .LBB8_6
474 ; RV64ZVE32F-NEXT: .LBB8_14: # %cond.store9
475 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
476 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
477 ; RV64ZVE32F-NEXT: vse8.v v9, (a4)
478 ; RV64ZVE32F-NEXT: andi a0, a3, 64
479 ; RV64ZVE32F-NEXT: beqz a0, .LBB8_7
480 ; RV64ZVE32F-NEXT: .LBB8_15: # %cond.store11
481 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
482 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 6
483 ; RV64ZVE32F-NEXT: vse8.v v9, (a2)
484 ; RV64ZVE32F-NEXT: andi a0, a3, -128
485 ; RV64ZVE32F-NEXT: beqz a0, .LBB8_8
486 ; RV64ZVE32F-NEXT: .LBB8_16: # %cond.store13
487 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
488 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
489 ; RV64ZVE32F-NEXT: vse8.v v8, (a1)
490 ; RV64ZVE32F-NEXT: ret
491 call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> %val, <8 x ptr> %ptrs, i32 1, <8 x i1> %m)
; Base-plus-index scatter: the <8 x ptr> operand is formed by a vector
; getelementptr (inbounds, i8 element type) from scalar %base and <8 x i8> %idxs.
; The riscv32 runs and the riscv64 +v run expect the i8 indices to be
; sign-extended (vsext.vf4 to e32, vsext.vf8 to e64) and used as the offset
; vector of one masked indexed store whose base register is a0.
; The riscv64 +zve32f run expects a per-lane sequence: each enabled lane
; extracts its index with vmv.x.s (after vslidedown.vi where needed), adds it
; to a0, and stores one element with vse8.v.
495 define void @mscatter_baseidx_v8i8(<8 x i8> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
496 ; RV32-LABEL: mscatter_baseidx_v8i8:
498 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
499 ; RV32-NEXT: vsext.vf4 v10, v9
500 ; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
501 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
504 ; RV64-LABEL: mscatter_baseidx_v8i8:
506 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
507 ; RV64-NEXT: vsext.vf8 v12, v9
508 ; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
509 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
512 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8:
513 ; RV64ZVE32F: # %bb.0:
514 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
515 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
516 ; RV64ZVE32F-NEXT: andi a2, a1, 1
517 ; RV64ZVE32F-NEXT: beqz a2, .LBB9_2
518 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
519 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
520 ; RV64ZVE32F-NEXT: add a2, a0, a2
521 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
522 ; RV64ZVE32F-NEXT: vse8.v v8, (a2)
523 ; RV64ZVE32F-NEXT: .LBB9_2: # %else
524 ; RV64ZVE32F-NEXT: andi a2, a1, 2
525 ; RV64ZVE32F-NEXT: beqz a2, .LBB9_4
526 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
527 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
528 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
529 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
530 ; RV64ZVE32F-NEXT: add a2, a0, a2
531 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
532 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
533 ; RV64ZVE32F-NEXT: vse8.v v10, (a2)
534 ; RV64ZVE32F-NEXT: .LBB9_4: # %else2
535 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
536 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
537 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
538 ; RV64ZVE32F-NEXT: andi a2, a1, 4
539 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
540 ; RV64ZVE32F-NEXT: bnez a2, .LBB9_12
541 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
542 ; RV64ZVE32F-NEXT: andi a2, a1, 8
543 ; RV64ZVE32F-NEXT: bnez a2, .LBB9_13
544 ; RV64ZVE32F-NEXT: .LBB9_6: # %else6
545 ; RV64ZVE32F-NEXT: andi a2, a1, 16
546 ; RV64ZVE32F-NEXT: bnez a2, .LBB9_14
547 ; RV64ZVE32F-NEXT: .LBB9_7: # %else8
548 ; RV64ZVE32F-NEXT: andi a2, a1, 32
549 ; RV64ZVE32F-NEXT: beqz a2, .LBB9_9
550 ; RV64ZVE32F-NEXT: .LBB9_8: # %cond.store9
551 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
552 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
553 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
554 ; RV64ZVE32F-NEXT: add a2, a0, a2
555 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
556 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
557 ; RV64ZVE32F-NEXT: vse8.v v9, (a2)
558 ; RV64ZVE32F-NEXT: .LBB9_9: # %else10
559 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
560 ; RV64ZVE32F-NEXT: andi a2, a1, 64
561 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
562 ; RV64ZVE32F-NEXT: bnez a2, .LBB9_15
563 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
564 ; RV64ZVE32F-NEXT: andi a1, a1, -128
565 ; RV64ZVE32F-NEXT: bnez a1, .LBB9_16
566 ; RV64ZVE32F-NEXT: .LBB9_11: # %else14
567 ; RV64ZVE32F-NEXT: ret
568 ; RV64ZVE32F-NEXT: .LBB9_12: # %cond.store3
569 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
570 ; RV64ZVE32F-NEXT: add a2, a0, a2
571 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
572 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
573 ; RV64ZVE32F-NEXT: vse8.v v11, (a2)
574 ; RV64ZVE32F-NEXT: andi a2, a1, 8
575 ; RV64ZVE32F-NEXT: beqz a2, .LBB9_6
576 ; RV64ZVE32F-NEXT: .LBB9_13: # %cond.store5
577 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
578 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
579 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
580 ; RV64ZVE32F-NEXT: add a2, a0, a2
581 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
582 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
583 ; RV64ZVE32F-NEXT: vse8.v v9, (a2)
584 ; RV64ZVE32F-NEXT: andi a2, a1, 16
585 ; RV64ZVE32F-NEXT: beqz a2, .LBB9_7
586 ; RV64ZVE32F-NEXT: .LBB9_14: # %cond.store7
587 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
588 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
589 ; RV64ZVE32F-NEXT: add a2, a0, a2
590 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
591 ; RV64ZVE32F-NEXT: vse8.v v9, (a2)
592 ; RV64ZVE32F-NEXT: andi a2, a1, 32
593 ; RV64ZVE32F-NEXT: bnez a2, .LBB9_8
594 ; RV64ZVE32F-NEXT: j .LBB9_9
595 ; RV64ZVE32F-NEXT: .LBB9_15: # %cond.store11
596 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
597 ; RV64ZVE32F-NEXT: add a2, a0, a2
598 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
599 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
600 ; RV64ZVE32F-NEXT: vse8.v v10, (a2)
601 ; RV64ZVE32F-NEXT: andi a1, a1, -128
602 ; RV64ZVE32F-NEXT: beqz a1, .LBB9_11
603 ; RV64ZVE32F-NEXT: .LBB9_16: # %cond.store13
604 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
605 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
606 ; RV64ZVE32F-NEXT: vmv.x.s a1, v9
607 ; RV64ZVE32F-NEXT: add a0, a0, a1
608 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
609 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
610 ; RV64ZVE32F-NEXT: vse8.v v8, (a0)
611 ; RV64ZVE32F-NEXT: ret
612 %ptrs = getelementptr inbounds i8, ptr %base, <8 x i8> %idxs
613 call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> %val, <8 x ptr> %ptrs, i32 1, <8 x i1> %m)
617 declare void @llvm.masked.scatter.v1i16.v1p0(<1 x i16>, <1 x ptr>, i32, <1 x i1>)
; Masked scatter of one i16 element: <1 x i16> value, <1 x ptr> addresses,
; <1 x i1> mask %m, alignment operand 2.
; The +v runs and the riscv32 +zve32f run expect a single masked indexed store
; at e16. The riscv64 +zve32f run instead expects a vfirst.m test of the mask
; and a conditional vse16.v through a0.
619 define void @mscatter_v1i16(<1 x i16> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
620 ; RV32V-LABEL: mscatter_v1i16:
622 ; RV32V-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
623 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
626 ; RV64-LABEL: mscatter_v1i16:
628 ; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
629 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
632 ; RV32ZVE32F-LABEL: mscatter_v1i16:
633 ; RV32ZVE32F: # %bb.0:
634 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
635 ; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
636 ; RV32ZVE32F-NEXT: ret
638 ; RV64ZVE32F-LABEL: mscatter_v1i16:
639 ; RV64ZVE32F: # %bb.0:
640 ; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
641 ; RV64ZVE32F-NEXT: vfirst.m a1, v0
642 ; RV64ZVE32F-NEXT: bnez a1, .LBB10_2
643 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
644 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
645 ; RV64ZVE32F-NEXT: vse16.v v8, (a0)
646 ; RV64ZVE32F-NEXT: .LBB10_2: # %else
647 ; RV64ZVE32F-NEXT: ret
648 call void @llvm.masked.scatter.v1i16.v1p0(<1 x i16> %val, <1 x ptr> %ptrs, i32 2, <1 x i1> %m)
652 declare void @llvm.masked.scatter.v2i16.v2p0(<2 x i16>, <2 x ptr>, i32, <2 x i1>)
; Masked scatter of <2 x i16> through <2 x ptr> with mask %m, alignment operand 2.
; The +v runs and the riscv32 +zve32f run expect one masked indexed store at e16.
; The riscv64 +zve32f run expects the mask moved to a scalar (vmv.x.s), bits 1
; and 2 tested separately, and one vse16.v per enabled lane through a0 and a1.
654 define void @mscatter_v2i16(<2 x i16> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
655 ; RV32V-LABEL: mscatter_v2i16:
657 ; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
658 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
661 ; RV64-LABEL: mscatter_v2i16:
663 ; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
664 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
667 ; RV32ZVE32F-LABEL: mscatter_v2i16:
668 ; RV32ZVE32F: # %bb.0:
669 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
670 ; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
671 ; RV32ZVE32F-NEXT: ret
673 ; RV64ZVE32F-LABEL: mscatter_v2i16:
674 ; RV64ZVE32F: # %bb.0:
675 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
676 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
677 ; RV64ZVE32F-NEXT: andi a3, a2, 1
678 ; RV64ZVE32F-NEXT: bnez a3, .LBB11_3
679 ; RV64ZVE32F-NEXT: # %bb.1: # %else
680 ; RV64ZVE32F-NEXT: andi a2, a2, 2
681 ; RV64ZVE32F-NEXT: bnez a2, .LBB11_4
682 ; RV64ZVE32F-NEXT: .LBB11_2: # %else2
683 ; RV64ZVE32F-NEXT: ret
684 ; RV64ZVE32F-NEXT: .LBB11_3: # %cond.store
685 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
686 ; RV64ZVE32F-NEXT: vse16.v v8, (a0)
687 ; RV64ZVE32F-NEXT: andi a2, a2, 2
688 ; RV64ZVE32F-NEXT: beqz a2, .LBB11_2
689 ; RV64ZVE32F-NEXT: .LBB11_4: # %cond.store1
690 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
691 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
692 ; RV64ZVE32F-NEXT: vse16.v v8, (a1)
693 ; RV64ZVE32F-NEXT: ret
694 call void @llvm.masked.scatter.v2i16.v2p0(<2 x i16> %val, <2 x ptr> %ptrs, i32 2, <2 x i1> %m)
; Truncating scatter: %val is truncated from <2 x i32> to <2 x i16> and the
; narrowed vector is scattered with mask %m, alignment operand 2.
; Every run expects one vnsrl.wi (shift amount 0) at e16 to narrow the value.
; The riscv64 +zve32f run then stores lane by lane with vse16.v under scalar
; tests of the mask bits; the other runs use one masked indexed store.
698 define void @mscatter_v2i32_truncstore_v2i16(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
699 ; RV32V-LABEL: mscatter_v2i32_truncstore_v2i16:
701 ; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
702 ; RV32V-NEXT: vnsrl.wi v8, v8, 0
703 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
706 ; RV64-LABEL: mscatter_v2i32_truncstore_v2i16:
708 ; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
709 ; RV64-NEXT: vnsrl.wi v8, v8, 0
710 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
713 ; RV32ZVE32F-LABEL: mscatter_v2i32_truncstore_v2i16:
714 ; RV32ZVE32F: # %bb.0:
715 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
716 ; RV32ZVE32F-NEXT: vnsrl.wi v8, v8, 0
717 ; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
718 ; RV32ZVE32F-NEXT: ret
720 ; RV64ZVE32F-LABEL: mscatter_v2i32_truncstore_v2i16:
721 ; RV64ZVE32F: # %bb.0:
722 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
723 ; RV64ZVE32F-NEXT: vnsrl.wi v8, v8, 0
724 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
725 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
726 ; RV64ZVE32F-NEXT: andi a3, a2, 1
727 ; RV64ZVE32F-NEXT: bnez a3, .LBB12_3
728 ; RV64ZVE32F-NEXT: # %bb.1: # %else
729 ; RV64ZVE32F-NEXT: andi a2, a2, 2
730 ; RV64ZVE32F-NEXT: bnez a2, .LBB12_4
731 ; RV64ZVE32F-NEXT: .LBB12_2: # %else2
732 ; RV64ZVE32F-NEXT: ret
733 ; RV64ZVE32F-NEXT: .LBB12_3: # %cond.store
734 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
735 ; RV64ZVE32F-NEXT: vse16.v v8, (a0)
736 ; RV64ZVE32F-NEXT: andi a2, a2, 2
737 ; RV64ZVE32F-NEXT: beqz a2, .LBB12_2
738 ; RV64ZVE32F-NEXT: .LBB12_4: # %cond.store1
739 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
740 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
741 ; RV64ZVE32F-NEXT: vse16.v v8, (a1)
742 ; RV64ZVE32F-NEXT: ret
743 %tval = trunc <2 x i32> %val to <2 x i16>
744 call void @llvm.masked.scatter.v2i16.v2p0(<2 x i16> %tval, <2 x ptr> %ptrs, i32 2, <2 x i1> %m)
; Truncating scatter: %val is truncated from <2 x i64> to <2 x i16> and the
; narrowed vector is scattered with mask %m, alignment operand 2.
; The +v runs expect two vnsrl.wi narrowing steps (e32, then e16) before one
; masked indexed store.
; Both +zve32f runs instead expect the e16 value vector to be assembled from
; scalars with vmv.s.x and vslideup.vi; the riscv32 run first loads the two
; scalars with lw from offsets 0 and 8 of a0. The riscv64 +zve32f run then
; stores lane by lane with vse16.v through a2 and a3.
748 define void @mscatter_v2i64_truncstore_v2i16(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
749 ; RV32V-LABEL: mscatter_v2i64_truncstore_v2i16:
751 ; RV32V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
752 ; RV32V-NEXT: vnsrl.wi v8, v8, 0
753 ; RV32V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
754 ; RV32V-NEXT: vnsrl.wi v8, v8, 0
755 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
758 ; RV64-LABEL: mscatter_v2i64_truncstore_v2i16:
760 ; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
761 ; RV64-NEXT: vnsrl.wi v8, v8, 0
762 ; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
763 ; RV64-NEXT: vnsrl.wi v8, v8, 0
764 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
767 ; RV32ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i16:
768 ; RV32ZVE32F: # %bb.0:
769 ; RV32ZVE32F-NEXT: lw a1, 8(a0)
770 ; RV32ZVE32F-NEXT: lw a0, 0(a0)
771 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
772 ; RV32ZVE32F-NEXT: vmv.s.x v9, a1
773 ; RV32ZVE32F-NEXT: vmv.s.x v10, a0
774 ; RV32ZVE32F-NEXT: vslideup.vi v10, v9, 1
775 ; RV32ZVE32F-NEXT: vsoxei32.v v10, (zero), v8, v0.t
776 ; RV32ZVE32F-NEXT: ret
778 ; RV64ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i16:
779 ; RV64ZVE32F: # %bb.0:
780 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
781 ; RV64ZVE32F-NEXT: vmv.s.x v9, a1
782 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
783 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
784 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
785 ; RV64ZVE32F-NEXT: vmv.x.s a0, v0
786 ; RV64ZVE32F-NEXT: andi a1, a0, 1
787 ; RV64ZVE32F-NEXT: bnez a1, .LBB13_3
788 ; RV64ZVE32F-NEXT: # %bb.1: # %else
789 ; RV64ZVE32F-NEXT: andi a0, a0, 2
790 ; RV64ZVE32F-NEXT: bnez a0, .LBB13_4
791 ; RV64ZVE32F-NEXT: .LBB13_2: # %else2
792 ; RV64ZVE32F-NEXT: ret
793 ; RV64ZVE32F-NEXT: .LBB13_3: # %cond.store
794 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
795 ; RV64ZVE32F-NEXT: vse16.v v8, (a2)
796 ; RV64ZVE32F-NEXT: andi a0, a0, 2
797 ; RV64ZVE32F-NEXT: beqz a0, .LBB13_2
798 ; RV64ZVE32F-NEXT: .LBB13_4: # %cond.store1
799 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
800 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
801 ; RV64ZVE32F-NEXT: vse16.v v8, (a3)
802 ; RV64ZVE32F-NEXT: ret
803 %tval = trunc <2 x i64> %val to <2 x i16>
804 call void @llvm.masked.scatter.v2i16.v2p0(<2 x i16> %tval, <2 x ptr> %ptrs, i32 2, <2 x i1> %m)
808 declare void @llvm.masked.scatter.v4i16.v4p0(<4 x i16>, <4 x ptr>, i32, <4 x i1>)
; Masked scatter of <4 x i16> through <4 x ptr> with mask %m, alignment operand 2.
; Both riscv32 runs share one set of checks (a single masked vsoxei32.v), and
; the riscv64 +v run expects a masked vsoxei64.v.
; The riscv64 +zve32f run expects the four pointers to be read with ld from
; offsets 0, 8, 16 and 24 of a0, the mask moved to a3, and one vse16.v per
; enabled lane (lanes 1..3 after a vslidedown.vi by the lane number).
810 define void @mscatter_v4i16(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
811 ; RV32-LABEL: mscatter_v4i16:
813 ; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
814 ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
817 ; RV64-LABEL: mscatter_v4i16:
819 ; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
820 ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
823 ; RV64ZVE32F-LABEL: mscatter_v4i16:
824 ; RV64ZVE32F: # %bb.0:
825 ; RV64ZVE32F-NEXT: ld a1, 24(a0)
826 ; RV64ZVE32F-NEXT: ld a2, 16(a0)
827 ; RV64ZVE32F-NEXT: ld a4, 8(a0)
828 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
829 ; RV64ZVE32F-NEXT: vmv.x.s a3, v0
830 ; RV64ZVE32F-NEXT: andi a5, a3, 1
831 ; RV64ZVE32F-NEXT: bnez a5, .LBB14_5
832 ; RV64ZVE32F-NEXT: # %bb.1: # %else
833 ; RV64ZVE32F-NEXT: andi a0, a3, 2
834 ; RV64ZVE32F-NEXT: bnez a0, .LBB14_6
835 ; RV64ZVE32F-NEXT: .LBB14_2: # %else2
836 ; RV64ZVE32F-NEXT: andi a0, a3, 4
837 ; RV64ZVE32F-NEXT: bnez a0, .LBB14_7
838 ; RV64ZVE32F-NEXT: .LBB14_3: # %else4
839 ; RV64ZVE32F-NEXT: andi a3, a3, 8
840 ; RV64ZVE32F-NEXT: bnez a3, .LBB14_8
841 ; RV64ZVE32F-NEXT: .LBB14_4: # %else6
842 ; RV64ZVE32F-NEXT: ret
843 ; RV64ZVE32F-NEXT: .LBB14_5: # %cond.store
844 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
845 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
846 ; RV64ZVE32F-NEXT: vse16.v v8, (a0)
847 ; RV64ZVE32F-NEXT: andi a0, a3, 2
848 ; RV64ZVE32F-NEXT: beqz a0, .LBB14_2
849 ; RV64ZVE32F-NEXT: .LBB14_6: # %cond.store1
850 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
851 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
852 ; RV64ZVE32F-NEXT: vse16.v v9, (a4)
853 ; RV64ZVE32F-NEXT: andi a0, a3, 4
854 ; RV64ZVE32F-NEXT: beqz a0, .LBB14_3
855 ; RV64ZVE32F-NEXT: .LBB14_7: # %cond.store3
856 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
857 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
858 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
859 ; RV64ZVE32F-NEXT: andi a3, a3, 8
860 ; RV64ZVE32F-NEXT: beqz a3, .LBB14_4
861 ; RV64ZVE32F-NEXT: .LBB14_8: # %cond.store5
862 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
863 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
864 ; RV64ZVE32F-NEXT: vse16.v v8, (a1)
865 ; RV64ZVE32F-NEXT: ret
866 call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> %m)
; Same scatter as a <4 x i16> masked scatter, but the mask is a constant
; all-true splat built from insertelement + shufflevector.
; With the RV32 and RV64 prefixes the expected store is the unmasked form of
; vsoxei32.v / vsoxei64.v (no v0.t operand).
; Under the RV64ZVE32F prefix the expected output still walks the per-element
; branch sequence, reading the mask from a register filled by vmset.m.
; NOTE(review): the leading `beqz zero` and the tests on the vmset.m result
; look like a missed constant-mask fold in codegen rather than a test problem -
; confirm before relying on this output as the desired end state.
870 define void @mscatter_truemask_v4i16(<4 x i16> %val, <4 x ptr> %ptrs) {
871 ; RV32-LABEL: mscatter_truemask_v4i16:
873 ; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
874 ; RV32-NEXT: vsoxei32.v v8, (zero), v9
877 ; RV64-LABEL: mscatter_truemask_v4i16:
879 ; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
880 ; RV64-NEXT: vsoxei64.v v8, (zero), v10
883 ; RV64ZVE32F-LABEL: mscatter_truemask_v4i16:
884 ; RV64ZVE32F: # %bb.0:
885 ; RV64ZVE32F-NEXT: ld a1, 24(a0)
886 ; RV64ZVE32F-NEXT: ld a2, 16(a0)
887 ; RV64ZVE32F-NEXT: ld a4, 8(a0)
888 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
889 ; RV64ZVE32F-NEXT: vmset.m v9
890 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9
891 ; RV64ZVE32F-NEXT: beqz zero, .LBB15_5
892 ; RV64ZVE32F-NEXT: # %bb.1: # %else
893 ; RV64ZVE32F-NEXT: andi a0, a3, 2
894 ; RV64ZVE32F-NEXT: bnez a0, .LBB15_6
895 ; RV64ZVE32F-NEXT: .LBB15_2: # %else2
896 ; RV64ZVE32F-NEXT: andi a0, a3, 4
897 ; RV64ZVE32F-NEXT: bnez a0, .LBB15_7
898 ; RV64ZVE32F-NEXT: .LBB15_3: # %else4
899 ; RV64ZVE32F-NEXT: andi a3, a3, 8
900 ; RV64ZVE32F-NEXT: bnez a3, .LBB15_8
901 ; RV64ZVE32F-NEXT: .LBB15_4: # %else6
902 ; RV64ZVE32F-NEXT: ret
903 ; RV64ZVE32F-NEXT: .LBB15_5: # %cond.store
904 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
905 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
906 ; RV64ZVE32F-NEXT: vse16.v v8, (a0)
907 ; RV64ZVE32F-NEXT: andi a0, a3, 2
908 ; RV64ZVE32F-NEXT: beqz a0, .LBB15_2
909 ; RV64ZVE32F-NEXT: .LBB15_6: # %cond.store1
910 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
911 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
912 ; RV64ZVE32F-NEXT: vse16.v v9, (a4)
913 ; RV64ZVE32F-NEXT: andi a0, a3, 4
914 ; RV64ZVE32F-NEXT: beqz a0, .LBB15_3
915 ; RV64ZVE32F-NEXT: .LBB15_7: # %cond.store3
916 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
917 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
918 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
919 ; RV64ZVE32F-NEXT: andi a3, a3, 8
920 ; RV64ZVE32F-NEXT: beqz a3, .LBB15_4
921 ; RV64ZVE32F-NEXT: .LBB15_8: # %cond.store5
922 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
923 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
924 ; RV64ZVE32F-NEXT: vse16.v v8, (a1)
925 ; RV64ZVE32F-NEXT: ret
926 %mhead = insertelement <4 x i1> poison, i1 1, i32 0
927 %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer
928 call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> %mtrue)
; <4 x i16> masked scatter whose mask is the constant all-false vector
; (zeroinitializer). It is checked with the common CHECK prefix, so every RUN
; configuration is expected to produce the same output.
; NOTE(review): presumably the scatter folds away completely here; the expected
; instruction lines are not visible in this excerpt - confirm.
932 define void @mscatter_falsemask_v4i16(<4 x i16> %val, <4 x ptr> %ptrs) {
933 ; CHECK-LABEL: mscatter_falsemask_v4i16:
936 call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> zeroinitializer)
940 declare void @llvm.masked.scatter.v8i16.v8p0(<8 x i16>, <8 x ptr>, i32, <8 x i1>)
; Masked scatter of <8 x i16> through a vector of eight pointers (alignment 2)
; under the run-time mask %m.
; With the RV32 and RV64 prefixes the expected code is one masked
; vsoxei32.v / vsoxei64.v with a zero base.
; Under the RV64ZVE32F prefix the eight pointers are read with scalar loads
; relative to a0 (offsets 0 through 56) and each lane gets its own mask-bit
; test followed by vslidedown.vi (lanes 1-7) and vse16.v. The test for the
; last lane uses the immediate -128.
942 define void @mscatter_v8i16(<8 x i16> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
943 ; RV32-LABEL: mscatter_v8i16:
945 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
946 ; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
949 ; RV64-LABEL: mscatter_v8i16:
951 ; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
952 ; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
955 ; RV64ZVE32F-LABEL: mscatter_v8i16:
956 ; RV64ZVE32F: # %bb.0:
957 ; RV64ZVE32F-NEXT: ld a1, 56(a0)
958 ; RV64ZVE32F-NEXT: ld a2, 48(a0)
959 ; RV64ZVE32F-NEXT: ld a4, 40(a0)
960 ; RV64ZVE32F-NEXT: ld a5, 32(a0)
961 ; RV64ZVE32F-NEXT: ld a6, 24(a0)
962 ; RV64ZVE32F-NEXT: ld a7, 16(a0)
963 ; RV64ZVE32F-NEXT: ld t0, 8(a0)
964 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
965 ; RV64ZVE32F-NEXT: vmv.x.s a3, v0
966 ; RV64ZVE32F-NEXT: andi t1, a3, 1
967 ; RV64ZVE32F-NEXT: bnez t1, .LBB17_9
968 ; RV64ZVE32F-NEXT: # %bb.1: # %else
969 ; RV64ZVE32F-NEXT: andi a0, a3, 2
970 ; RV64ZVE32F-NEXT: bnez a0, .LBB17_10
971 ; RV64ZVE32F-NEXT: .LBB17_2: # %else2
972 ; RV64ZVE32F-NEXT: andi a0, a3, 4
973 ; RV64ZVE32F-NEXT: bnez a0, .LBB17_11
974 ; RV64ZVE32F-NEXT: .LBB17_3: # %else4
975 ; RV64ZVE32F-NEXT: andi a0, a3, 8
976 ; RV64ZVE32F-NEXT: bnez a0, .LBB17_12
977 ; RV64ZVE32F-NEXT: .LBB17_4: # %else6
978 ; RV64ZVE32F-NEXT: andi a0, a3, 16
979 ; RV64ZVE32F-NEXT: bnez a0, .LBB17_13
980 ; RV64ZVE32F-NEXT: .LBB17_5: # %else8
981 ; RV64ZVE32F-NEXT: andi a0, a3, 32
982 ; RV64ZVE32F-NEXT: bnez a0, .LBB17_14
983 ; RV64ZVE32F-NEXT: .LBB17_6: # %else10
984 ; RV64ZVE32F-NEXT: andi a0, a3, 64
985 ; RV64ZVE32F-NEXT: bnez a0, .LBB17_15
986 ; RV64ZVE32F-NEXT: .LBB17_7: # %else12
987 ; RV64ZVE32F-NEXT: andi a0, a3, -128
988 ; RV64ZVE32F-NEXT: bnez a0, .LBB17_16
989 ; RV64ZVE32F-NEXT: .LBB17_8: # %else14
990 ; RV64ZVE32F-NEXT: ret
991 ; RV64ZVE32F-NEXT: .LBB17_9: # %cond.store
992 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
993 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
994 ; RV64ZVE32F-NEXT: vse16.v v8, (a0)
995 ; RV64ZVE32F-NEXT: andi a0, a3, 2
996 ; RV64ZVE32F-NEXT: beqz a0, .LBB17_2
997 ; RV64ZVE32F-NEXT: .LBB17_10: # %cond.store1
998 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
999 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
1000 ; RV64ZVE32F-NEXT: vse16.v v9, (t0)
1001 ; RV64ZVE32F-NEXT: andi a0, a3, 4
1002 ; RV64ZVE32F-NEXT: beqz a0, .LBB17_3
1003 ; RV64ZVE32F-NEXT: .LBB17_11: # %cond.store3
1004 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1005 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
1006 ; RV64ZVE32F-NEXT: vse16.v v9, (a7)
1007 ; RV64ZVE32F-NEXT: andi a0, a3, 8
1008 ; RV64ZVE32F-NEXT: beqz a0, .LBB17_4
1009 ; RV64ZVE32F-NEXT: .LBB17_12: # %cond.store5
1010 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1011 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
1012 ; RV64ZVE32F-NEXT: vse16.v v9, (a6)
1013 ; RV64ZVE32F-NEXT: andi a0, a3, 16
1014 ; RV64ZVE32F-NEXT: beqz a0, .LBB17_5
1015 ; RV64ZVE32F-NEXT: .LBB17_13: # %cond.store7
1016 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1017 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
1018 ; RV64ZVE32F-NEXT: vse16.v v9, (a5)
1019 ; RV64ZVE32F-NEXT: andi a0, a3, 32
1020 ; RV64ZVE32F-NEXT: beqz a0, .LBB17_6
1021 ; RV64ZVE32F-NEXT: .LBB17_14: # %cond.store9
1022 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1023 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
1024 ; RV64ZVE32F-NEXT: vse16.v v9, (a4)
1025 ; RV64ZVE32F-NEXT: andi a0, a3, 64
1026 ; RV64ZVE32F-NEXT: beqz a0, .LBB17_7
1027 ; RV64ZVE32F-NEXT: .LBB17_15: # %cond.store11
1028 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1029 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 6
1030 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
1031 ; RV64ZVE32F-NEXT: andi a0, a3, -128
1032 ; RV64ZVE32F-NEXT: beqz a0, .LBB17_8
1033 ; RV64ZVE32F-NEXT: .LBB17_16: # %cond.store13
1034 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1035 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
1036 ; RV64ZVE32F-NEXT: vse16.v v8, (a1)
1037 ; RV64ZVE32F-NEXT: ret
1038 call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
; Masked scatter of <8 x i16> to %base + %idxs[i] * 2, where the <8 x i8>
; indices feed the i16 getelementptr directly (no explicit extension in the IR).
; With the RV32 and RV64 prefixes the indices are sign-extended (vsext.vf4 to
; e32 / vsext.vf8 to e64), doubled with vadd.vv to form byte offsets, and used
; by one masked vsoxei32.v / vsoxei64.v on base a0.
; Under the RV64ZVE32F prefix each lane is handled on its own: test its mask
; bit, move the index to a scalar with vmv.x.s, shift left by 1, add the base,
; then store the matching value lane with vse16.v. The index vector is split
; with vslidedown.vi (by 4, then by 2) so later lanes can be reached.
1042 define void @mscatter_baseidx_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
1043 ; RV32-LABEL: mscatter_baseidx_v8i8_v8i16:
1045 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1046 ; RV32-NEXT: vsext.vf4 v10, v9
1047 ; RV32-NEXT: vadd.vv v10, v10, v10
1048 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
1049 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
1052 ; RV64-LABEL: mscatter_baseidx_v8i8_v8i16:
1054 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1055 ; RV64-NEXT: vsext.vf8 v12, v9
1056 ; RV64-NEXT: vadd.vv v12, v12, v12
1057 ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma
1058 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
1061 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8_v8i16:
1062 ; RV64ZVE32F: # %bb.0:
1063 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1064 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
1065 ; RV64ZVE32F-NEXT: andi a2, a1, 1
1066 ; RV64ZVE32F-NEXT: beqz a2, .LBB18_2
1067 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
1068 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1069 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1070 ; RV64ZVE32F-NEXT: add a2, a0, a2
1071 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1072 ; RV64ZVE32F-NEXT: vse16.v v8, (a2)
1073 ; RV64ZVE32F-NEXT: .LBB18_2: # %else
1074 ; RV64ZVE32F-NEXT: andi a2, a1, 2
1075 ; RV64ZVE32F-NEXT: beqz a2, .LBB18_4
1076 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
1077 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1078 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
1079 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
1080 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1081 ; RV64ZVE32F-NEXT: add a2, a0, a2
1082 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1083 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
1084 ; RV64ZVE32F-NEXT: vse16.v v10, (a2)
1085 ; RV64ZVE32F-NEXT: .LBB18_4: # %else2
1086 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
1087 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
1088 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
1089 ; RV64ZVE32F-NEXT: andi a2, a1, 4
1090 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
1091 ; RV64ZVE32F-NEXT: bnez a2, .LBB18_12
1092 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
1093 ; RV64ZVE32F-NEXT: andi a2, a1, 8
1094 ; RV64ZVE32F-NEXT: bnez a2, .LBB18_13
1095 ; RV64ZVE32F-NEXT: .LBB18_6: # %else6
1096 ; RV64ZVE32F-NEXT: andi a2, a1, 16
1097 ; RV64ZVE32F-NEXT: bnez a2, .LBB18_14
1098 ; RV64ZVE32F-NEXT: .LBB18_7: # %else8
1099 ; RV64ZVE32F-NEXT: andi a2, a1, 32
1100 ; RV64ZVE32F-NEXT: beqz a2, .LBB18_9
1101 ; RV64ZVE32F-NEXT: .LBB18_8: # %cond.store9
1102 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1103 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
1104 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1105 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1106 ; RV64ZVE32F-NEXT: add a2, a0, a2
1107 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1108 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
1109 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
1110 ; RV64ZVE32F-NEXT: .LBB18_9: # %else10
1111 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
1112 ; RV64ZVE32F-NEXT: andi a2, a1, 64
1113 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
1114 ; RV64ZVE32F-NEXT: bnez a2, .LBB18_15
1115 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
1116 ; RV64ZVE32F-NEXT: andi a1, a1, -128
1117 ; RV64ZVE32F-NEXT: bnez a1, .LBB18_16
1118 ; RV64ZVE32F-NEXT: .LBB18_11: # %else14
1119 ; RV64ZVE32F-NEXT: ret
1120 ; RV64ZVE32F-NEXT: .LBB18_12: # %cond.store3
1121 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1122 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1123 ; RV64ZVE32F-NEXT: add a2, a0, a2
1124 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1125 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
1126 ; RV64ZVE32F-NEXT: vse16.v v11, (a2)
1127 ; RV64ZVE32F-NEXT: andi a2, a1, 8
1128 ; RV64ZVE32F-NEXT: beqz a2, .LBB18_6
1129 ; RV64ZVE32F-NEXT: .LBB18_13: # %cond.store5
1130 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1131 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
1132 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1133 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1134 ; RV64ZVE32F-NEXT: add a2, a0, a2
1135 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1136 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
1137 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
1138 ; RV64ZVE32F-NEXT: andi a2, a1, 16
1139 ; RV64ZVE32F-NEXT: beqz a2, .LBB18_7
1140 ; RV64ZVE32F-NEXT: .LBB18_14: # %cond.store7
1141 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1142 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
1143 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1144 ; RV64ZVE32F-NEXT: add a2, a0, a2
1145 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1146 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
1147 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
1148 ; RV64ZVE32F-NEXT: andi a2, a1, 32
1149 ; RV64ZVE32F-NEXT: bnez a2, .LBB18_8
1150 ; RV64ZVE32F-NEXT: j .LBB18_9
1151 ; RV64ZVE32F-NEXT: .LBB18_15: # %cond.store11
1152 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1153 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1154 ; RV64ZVE32F-NEXT: add a2, a0, a2
1155 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1156 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
1157 ; RV64ZVE32F-NEXT: vse16.v v10, (a2)
1158 ; RV64ZVE32F-NEXT: andi a1, a1, -128
1159 ; RV64ZVE32F-NEXT: beqz a1, .LBB18_11
1160 ; RV64ZVE32F-NEXT: .LBB18_16: # %cond.store13
1161 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1162 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
1163 ; RV64ZVE32F-NEXT: vmv.x.s a1, v9
1164 ; RV64ZVE32F-NEXT: slli a1, a1, 1
1165 ; RV64ZVE32F-NEXT: add a0, a0, a1
1166 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1167 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
1168 ; RV64ZVE32F-NEXT: vse16.v v8, (a0)
1169 ; RV64ZVE32F-NEXT: ret
1170 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i8> %idxs
1171 call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
; Masked scatter of <8 x i16> to %base + sext(%idxs[i]) * 2: the <8 x i8>
; indices are explicitly sign-extended to i16 in the IR before the i16
; getelementptr.
; With the RV32 and RV64 prefixes the expected code sign-extends straight from
; e8 to the index width (vsext.vf4 to e32 / vsext.vf8 to e64), doubles the
; result with vadd.vv, and issues one masked vsoxei32.v / vsoxei64.v on a0.
; Under the RV64ZVE32F prefix each lane is expanded separately: mask-bit test,
; vmv.x.s of the e8 index, slli by 1, add to the base, vse16.v of the value
; lane. No extra extension instruction appears after vmv.x.s.
1175 define void @mscatter_baseidx_sext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
1176 ; RV32-LABEL: mscatter_baseidx_sext_v8i8_v8i16:
1178 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1179 ; RV32-NEXT: vsext.vf4 v10, v9
1180 ; RV32-NEXT: vadd.vv v10, v10, v10
1181 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
1182 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
1185 ; RV64-LABEL: mscatter_baseidx_sext_v8i8_v8i16:
1187 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1188 ; RV64-NEXT: vsext.vf8 v12, v9
1189 ; RV64-NEXT: vadd.vv v12, v12, v12
1190 ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma
1191 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
1194 ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8i16:
1195 ; RV64ZVE32F: # %bb.0:
1196 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1197 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
1198 ; RV64ZVE32F-NEXT: andi a2, a1, 1
1199 ; RV64ZVE32F-NEXT: beqz a2, .LBB19_2
1200 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
1201 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1202 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1203 ; RV64ZVE32F-NEXT: add a2, a0, a2
1204 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1205 ; RV64ZVE32F-NEXT: vse16.v v8, (a2)
1206 ; RV64ZVE32F-NEXT: .LBB19_2: # %else
1207 ; RV64ZVE32F-NEXT: andi a2, a1, 2
1208 ; RV64ZVE32F-NEXT: beqz a2, .LBB19_4
1209 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
1210 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1211 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
1212 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
1213 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1214 ; RV64ZVE32F-NEXT: add a2, a0, a2
1215 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1216 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
1217 ; RV64ZVE32F-NEXT: vse16.v v10, (a2)
1218 ; RV64ZVE32F-NEXT: .LBB19_4: # %else2
1219 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
1220 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
1221 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
1222 ; RV64ZVE32F-NEXT: andi a2, a1, 4
1223 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
1224 ; RV64ZVE32F-NEXT: bnez a2, .LBB19_12
1225 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
1226 ; RV64ZVE32F-NEXT: andi a2, a1, 8
1227 ; RV64ZVE32F-NEXT: bnez a2, .LBB19_13
1228 ; RV64ZVE32F-NEXT: .LBB19_6: # %else6
1229 ; RV64ZVE32F-NEXT: andi a2, a1, 16
1230 ; RV64ZVE32F-NEXT: bnez a2, .LBB19_14
1231 ; RV64ZVE32F-NEXT: .LBB19_7: # %else8
1232 ; RV64ZVE32F-NEXT: andi a2, a1, 32
1233 ; RV64ZVE32F-NEXT: beqz a2, .LBB19_9
1234 ; RV64ZVE32F-NEXT: .LBB19_8: # %cond.store9
1235 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1236 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
1237 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1238 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1239 ; RV64ZVE32F-NEXT: add a2, a0, a2
1240 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1241 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
1242 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
1243 ; RV64ZVE32F-NEXT: .LBB19_9: # %else10
1244 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
1245 ; RV64ZVE32F-NEXT: andi a2, a1, 64
1246 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
1247 ; RV64ZVE32F-NEXT: bnez a2, .LBB19_15
1248 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
1249 ; RV64ZVE32F-NEXT: andi a1, a1, -128
1250 ; RV64ZVE32F-NEXT: bnez a1, .LBB19_16
1251 ; RV64ZVE32F-NEXT: .LBB19_11: # %else14
1252 ; RV64ZVE32F-NEXT: ret
1253 ; RV64ZVE32F-NEXT: .LBB19_12: # %cond.store3
1254 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1255 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1256 ; RV64ZVE32F-NEXT: add a2, a0, a2
1257 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1258 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
1259 ; RV64ZVE32F-NEXT: vse16.v v11, (a2)
1260 ; RV64ZVE32F-NEXT: andi a2, a1, 8
1261 ; RV64ZVE32F-NEXT: beqz a2, .LBB19_6
1262 ; RV64ZVE32F-NEXT: .LBB19_13: # %cond.store5
1263 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1264 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
1265 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1266 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1267 ; RV64ZVE32F-NEXT: add a2, a0, a2
1268 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1269 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
1270 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
1271 ; RV64ZVE32F-NEXT: andi a2, a1, 16
1272 ; RV64ZVE32F-NEXT: beqz a2, .LBB19_7
1273 ; RV64ZVE32F-NEXT: .LBB19_14: # %cond.store7
1274 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1275 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
1276 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1277 ; RV64ZVE32F-NEXT: add a2, a0, a2
1278 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1279 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
1280 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
1281 ; RV64ZVE32F-NEXT: andi a2, a1, 32
1282 ; RV64ZVE32F-NEXT: bnez a2, .LBB19_8
1283 ; RV64ZVE32F-NEXT: j .LBB19_9
1284 ; RV64ZVE32F-NEXT: .LBB19_15: # %cond.store11
1285 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1286 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1287 ; RV64ZVE32F-NEXT: add a2, a0, a2
1288 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1289 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
1290 ; RV64ZVE32F-NEXT: vse16.v v10, (a2)
1291 ; RV64ZVE32F-NEXT: andi a1, a1, -128
1292 ; RV64ZVE32F-NEXT: beqz a1, .LBB19_11
1293 ; RV64ZVE32F-NEXT: .LBB19_16: # %cond.store13
1294 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1295 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
1296 ; RV64ZVE32F-NEXT: vmv.x.s a1, v9
1297 ; RV64ZVE32F-NEXT: slli a1, a1, 1
1298 ; RV64ZVE32F-NEXT: add a0, a0, a1
1299 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1300 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
1301 ; RV64ZVE32F-NEXT: vse16.v v8, (a0)
1302 ; RV64ZVE32F-NEXT: ret
1303 %eidxs = sext <8 x i8> %idxs to <8 x i16>
1304 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %eidxs
1305 call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
; Masked scatter of <8 x i16> to %base + zext(%idxs[i]) * 2: the <8 x i8>
; indices are explicitly zero-extended to i16 in the IR before the i16
; getelementptr.
; With the RV32 and RV64 prefixes the expected code forms the 16-bit byte
; offsets in one step with vwaddu.vv (index added to itself while widening
; from e8) and then uses the 16-bit-index store vsoxei16.v on base a0.
; Under the RV64ZVE32F prefix each lane is expanded separately; after
; vmv.x.s the scalar index is masked with `andi ..., 255` before the shift
; and add, then the value lane is stored with vse16.v.
1309 define void @mscatter_baseidx_zext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
1310 ; RV32-LABEL: mscatter_baseidx_zext_v8i8_v8i16:
1312 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
1313 ; RV32-NEXT: vwaddu.vv v10, v9, v9
1314 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
1315 ; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t
1318 ; RV64-LABEL: mscatter_baseidx_zext_v8i8_v8i16:
1320 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
1321 ; RV64-NEXT: vwaddu.vv v10, v9, v9
1322 ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma
1323 ; RV64-NEXT: vsoxei16.v v8, (a0), v10, v0.t
1326 ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8i16:
1327 ; RV64ZVE32F: # %bb.0:
1328 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1329 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
1330 ; RV64ZVE32F-NEXT: andi a2, a1, 1
1331 ; RV64ZVE32F-NEXT: beqz a2, .LBB20_2
1332 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
1333 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1334 ; RV64ZVE32F-NEXT: andi a2, a2, 255
1335 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1336 ; RV64ZVE32F-NEXT: add a2, a0, a2
1337 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1338 ; RV64ZVE32F-NEXT: vse16.v v8, (a2)
1339 ; RV64ZVE32F-NEXT: .LBB20_2: # %else
1340 ; RV64ZVE32F-NEXT: andi a2, a1, 2
1341 ; RV64ZVE32F-NEXT: beqz a2, .LBB20_4
1342 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
1343 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1344 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
1345 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
1346 ; RV64ZVE32F-NEXT: andi a2, a2, 255
1347 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1348 ; RV64ZVE32F-NEXT: add a2, a0, a2
1349 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1350 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
1351 ; RV64ZVE32F-NEXT: vse16.v v10, (a2)
1352 ; RV64ZVE32F-NEXT: .LBB20_4: # %else2
1353 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
1354 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
1355 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
1356 ; RV64ZVE32F-NEXT: andi a2, a1, 4
1357 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
1358 ; RV64ZVE32F-NEXT: bnez a2, .LBB20_12
1359 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
1360 ; RV64ZVE32F-NEXT: andi a2, a1, 8
1361 ; RV64ZVE32F-NEXT: bnez a2, .LBB20_13
1362 ; RV64ZVE32F-NEXT: .LBB20_6: # %else6
1363 ; RV64ZVE32F-NEXT: andi a2, a1, 16
1364 ; RV64ZVE32F-NEXT: bnez a2, .LBB20_14
1365 ; RV64ZVE32F-NEXT: .LBB20_7: # %else8
1366 ; RV64ZVE32F-NEXT: andi a2, a1, 32
1367 ; RV64ZVE32F-NEXT: beqz a2, .LBB20_9
1368 ; RV64ZVE32F-NEXT: .LBB20_8: # %cond.store9
1369 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1370 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
1371 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1372 ; RV64ZVE32F-NEXT: andi a2, a2, 255
1373 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1374 ; RV64ZVE32F-NEXT: add a2, a0, a2
1375 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1376 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
1377 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
1378 ; RV64ZVE32F-NEXT: .LBB20_9: # %else10
1379 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
1380 ; RV64ZVE32F-NEXT: andi a2, a1, 64
1381 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
1382 ; RV64ZVE32F-NEXT: bnez a2, .LBB20_15
1383 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
1384 ; RV64ZVE32F-NEXT: andi a1, a1, -128
1385 ; RV64ZVE32F-NEXT: bnez a1, .LBB20_16
1386 ; RV64ZVE32F-NEXT: .LBB20_11: # %else14
1387 ; RV64ZVE32F-NEXT: ret
1388 ; RV64ZVE32F-NEXT: .LBB20_12: # %cond.store3
1389 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1390 ; RV64ZVE32F-NEXT: andi a2, a2, 255
1391 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1392 ; RV64ZVE32F-NEXT: add a2, a0, a2
1393 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1394 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
1395 ; RV64ZVE32F-NEXT: vse16.v v11, (a2)
1396 ; RV64ZVE32F-NEXT: andi a2, a1, 8
1397 ; RV64ZVE32F-NEXT: beqz a2, .LBB20_6
1398 ; RV64ZVE32F-NEXT: .LBB20_13: # %cond.store5
1399 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1400 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
1401 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1402 ; RV64ZVE32F-NEXT: andi a2, a2, 255
1403 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1404 ; RV64ZVE32F-NEXT: add a2, a0, a2
1405 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1406 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
1407 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
1408 ; RV64ZVE32F-NEXT: andi a2, a1, 16
1409 ; RV64ZVE32F-NEXT: beqz a2, .LBB20_7
1410 ; RV64ZVE32F-NEXT: .LBB20_14: # %cond.store7
1411 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1412 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
1413 ; RV64ZVE32F-NEXT: andi a2, a2, 255
1414 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1415 ; RV64ZVE32F-NEXT: add a2, a0, a2
1416 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1417 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
1418 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
1419 ; RV64ZVE32F-NEXT: andi a2, a1, 32
1420 ; RV64ZVE32F-NEXT: bnez a2, .LBB20_8
1421 ; RV64ZVE32F-NEXT: j .LBB20_9
1422 ; RV64ZVE32F-NEXT: .LBB20_15: # %cond.store11
1423 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1424 ; RV64ZVE32F-NEXT: andi a2, a2, 255
1425 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1426 ; RV64ZVE32F-NEXT: add a2, a0, a2
1427 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1428 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
1429 ; RV64ZVE32F-NEXT: vse16.v v10, (a2)
1430 ; RV64ZVE32F-NEXT: andi a1, a1, -128
1431 ; RV64ZVE32F-NEXT: beqz a1, .LBB20_11
1432 ; RV64ZVE32F-NEXT: .LBB20_16: # %cond.store13
1433 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1434 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
1435 ; RV64ZVE32F-NEXT: vmv.x.s a1, v9
1436 ; RV64ZVE32F-NEXT: andi a1, a1, 255
1437 ; RV64ZVE32F-NEXT: slli a1, a1, 1
1438 ; RV64ZVE32F-NEXT: add a0, a0, a1
1439 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1440 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
1441 ; RV64ZVE32F-NEXT: vse16.v v8, (a0)
1442 ; RV64ZVE32F-NEXT: ret
1443 %eidxs = zext <8 x i8> %idxs to <8 x i16>
1444 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %eidxs
1445 call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
; Masked scatter of <8 x i16> to %base + %idxs[i] * 2 with <8 x i16> indices
; used directly by the i16 getelementptr.
; With the RV32 prefix the expected code widens and doubles the indices in one
; vwadd.vv (e16 source) and stores with vsoxei32.v on base a0.
; With the RV64 prefix the indices are sign-extended to e64 (vsext.vf4),
; doubled with vadd.vv, and stored with vsoxei64.v.
; Under the RV64ZVE32F prefix each lane is expanded separately: mask-bit test,
; vmv.x.s of the e16 index, slli by 1, add to the base, vse16.v of the value
; lane; the index vector is split with vslidedown.vi (by 4, then by 2).
1449 define void @mscatter_baseidx_v8i16(<8 x i16> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
1450 ; RV32-LABEL: mscatter_baseidx_v8i16:
1452 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1453 ; RV32-NEXT: vwadd.vv v10, v9, v9
1454 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
1457 ; RV64-LABEL: mscatter_baseidx_v8i16:
1459 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1460 ; RV64-NEXT: vsext.vf4 v12, v9
1461 ; RV64-NEXT: vadd.vv v12, v12, v12
1462 ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma
1463 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
1466 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i16:
1467 ; RV64ZVE32F: # %bb.0:
1468 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1469 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
1470 ; RV64ZVE32F-NEXT: andi a2, a1, 1
1471 ; RV64ZVE32F-NEXT: beqz a2, .LBB21_2
1472 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
1473 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1474 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1475 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1476 ; RV64ZVE32F-NEXT: add a2, a0, a2
1477 ; RV64ZVE32F-NEXT: vse16.v v8, (a2)
1478 ; RV64ZVE32F-NEXT: .LBB21_2: # %else
1479 ; RV64ZVE32F-NEXT: andi a2, a1, 2
1480 ; RV64ZVE32F-NEXT: beqz a2, .LBB21_4
1481 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
1482 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
1483 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
1484 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
1485 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1486 ; RV64ZVE32F-NEXT: add a2, a0, a2
1487 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1488 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
1489 ; RV64ZVE32F-NEXT: vse16.v v10, (a2)
1490 ; RV64ZVE32F-NEXT: .LBB21_4: # %else2
1491 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
1492 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
1493 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
1494 ; RV64ZVE32F-NEXT: andi a2, a1, 4
1495 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
1496 ; RV64ZVE32F-NEXT: bnez a2, .LBB21_12
1497 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
1498 ; RV64ZVE32F-NEXT: andi a2, a1, 8
1499 ; RV64ZVE32F-NEXT: bnez a2, .LBB21_13
1500 ; RV64ZVE32F-NEXT: .LBB21_6: # %else6
1501 ; RV64ZVE32F-NEXT: andi a2, a1, 16
1502 ; RV64ZVE32F-NEXT: bnez a2, .LBB21_14
1503 ; RV64ZVE32F-NEXT: .LBB21_7: # %else8
1504 ; RV64ZVE32F-NEXT: andi a2, a1, 32
1505 ; RV64ZVE32F-NEXT: beqz a2, .LBB21_9
1506 ; RV64ZVE32F-NEXT: .LBB21_8: # %cond.store9
1507 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
1508 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
1509 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1510 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1511 ; RV64ZVE32F-NEXT: add a2, a0, a2
1512 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1513 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
1514 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
1515 ; RV64ZVE32F-NEXT: .LBB21_9: # %else10
1516 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
1517 ; RV64ZVE32F-NEXT: andi a2, a1, 64
1518 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
1519 ; RV64ZVE32F-NEXT: bnez a2, .LBB21_15
1520 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
1521 ; RV64ZVE32F-NEXT: andi a1, a1, -128
1522 ; RV64ZVE32F-NEXT: bnez a1, .LBB21_16
1523 ; RV64ZVE32F-NEXT: .LBB21_11: # %else14
1524 ; RV64ZVE32F-NEXT: ret
1525 ; RV64ZVE32F-NEXT: .LBB21_12: # %cond.store3
1526 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1527 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1528 ; RV64ZVE32F-NEXT: add a2, a0, a2
1529 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1530 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
1531 ; RV64ZVE32F-NEXT: vse16.v v11, (a2)
1532 ; RV64ZVE32F-NEXT: andi a2, a1, 8
1533 ; RV64ZVE32F-NEXT: beqz a2, .LBB21_6
1534 ; RV64ZVE32F-NEXT: .LBB21_13: # %cond.store5
1535 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
1536 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
1537 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1538 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1539 ; RV64ZVE32F-NEXT: add a2, a0, a2
1540 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1541 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
1542 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
1543 ; RV64ZVE32F-NEXT: andi a2, a1, 16
1544 ; RV64ZVE32F-NEXT: beqz a2, .LBB21_7
1545 ; RV64ZVE32F-NEXT: .LBB21_14: # %cond.store7
1546 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1547 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
1548 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1549 ; RV64ZVE32F-NEXT: add a2, a0, a2
1550 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
1551 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
1552 ; RV64ZVE32F-NEXT: andi a2, a1, 32
1553 ; RV64ZVE32F-NEXT: bnez a2, .LBB21_8
1554 ; RV64ZVE32F-NEXT: j .LBB21_9
1555 ; RV64ZVE32F-NEXT: .LBB21_15: # %cond.store11
1556 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1557 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1558 ; RV64ZVE32F-NEXT: add a2, a0, a2
1559 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1560 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
1561 ; RV64ZVE32F-NEXT: vse16.v v10, (a2)
1562 ; RV64ZVE32F-NEXT: andi a1, a1, -128
1563 ; RV64ZVE32F-NEXT: beqz a1, .LBB21_11
1564 ; RV64ZVE32F-NEXT: .LBB21_16: # %cond.store13
1565 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
1566 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
1567 ; RV64ZVE32F-NEXT: vmv.x.s a1, v9
1568 ; RV64ZVE32F-NEXT: slli a1, a1, 1
1569 ; RV64ZVE32F-NEXT: add a0, a0, a1
1570 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1571 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
1572 ; RV64ZVE32F-NEXT: vse16.v v8, (a0)
1573 ; RV64ZVE32F-NEXT: ret
1574 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %idxs
1575 call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
1579 declare void @llvm.masked.scatter.v1i32.v1p0(<1 x i32>, <1 x ptr>, i32, <1 x i1>)
; Masked scatter of a single i32 element (alignment 4) under a one-bit mask.
; The two RV32 configurations are checked separately here (RV32V and
; RV32ZVE32F prefixes) because their vsetivli lines differ (mf2 versus m1);
; both expect one masked vsoxei32.v with a zero base. The RV64 prefix expects
; the vsoxei64.v equivalent.
; Under the RV64ZVE32F prefix the pointer arrives in scalar register a0; the
; mask is probed with vfirst.m and a nonzero result branches around the
; single vse32.v store.
1581 define void @mscatter_v1i32(<1 x i32> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
1582 ; RV32V-LABEL: mscatter_v1i32:
1584 ; RV32V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
1585 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
1588 ; RV64-LABEL: mscatter_v1i32:
1590 ; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
1591 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
1594 ; RV32ZVE32F-LABEL: mscatter_v1i32:
1595 ; RV32ZVE32F: # %bb.0:
1596 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1597 ; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
1598 ; RV32ZVE32F-NEXT: ret
1600 ; RV64ZVE32F-LABEL: mscatter_v1i32:
1601 ; RV64ZVE32F: # %bb.0:
1602 ; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
1603 ; RV64ZVE32F-NEXT: vfirst.m a1, v0
1604 ; RV64ZVE32F-NEXT: bnez a1, .LBB22_2
1605 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
1606 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1607 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
1608 ; RV64ZVE32F-NEXT: .LBB22_2: # %else
1609 ; RV64ZVE32F-NEXT: ret
1610 call void @llvm.masked.scatter.v1i32.v1p0(<1 x i32> %val, <1 x ptr> %ptrs, i32 4, <1 x i1> %m)
; Masked-scatter intrinsic for a <2 x i32> value vector and a <2 x ptr> address
; vector. Operands are (values, pointers, i32 alignment, <2 x i1> mask).
1614 declare void @llvm.masked.scatter.v2i32.v2p0(<2 x i32>, <2 x ptr>, i32, <2 x i1>)
; Scatter of two i32 lanes through %ptrs under mask %m (alignment operand 4).
; The RV32V, RV64 and RV32ZVE32F runs expect a single masked vsoxei store.
; The RV64ZVE32F run expects the mask to be moved to a scalar with vmv.x.s,
; each mask bit to be tested with andi, and one vse32.v per enabled lane, using
; a0 and a1 as the two destination addresses (lane 1 is first brought to
; element 0 with vslidedown.vi).
1616 define void @mscatter_v2i32(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
1617 ; RV32V-LABEL: mscatter_v2i32:
1619 ; RV32V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
1620 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
1623 ; RV64-LABEL: mscatter_v2i32:
1625 ; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
1626 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
1629 ; RV32ZVE32F-LABEL: mscatter_v2i32:
1630 ; RV32ZVE32F: # %bb.0:
1631 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
1632 ; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
1633 ; RV32ZVE32F-NEXT: ret
1635 ; RV64ZVE32F-LABEL: mscatter_v2i32:
1636 ; RV64ZVE32F: # %bb.0:
1637 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1638 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
1639 ; RV64ZVE32F-NEXT: andi a3, a2, 1
1640 ; RV64ZVE32F-NEXT: bnez a3, .LBB23_3
1641 ; RV64ZVE32F-NEXT: # %bb.1: # %else
1642 ; RV64ZVE32F-NEXT: andi a2, a2, 2
1643 ; RV64ZVE32F-NEXT: bnez a2, .LBB23_4
1644 ; RV64ZVE32F-NEXT: .LBB23_2: # %else2
1645 ; RV64ZVE32F-NEXT: ret
1646 ; RV64ZVE32F-NEXT: .LBB23_3: # %cond.store
1647 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1648 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
1649 ; RV64ZVE32F-NEXT: andi a2, a2, 2
1650 ; RV64ZVE32F-NEXT: beqz a2, .LBB23_2
1651 ; RV64ZVE32F-NEXT: .LBB23_4: # %cond.store1
1652 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1653 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
1654 ; RV64ZVE32F-NEXT: vse32.v v8, (a1)
1655 ; RV64ZVE32F-NEXT: ret
1656 call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> %val, <2 x ptr> %ptrs, i32 4, <2 x i1> %m)
; Truncating scatter. %val is narrowed from <2 x i64> to <2 x i32> with trunc
; and the result is scattered through %ptrs under mask %m (alignment operand 4).
; The RV32V and RV64 runs expect the narrowing to be a vnsrl.wi by 0 feeding one
; masked vsoxei store. Both ZVE32F runs expect the i32 value vector to be built
; with two vslide1down.vx from scalars. The RV32ZVE32F run loads those scalars
; with lw from offsets 0 and 8 of a0; the RV64ZVE32F run takes them from a0 and
; a1, then stores per lane to the addresses in a2 and a3.
1660 define void @mscatter_v2i64_truncstore_v2i32(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
1661 ; RV32V-LABEL: mscatter_v2i64_truncstore_v2i32:
1663 ; RV32V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
1664 ; RV32V-NEXT: vnsrl.wi v8, v8, 0
1665 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
1668 ; RV64-LABEL: mscatter_v2i64_truncstore_v2i32:
1670 ; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
1671 ; RV64-NEXT: vnsrl.wi v8, v8, 0
1672 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
1675 ; RV32ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i32:
1676 ; RV32ZVE32F: # %bb.0:
1677 ; RV32ZVE32F-NEXT: lw a1, 0(a0)
1678 ; RV32ZVE32F-NEXT: lw a0, 8(a0)
1679 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
1680 ; RV32ZVE32F-NEXT: vslide1down.vx v9, v8, a1
1681 ; RV32ZVE32F-NEXT: vslide1down.vx v9, v9, a0
1682 ; RV32ZVE32F-NEXT: vsoxei32.v v9, (zero), v8, v0.t
1683 ; RV32ZVE32F-NEXT: ret
1685 ; RV64ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i32:
1686 ; RV64ZVE32F: # %bb.0:
1687 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
1688 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0
1689 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1
1690 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
1691 ; RV64ZVE32F-NEXT: vmv.x.s a0, v0
1692 ; RV64ZVE32F-NEXT: andi a1, a0, 1
1693 ; RV64ZVE32F-NEXT: bnez a1, .LBB24_3
1694 ; RV64ZVE32F-NEXT: # %bb.1: # %else
1695 ; RV64ZVE32F-NEXT: andi a0, a0, 2
1696 ; RV64ZVE32F-NEXT: bnez a0, .LBB24_4
1697 ; RV64ZVE32F-NEXT: .LBB24_2: # %else2
1698 ; RV64ZVE32F-NEXT: ret
1699 ; RV64ZVE32F-NEXT: .LBB24_3: # %cond.store
1700 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1701 ; RV64ZVE32F-NEXT: vse32.v v8, (a2)
1702 ; RV64ZVE32F-NEXT: andi a0, a0, 2
1703 ; RV64ZVE32F-NEXT: beqz a0, .LBB24_2
1704 ; RV64ZVE32F-NEXT: .LBB24_4: # %cond.store1
1705 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1706 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
1707 ; RV64ZVE32F-NEXT: vse32.v v8, (a3)
1708 ; RV64ZVE32F-NEXT: ret
1709 %tval = trunc <2 x i64> %val to <2 x i32>
1710 call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> %tval, <2 x ptr> %ptrs, i32 4, <2 x i1> %m)
; Masked-scatter intrinsic for a <4 x i32> value vector and a <4 x ptr> address
; vector. Operands are (values, pointers, i32 alignment, <4 x i1> mask).
1714 declare void @llvm.masked.scatter.v4i32.v4p0(<4 x i32>, <4 x ptr>, i32, <4 x i1>)
; Scatter of four i32 lanes through %ptrs under mask %m (alignment operand 4).
; Both riscv32 configurations share the RV32 prefix here and expect a single
; masked vsoxei32.v; the RV64 run expects vsoxei64.v with the index vector in
; v10. The RV64ZVE32F run expects the four pointers to be read with ld from
; offsets 0, 8, 16 and 24 of a0, the mask to be moved to a3 with vmv.x.s, and
; one andi test plus one vse32.v per enabled lane. Lanes 1 to 3 are first moved
; to element 0 with vslidedown.vi.
1716 define void @mscatter_v4i32(<4 x i32> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
1717 ; RV32-LABEL: mscatter_v4i32:
1719 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1720 ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
1723 ; RV64-LABEL: mscatter_v4i32:
1725 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1726 ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
1729 ; RV64ZVE32F-LABEL: mscatter_v4i32:
1730 ; RV64ZVE32F: # %bb.0:
1731 ; RV64ZVE32F-NEXT: ld a1, 24(a0)
1732 ; RV64ZVE32F-NEXT: ld a2, 16(a0)
1733 ; RV64ZVE32F-NEXT: ld a4, 8(a0)
1734 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1735 ; RV64ZVE32F-NEXT: vmv.x.s a3, v0
1736 ; RV64ZVE32F-NEXT: andi a5, a3, 1
1737 ; RV64ZVE32F-NEXT: bnez a5, .LBB25_5
1738 ; RV64ZVE32F-NEXT: # %bb.1: # %else
1739 ; RV64ZVE32F-NEXT: andi a0, a3, 2
1740 ; RV64ZVE32F-NEXT: bnez a0, .LBB25_6
1741 ; RV64ZVE32F-NEXT: .LBB25_2: # %else2
1742 ; RV64ZVE32F-NEXT: andi a0, a3, 4
1743 ; RV64ZVE32F-NEXT: bnez a0, .LBB25_7
1744 ; RV64ZVE32F-NEXT: .LBB25_3: # %else4
1745 ; RV64ZVE32F-NEXT: andi a3, a3, 8
1746 ; RV64ZVE32F-NEXT: bnez a3, .LBB25_8
1747 ; RV64ZVE32F-NEXT: .LBB25_4: # %else6
1748 ; RV64ZVE32F-NEXT: ret
1749 ; RV64ZVE32F-NEXT: .LBB25_5: # %cond.store
1750 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
1751 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1752 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
1753 ; RV64ZVE32F-NEXT: andi a0, a3, 2
1754 ; RV64ZVE32F-NEXT: beqz a0, .LBB25_2
1755 ; RV64ZVE32F-NEXT: .LBB25_6: # %cond.store1
1756 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1757 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
1758 ; RV64ZVE32F-NEXT: vse32.v v9, (a4)
1759 ; RV64ZVE32F-NEXT: andi a0, a3, 4
1760 ; RV64ZVE32F-NEXT: beqz a0, .LBB25_3
1761 ; RV64ZVE32F-NEXT: .LBB25_7: # %cond.store3
1762 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1763 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
1764 ; RV64ZVE32F-NEXT: vse32.v v9, (a2)
1765 ; RV64ZVE32F-NEXT: andi a3, a3, 8
1766 ; RV64ZVE32F-NEXT: beqz a3, .LBB25_4
1767 ; RV64ZVE32F-NEXT: .LBB25_8: # %cond.store5
1768 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1769 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
1770 ; RV64ZVE32F-NEXT: vse32.v v8, (a1)
1771 ; RV64ZVE32F-NEXT: ret
1772 call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 4, <4 x i1> %m)
; Scatter of four i32 lanes with an all-true mask. %mtrue is a splat of i1 1
; built with insertelement + shufflevector. The RV32 and RV64 prefixes expect
; an unmasked vsoxei store (no v0.t operand). The RV64ZVE32F expectations
; still materialise the mask with vmset.m, copy it to a3 and test bits 1 to 3
; with andi; the lane-0 branch is written as "beqz zero", which is always taken.
; These lines record current llc output, so the unfolded mask tests are expected
; here rather than an error in the test.
1776 define void @mscatter_truemask_v4i32(<4 x i32> %val, <4 x ptr> %ptrs) {
1777 ; RV32-LABEL: mscatter_truemask_v4i32:
1779 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1780 ; RV32-NEXT: vsoxei32.v v8, (zero), v9
1783 ; RV64-LABEL: mscatter_truemask_v4i32:
1785 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1786 ; RV64-NEXT: vsoxei64.v v8, (zero), v10
1789 ; RV64ZVE32F-LABEL: mscatter_truemask_v4i32:
1790 ; RV64ZVE32F: # %bb.0:
1791 ; RV64ZVE32F-NEXT: ld a1, 24(a0)
1792 ; RV64ZVE32F-NEXT: ld a2, 16(a0)
1793 ; RV64ZVE32F-NEXT: ld a4, 8(a0)
1794 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
1795 ; RV64ZVE32F-NEXT: vmset.m v9
1796 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9
1797 ; RV64ZVE32F-NEXT: beqz zero, .LBB26_5
1798 ; RV64ZVE32F-NEXT: # %bb.1: # %else
1799 ; RV64ZVE32F-NEXT: andi a0, a3, 2
1800 ; RV64ZVE32F-NEXT: bnez a0, .LBB26_6
1801 ; RV64ZVE32F-NEXT: .LBB26_2: # %else2
1802 ; RV64ZVE32F-NEXT: andi a0, a3, 4
1803 ; RV64ZVE32F-NEXT: bnez a0, .LBB26_7
1804 ; RV64ZVE32F-NEXT: .LBB26_3: # %else4
1805 ; RV64ZVE32F-NEXT: andi a3, a3, 8
1806 ; RV64ZVE32F-NEXT: bnez a3, .LBB26_8
1807 ; RV64ZVE32F-NEXT: .LBB26_4: # %else6
1808 ; RV64ZVE32F-NEXT: ret
1809 ; RV64ZVE32F-NEXT: .LBB26_5: # %cond.store
1810 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
1811 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1812 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
1813 ; RV64ZVE32F-NEXT: andi a0, a3, 2
1814 ; RV64ZVE32F-NEXT: beqz a0, .LBB26_2
1815 ; RV64ZVE32F-NEXT: .LBB26_6: # %cond.store1
1816 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1817 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
1818 ; RV64ZVE32F-NEXT: vse32.v v9, (a4)
1819 ; RV64ZVE32F-NEXT: andi a0, a3, 4
1820 ; RV64ZVE32F-NEXT: beqz a0, .LBB26_3
1821 ; RV64ZVE32F-NEXT: .LBB26_7: # %cond.store3
1822 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1823 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
1824 ; RV64ZVE32F-NEXT: vse32.v v9, (a2)
1825 ; RV64ZVE32F-NEXT: andi a3, a3, 8
1826 ; RV64ZVE32F-NEXT: beqz a3, .LBB26_4
1827 ; RV64ZVE32F-NEXT: .LBB26_8: # %cond.store5
1828 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1829 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
1830 ; RV64ZVE32F-NEXT: vse32.v v8, (a1)
1831 ; RV64ZVE32F-NEXT: ret
1832 %mhead = insertelement <4 x i1> poison, i1 1, i32 0
1833 %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer
1834 call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 4, <4 x i1> %mtrue)
; Scatter of four i32 lanes with an all-false mask (zeroinitializer), so no
; lane is enabled. The expectations use the common CHECK prefix, i.e. one
; shared expectation for every RUN configuration.
1838 define void @mscatter_falsemask_v4i32(<4 x i32> %val, <4 x ptr> %ptrs) {
1839 ; CHECK-LABEL: mscatter_falsemask_v4i32:
1842 call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 4, <4 x i1> zeroinitializer)
; Masked-scatter intrinsic for an <8 x i32> value vector and an <8 x ptr>
; address vector. Operands are (values, pointers, i32 alignment, <8 x i1> mask).
1846 declare void @llvm.masked.scatter.v8i32.v8p0(<8 x i32>, <8 x ptr>, i32, <8 x i1>)
; Scatter of eight i32 lanes through %ptrs under mask %m (alignment operand 4).
; The RV32 and RV64 prefixes expect a single masked vsoxei store at e32/m2.
; The RV64ZVE32F run expects the eight pointers to be read with ld from offsets
; 0 to 56 of a0, the mask to be copied to a3, and one andi test plus vse32.v per
; enabled lane. Lanes 0 to 3 are extracted under an m1 vsetivli and lanes 4 to 7
; under m2. The last lane is tested with an andi immediate of -128.
1848 define void @mscatter_v8i32(<8 x i32> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
1849 ; RV32-LABEL: mscatter_v8i32:
1851 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1852 ; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
1855 ; RV64-LABEL: mscatter_v8i32:
1857 ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1858 ; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
1861 ; RV64ZVE32F-LABEL: mscatter_v8i32:
1862 ; RV64ZVE32F: # %bb.0:
1863 ; RV64ZVE32F-NEXT: ld a1, 56(a0)
1864 ; RV64ZVE32F-NEXT: ld a2, 48(a0)
1865 ; RV64ZVE32F-NEXT: ld a4, 40(a0)
1866 ; RV64ZVE32F-NEXT: ld a5, 32(a0)
1867 ; RV64ZVE32F-NEXT: ld a6, 24(a0)
1868 ; RV64ZVE32F-NEXT: ld a7, 16(a0)
1869 ; RV64ZVE32F-NEXT: ld t0, 8(a0)
1870 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1871 ; RV64ZVE32F-NEXT: vmv.x.s a3, v0
1872 ; RV64ZVE32F-NEXT: andi t1, a3, 1
1873 ; RV64ZVE32F-NEXT: bnez t1, .LBB28_9
1874 ; RV64ZVE32F-NEXT: # %bb.1: # %else
1875 ; RV64ZVE32F-NEXT: andi a0, a3, 2
1876 ; RV64ZVE32F-NEXT: bnez a0, .LBB28_10
1877 ; RV64ZVE32F-NEXT: .LBB28_2: # %else2
1878 ; RV64ZVE32F-NEXT: andi a0, a3, 4
1879 ; RV64ZVE32F-NEXT: bnez a0, .LBB28_11
1880 ; RV64ZVE32F-NEXT: .LBB28_3: # %else4
1881 ; RV64ZVE32F-NEXT: andi a0, a3, 8
1882 ; RV64ZVE32F-NEXT: bnez a0, .LBB28_12
1883 ; RV64ZVE32F-NEXT: .LBB28_4: # %else6
1884 ; RV64ZVE32F-NEXT: andi a0, a3, 16
1885 ; RV64ZVE32F-NEXT: bnez a0, .LBB28_13
1886 ; RV64ZVE32F-NEXT: .LBB28_5: # %else8
1887 ; RV64ZVE32F-NEXT: andi a0, a3, 32
1888 ; RV64ZVE32F-NEXT: bnez a0, .LBB28_14
1889 ; RV64ZVE32F-NEXT: .LBB28_6: # %else10
1890 ; RV64ZVE32F-NEXT: andi a0, a3, 64
1891 ; RV64ZVE32F-NEXT: bnez a0, .LBB28_15
1892 ; RV64ZVE32F-NEXT: .LBB28_7: # %else12
1893 ; RV64ZVE32F-NEXT: andi a0, a3, -128
1894 ; RV64ZVE32F-NEXT: bnez a0, .LBB28_16
1895 ; RV64ZVE32F-NEXT: .LBB28_8: # %else14
1896 ; RV64ZVE32F-NEXT: ret
1897 ; RV64ZVE32F-NEXT: .LBB28_9: # %cond.store
1898 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
1899 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1900 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
1901 ; RV64ZVE32F-NEXT: andi a0, a3, 2
1902 ; RV64ZVE32F-NEXT: beqz a0, .LBB28_2
1903 ; RV64ZVE32F-NEXT: .LBB28_10: # %cond.store1
1904 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1905 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
1906 ; RV64ZVE32F-NEXT: vse32.v v10, (t0)
1907 ; RV64ZVE32F-NEXT: andi a0, a3, 4
1908 ; RV64ZVE32F-NEXT: beqz a0, .LBB28_3
1909 ; RV64ZVE32F-NEXT: .LBB28_11: # %cond.store3
1910 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1911 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2
1912 ; RV64ZVE32F-NEXT: vse32.v v10, (a7)
1913 ; RV64ZVE32F-NEXT: andi a0, a3, 8
1914 ; RV64ZVE32F-NEXT: beqz a0, .LBB28_4
1915 ; RV64ZVE32F-NEXT: .LBB28_12: # %cond.store5
1916 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1917 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
1918 ; RV64ZVE32F-NEXT: vse32.v v10, (a6)
1919 ; RV64ZVE32F-NEXT: andi a0, a3, 16
1920 ; RV64ZVE32F-NEXT: beqz a0, .LBB28_5
1921 ; RV64ZVE32F-NEXT: .LBB28_13: # %cond.store7
1922 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
1923 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
1924 ; RV64ZVE32F-NEXT: vse32.v v10, (a5)
1925 ; RV64ZVE32F-NEXT: andi a0, a3, 32
1926 ; RV64ZVE32F-NEXT: beqz a0, .LBB28_6
1927 ; RV64ZVE32F-NEXT: .LBB28_14: # %cond.store9
1928 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
1929 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5
1930 ; RV64ZVE32F-NEXT: vse32.v v10, (a4)
1931 ; RV64ZVE32F-NEXT: andi a0, a3, 64
1932 ; RV64ZVE32F-NEXT: beqz a0, .LBB28_7
1933 ; RV64ZVE32F-NEXT: .LBB28_15: # %cond.store11
1934 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
1935 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
1936 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
1937 ; RV64ZVE32F-NEXT: andi a0, a3, -128
1938 ; RV64ZVE32F-NEXT: beqz a0, .LBB28_8
1939 ; RV64ZVE32F-NEXT: .LBB28_16: # %cond.store13
1940 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
1941 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
1942 ; RV64ZVE32F-NEXT: vse32.v v8, (a1)
1943 ; RV64ZVE32F-NEXT: ret
1944 call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
; Base + index scatter. The addresses come from a getelementptr of i32 over
; scalar %base with an <8 x i8> index vector, and the eight i32 lanes of %val
; are stored under mask %m (alignment operand 4).
; The RV32 prefix expects the indices to be sign-extended with vsext.vf4,
; shifted left by 2 and used in vsoxei32.v off a0. The RV64 prefix expects
; vsext.vf8 at e64/m4, the same shift, then vsoxei64.v at e32/m2.
; The RV64ZVE32F run expects per-lane code. Each enabled lane's index is
; extracted with vmv.x.s (after vslidedown.vi on the index vector where needed),
; shifted with slli by 2, added to a0, and the matching value lane is stored
; with vse32.v.
1948 define void @mscatter_baseidx_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
1949 ; RV32-LABEL: mscatter_baseidx_v8i8_v8i32:
1951 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1952 ; RV32-NEXT: vsext.vf4 v12, v10
1953 ; RV32-NEXT: vsll.vi v10, v12, 2
1954 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
1957 ; RV64-LABEL: mscatter_baseidx_v8i8_v8i32:
1959 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1960 ; RV64-NEXT: vsext.vf8 v12, v10
1961 ; RV64-NEXT: vsll.vi v12, v12, 2
1962 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
1963 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
1966 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8_v8i32:
1967 ; RV64ZVE32F: # %bb.0:
1968 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1969 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
1970 ; RV64ZVE32F-NEXT: andi a2, a1, 1
1971 ; RV64ZVE32F-NEXT: beqz a2, .LBB29_2
1972 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
1973 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
1974 ; RV64ZVE32F-NEXT: slli a2, a2, 2
1975 ; RV64ZVE32F-NEXT: add a2, a0, a2
1976 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1977 ; RV64ZVE32F-NEXT: vse32.v v8, (a2)
1978 ; RV64ZVE32F-NEXT: .LBB29_2: # %else
1979 ; RV64ZVE32F-NEXT: andi a2, a1, 2
1980 ; RV64ZVE32F-NEXT: beqz a2, .LBB29_4
1981 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
1982 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1983 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
1984 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
1985 ; RV64ZVE32F-NEXT: slli a2, a2, 2
1986 ; RV64ZVE32F-NEXT: add a2, a0, a2
1987 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
1988 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
1989 ; RV64ZVE32F-NEXT: vse32.v v11, (a2)
1990 ; RV64ZVE32F-NEXT: .LBB29_4: # %else2
1991 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
1992 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
1993 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
1994 ; RV64ZVE32F-NEXT: andi a2, a1, 4
1995 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
1996 ; RV64ZVE32F-NEXT: bnez a2, .LBB29_12
1997 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
1998 ; RV64ZVE32F-NEXT: andi a2, a1, 8
1999 ; RV64ZVE32F-NEXT: bnez a2, .LBB29_13
2000 ; RV64ZVE32F-NEXT: .LBB29_6: # %else6
2001 ; RV64ZVE32F-NEXT: andi a2, a1, 16
2002 ; RV64ZVE32F-NEXT: bnez a2, .LBB29_14
2003 ; RV64ZVE32F-NEXT: .LBB29_7: # %else8
2004 ; RV64ZVE32F-NEXT: andi a2, a1, 32
2005 ; RV64ZVE32F-NEXT: beqz a2, .LBB29_9
2006 ; RV64ZVE32F-NEXT: .LBB29_8: # %cond.store9
2007 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2008 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
2009 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2010 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2011 ; RV64ZVE32F-NEXT: add a2, a0, a2
2012 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2013 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
2014 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2015 ; RV64ZVE32F-NEXT: .LBB29_9: # %else10
2016 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
2017 ; RV64ZVE32F-NEXT: andi a2, a1, 64
2018 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
2019 ; RV64ZVE32F-NEXT: bnez a2, .LBB29_15
2020 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
2021 ; RV64ZVE32F-NEXT: andi a1, a1, -128
2022 ; RV64ZVE32F-NEXT: bnez a1, .LBB29_16
2023 ; RV64ZVE32F-NEXT: .LBB29_11: # %else14
2024 ; RV64ZVE32F-NEXT: ret
2025 ; RV64ZVE32F-NEXT: .LBB29_12: # %cond.store3
2026 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2027 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2028 ; RV64ZVE32F-NEXT: add a2, a0, a2
2029 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2030 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
2031 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2032 ; RV64ZVE32F-NEXT: andi a2, a1, 8
2033 ; RV64ZVE32F-NEXT: beqz a2, .LBB29_6
2034 ; RV64ZVE32F-NEXT: .LBB29_13: # %cond.store5
2035 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2036 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
2037 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2038 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2039 ; RV64ZVE32F-NEXT: add a2, a0, a2
2040 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2041 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
2042 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
2043 ; RV64ZVE32F-NEXT: andi a2, a1, 16
2044 ; RV64ZVE32F-NEXT: beqz a2, .LBB29_7
2045 ; RV64ZVE32F-NEXT: .LBB29_14: # %cond.store7
2046 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2047 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
2048 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2049 ; RV64ZVE32F-NEXT: add a2, a0, a2
2050 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2051 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
2052 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2053 ; RV64ZVE32F-NEXT: andi a2, a1, 32
2054 ; RV64ZVE32F-NEXT: bnez a2, .LBB29_8
2055 ; RV64ZVE32F-NEXT: j .LBB29_9
2056 ; RV64ZVE32F-NEXT: .LBB29_15: # %cond.store11
2057 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2058 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2059 ; RV64ZVE32F-NEXT: add a2, a0, a2
2060 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2061 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
2062 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2063 ; RV64ZVE32F-NEXT: andi a1, a1, -128
2064 ; RV64ZVE32F-NEXT: beqz a1, .LBB29_11
2065 ; RV64ZVE32F-NEXT: .LBB29_16: # %cond.store13
2066 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2067 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
2068 ; RV64ZVE32F-NEXT: vmv.x.s a1, v10
2069 ; RV64ZVE32F-NEXT: slli a1, a1, 2
2070 ; RV64ZVE32F-NEXT: add a0, a0, a1
2071 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2072 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
2073 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
2074 ; RV64ZVE32F-NEXT: ret
2075 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i8> %idxs
2076 call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
; Base + index scatter with an explicit sign extension. The <8 x i8> indices
; are widened to <8 x i32> with sext before the getelementptr of i32 over %base,
; and the eight i32 lanes of %val are stored under mask %m (alignment operand 4).
; The RV32 prefix expects vsext.vf4, a shift left by 2 and vsoxei32.v off a0.
; The RV64 prefix expects vsext.vf8 at e64/m4, the shift, then vsoxei64.v.
; The RV64ZVE32F run expects per-lane code. Each enabled lane's index is
; extracted with vmv.x.s, scaled with slli by 2, added to a0, and the matching
; value lane is stored with vse32.v.
2080 define void @mscatter_baseidx_sext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
2081 ; RV32-LABEL: mscatter_baseidx_sext_v8i8_v8i32:
2083 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
2084 ; RV32-NEXT: vsext.vf4 v12, v10
2085 ; RV32-NEXT: vsll.vi v10, v12, 2
2086 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
2089 ; RV64-LABEL: mscatter_baseidx_sext_v8i8_v8i32:
2091 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
2092 ; RV64-NEXT: vsext.vf8 v12, v10
2093 ; RV64-NEXT: vsll.vi v12, v12, 2
2094 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2095 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
2098 ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8i32:
2099 ; RV64ZVE32F: # %bb.0:
2100 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2101 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
2102 ; RV64ZVE32F-NEXT: andi a2, a1, 1
2103 ; RV64ZVE32F-NEXT: beqz a2, .LBB30_2
2104 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
2105 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2106 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2107 ; RV64ZVE32F-NEXT: add a2, a0, a2
2108 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2109 ; RV64ZVE32F-NEXT: vse32.v v8, (a2)
2110 ; RV64ZVE32F-NEXT: .LBB30_2: # %else
2111 ; RV64ZVE32F-NEXT: andi a2, a1, 2
2112 ; RV64ZVE32F-NEXT: beqz a2, .LBB30_4
2113 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
2114 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2115 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
2116 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
2117 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2118 ; RV64ZVE32F-NEXT: add a2, a0, a2
2119 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2120 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
2121 ; RV64ZVE32F-NEXT: vse32.v v11, (a2)
2122 ; RV64ZVE32F-NEXT: .LBB30_4: # %else2
2123 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
2124 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
2125 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
2126 ; RV64ZVE32F-NEXT: andi a2, a1, 4
2127 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
2128 ; RV64ZVE32F-NEXT: bnez a2, .LBB30_12
2129 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
2130 ; RV64ZVE32F-NEXT: andi a2, a1, 8
2131 ; RV64ZVE32F-NEXT: bnez a2, .LBB30_13
2132 ; RV64ZVE32F-NEXT: .LBB30_6: # %else6
2133 ; RV64ZVE32F-NEXT: andi a2, a1, 16
2134 ; RV64ZVE32F-NEXT: bnez a2, .LBB30_14
2135 ; RV64ZVE32F-NEXT: .LBB30_7: # %else8
2136 ; RV64ZVE32F-NEXT: andi a2, a1, 32
2137 ; RV64ZVE32F-NEXT: beqz a2, .LBB30_9
2138 ; RV64ZVE32F-NEXT: .LBB30_8: # %cond.store9
2139 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2140 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
2141 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2142 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2143 ; RV64ZVE32F-NEXT: add a2, a0, a2
2144 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2145 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
2146 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2147 ; RV64ZVE32F-NEXT: .LBB30_9: # %else10
2148 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
2149 ; RV64ZVE32F-NEXT: andi a2, a1, 64
2150 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
2151 ; RV64ZVE32F-NEXT: bnez a2, .LBB30_15
2152 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
2153 ; RV64ZVE32F-NEXT: andi a1, a1, -128
2154 ; RV64ZVE32F-NEXT: bnez a1, .LBB30_16
2155 ; RV64ZVE32F-NEXT: .LBB30_11: # %else14
2156 ; RV64ZVE32F-NEXT: ret
2157 ; RV64ZVE32F-NEXT: .LBB30_12: # %cond.store3
2158 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2159 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2160 ; RV64ZVE32F-NEXT: add a2, a0, a2
2161 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2162 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
2163 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2164 ; RV64ZVE32F-NEXT: andi a2, a1, 8
2165 ; RV64ZVE32F-NEXT: beqz a2, .LBB30_6
2166 ; RV64ZVE32F-NEXT: .LBB30_13: # %cond.store5
2167 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2168 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
2169 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2170 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2171 ; RV64ZVE32F-NEXT: add a2, a0, a2
2172 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2173 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
2174 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
2175 ; RV64ZVE32F-NEXT: andi a2, a1, 16
2176 ; RV64ZVE32F-NEXT: beqz a2, .LBB30_7
2177 ; RV64ZVE32F-NEXT: .LBB30_14: # %cond.store7
2178 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2179 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
2180 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2181 ; RV64ZVE32F-NEXT: add a2, a0, a2
2182 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2183 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
2184 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2185 ; RV64ZVE32F-NEXT: andi a2, a1, 32
2186 ; RV64ZVE32F-NEXT: bnez a2, .LBB30_8
2187 ; RV64ZVE32F-NEXT: j .LBB30_9
2188 ; RV64ZVE32F-NEXT: .LBB30_15: # %cond.store11
2189 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2190 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2191 ; RV64ZVE32F-NEXT: add a2, a0, a2
2192 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2193 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
2194 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2195 ; RV64ZVE32F-NEXT: andi a1, a1, -128
2196 ; RV64ZVE32F-NEXT: beqz a1, .LBB30_11
2197 ; RV64ZVE32F-NEXT: .LBB30_16: # %cond.store13
2198 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2199 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
2200 ; RV64ZVE32F-NEXT: vmv.x.s a1, v10
2201 ; RV64ZVE32F-NEXT: slli a1, a1, 2
2202 ; RV64ZVE32F-NEXT: add a0, a0, a1
2203 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2204 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
2205 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
2206 ; RV64ZVE32F-NEXT: ret
2207 %eidxs = sext <8 x i8> %idxs to <8 x i32>
2208 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
2209 call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
; Base + index scatter with an explicit zero extension. The <8 x i8> indices
; are widened to <8 x i32> with zext before the getelementptr of i32 over %base,
; and the eight i32 lanes of %val are stored under mask %m (alignment operand 4).
; Both the RV32 and RV64 prefixes expect the indices to be widened only to
; 16 bits (vzext.vf2 at e16/m1), shifted left by 2 and used by vsoxei16.v at
; e32/m2 off a0.
; The RV64ZVE32F run expects per-lane code. Each enabled lane's index is
; extracted with vmv.x.s, masked with "andi ..., 255", scaled with slli by 2,
; added to a0, and the matching value lane is stored with vse32.v.
2213 define void @mscatter_baseidx_zext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
2214 ; RV32-LABEL: mscatter_baseidx_zext_v8i8_v8i32:
2216 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
2217 ; RV32-NEXT: vzext.vf2 v11, v10
2218 ; RV32-NEXT: vsll.vi v10, v11, 2
2219 ; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2220 ; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t
2223 ; RV64-LABEL: mscatter_baseidx_zext_v8i8_v8i32:
2225 ; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
2226 ; RV64-NEXT: vzext.vf2 v11, v10
2227 ; RV64-NEXT: vsll.vi v10, v11, 2
2228 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2229 ; RV64-NEXT: vsoxei16.v v8, (a0), v10, v0.t
2232 ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8i32:
2233 ; RV64ZVE32F: # %bb.0:
2234 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2235 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
2236 ; RV64ZVE32F-NEXT: andi a2, a1, 1
2237 ; RV64ZVE32F-NEXT: beqz a2, .LBB31_2
2238 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
2239 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2240 ; RV64ZVE32F-NEXT: andi a2, a2, 255
2241 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2242 ; RV64ZVE32F-NEXT: add a2, a0, a2
2243 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2244 ; RV64ZVE32F-NEXT: vse32.v v8, (a2)
2245 ; RV64ZVE32F-NEXT: .LBB31_2: # %else
2246 ; RV64ZVE32F-NEXT: andi a2, a1, 2
2247 ; RV64ZVE32F-NEXT: beqz a2, .LBB31_4
2248 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
2249 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2250 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
2251 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
2252 ; RV64ZVE32F-NEXT: andi a2, a2, 255
2253 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2254 ; RV64ZVE32F-NEXT: add a2, a0, a2
2255 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2256 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
2257 ; RV64ZVE32F-NEXT: vse32.v v11, (a2)
2258 ; RV64ZVE32F-NEXT: .LBB31_4: # %else2
2259 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
2260 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
2261 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
2262 ; RV64ZVE32F-NEXT: andi a2, a1, 4
2263 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
2264 ; RV64ZVE32F-NEXT: bnez a2, .LBB31_12
2265 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
2266 ; RV64ZVE32F-NEXT: andi a2, a1, 8
2267 ; RV64ZVE32F-NEXT: bnez a2, .LBB31_13
2268 ; RV64ZVE32F-NEXT: .LBB31_6: # %else6
2269 ; RV64ZVE32F-NEXT: andi a2, a1, 16
2270 ; RV64ZVE32F-NEXT: bnez a2, .LBB31_14
2271 ; RV64ZVE32F-NEXT: .LBB31_7: # %else8
2272 ; RV64ZVE32F-NEXT: andi a2, a1, 32
2273 ; RV64ZVE32F-NEXT: beqz a2, .LBB31_9
2274 ; RV64ZVE32F-NEXT: .LBB31_8: # %cond.store9
2275 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2276 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
2277 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2278 ; RV64ZVE32F-NEXT: andi a2, a2, 255
2279 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2280 ; RV64ZVE32F-NEXT: add a2, a0, a2
2281 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2282 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
2283 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2284 ; RV64ZVE32F-NEXT: .LBB31_9: # %else10
2285 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
2286 ; RV64ZVE32F-NEXT: andi a2, a1, 64
2287 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
2288 ; RV64ZVE32F-NEXT: bnez a2, .LBB31_15
2289 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
2290 ; RV64ZVE32F-NEXT: andi a1, a1, -128
2291 ; RV64ZVE32F-NEXT: bnez a1, .LBB31_16
2292 ; RV64ZVE32F-NEXT: .LBB31_11: # %else14
2293 ; RV64ZVE32F-NEXT: ret
2294 ; RV64ZVE32F-NEXT: .LBB31_12: # %cond.store3
2295 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2296 ; RV64ZVE32F-NEXT: andi a2, a2, 255
2297 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2298 ; RV64ZVE32F-NEXT: add a2, a0, a2
2299 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2300 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
2301 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2302 ; RV64ZVE32F-NEXT: andi a2, a1, 8
2303 ; RV64ZVE32F-NEXT: beqz a2, .LBB31_6
2304 ; RV64ZVE32F-NEXT: .LBB31_13: # %cond.store5
2305 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2306 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
2307 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2308 ; RV64ZVE32F-NEXT: andi a2, a2, 255
2309 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2310 ; RV64ZVE32F-NEXT: add a2, a0, a2
2311 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2312 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
2313 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
2314 ; RV64ZVE32F-NEXT: andi a2, a1, 16
2315 ; RV64ZVE32F-NEXT: beqz a2, .LBB31_7
2316 ; RV64ZVE32F-NEXT: .LBB31_14: # %cond.store7
2317 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2318 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
2319 ; RV64ZVE32F-NEXT: andi a2, a2, 255
2320 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2321 ; RV64ZVE32F-NEXT: add a2, a0, a2
2322 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2323 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
2324 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2325 ; RV64ZVE32F-NEXT: andi a2, a1, 32
2326 ; RV64ZVE32F-NEXT: bnez a2, .LBB31_8
2327 ; RV64ZVE32F-NEXT: j .LBB31_9
2328 ; RV64ZVE32F-NEXT: .LBB31_15: # %cond.store11
2329 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2330 ; RV64ZVE32F-NEXT: andi a2, a2, 255
2331 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2332 ; RV64ZVE32F-NEXT: add a2, a0, a2
2333 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2334 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
2335 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2336 ; RV64ZVE32F-NEXT: andi a1, a1, -128
2337 ; RV64ZVE32F-NEXT: beqz a1, .LBB31_11
2338 ; RV64ZVE32F-NEXT: .LBB31_16: # %cond.store13
2339 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2340 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
2341 ; RV64ZVE32F-NEXT: vmv.x.s a1, v10
2342 ; RV64ZVE32F-NEXT: andi a1, a1, 255
2343 ; RV64ZVE32F-NEXT: slli a1, a1, 2
2344 ; RV64ZVE32F-NEXT: add a0, a0, a1
2345 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2346 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
2347 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
2348 ; RV64ZVE32F-NEXT: ret
2349 %eidxs = zext <8 x i8> %idxs to <8 x i32>
2350 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
2351 call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
; Masked scatter of <8 x i32> %val to %base indexed by <8 x i16> %idxs, where the
; i16 indices are passed straight to getelementptr with no explicit extension.
; The RV32 and RV64 expectations widen the indices with vsext, scale them by 4
; (vsll.vi by 2) and issue one masked vsoxei store. The RV64ZVE32F expectations
; instead move the mask into a scalar register, test one bit per lane, and do a
; separate single-element vse32.v for every enabled lane.
2355 define void @mscatter_baseidx_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
2356 ; RV32-LABEL: mscatter_baseidx_v8i16_v8i32:
2358 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
2359 ; RV32-NEXT: vsext.vf2 v12, v10
2360 ; RV32-NEXT: vsll.vi v10, v12, 2
2361 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
2364 ; RV64-LABEL: mscatter_baseidx_v8i16_v8i32:
2366 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
2367 ; RV64-NEXT: vsext.vf4 v12, v10
2368 ; RV64-NEXT: vsll.vi v12, v12, 2
2369 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2370 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
2373 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i16_v8i32:
2374 ; RV64ZVE32F: # %bb.0:
2375 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2376 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
2377 ; RV64ZVE32F-NEXT: andi a2, a1, 1
2378 ; RV64ZVE32F-NEXT: beqz a2, .LBB32_2
2379 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
2380 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
2381 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2382 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2383 ; RV64ZVE32F-NEXT: add a2, a0, a2
2384 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2385 ; RV64ZVE32F-NEXT: vse32.v v8, (a2)
2386 ; RV64ZVE32F-NEXT: .LBB32_2: # %else
2387 ; RV64ZVE32F-NEXT: andi a2, a1, 2
2388 ; RV64ZVE32F-NEXT: beqz a2, .LBB32_4
2389 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
2390 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
2391 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
2392 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
2393 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2394 ; RV64ZVE32F-NEXT: add a2, a0, a2
2395 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2396 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
2397 ; RV64ZVE32F-NEXT: vse32.v v11, (a2)
2398 ; RV64ZVE32F-NEXT: .LBB32_4: # %else2
2399 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
2400 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
2401 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
2402 ; RV64ZVE32F-NEXT: andi a2, a1, 4
2403 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
2404 ; RV64ZVE32F-NEXT: bnez a2, .LBB32_12
2405 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
2406 ; RV64ZVE32F-NEXT: andi a2, a1, 8
2407 ; RV64ZVE32F-NEXT: bnez a2, .LBB32_13
2408 ; RV64ZVE32F-NEXT: .LBB32_6: # %else6
2409 ; RV64ZVE32F-NEXT: andi a2, a1, 16
2410 ; RV64ZVE32F-NEXT: bnez a2, .LBB32_14
2411 ; RV64ZVE32F-NEXT: .LBB32_7: # %else8
2412 ; RV64ZVE32F-NEXT: andi a2, a1, 32
2413 ; RV64ZVE32F-NEXT: beqz a2, .LBB32_9
2414 ; RV64ZVE32F-NEXT: .LBB32_8: # %cond.store9
2415 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
2416 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
2417 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2418 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2419 ; RV64ZVE32F-NEXT: add a2, a0, a2
2420 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2421 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
2422 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2423 ; RV64ZVE32F-NEXT: .LBB32_9: # %else10
2424 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
2425 ; RV64ZVE32F-NEXT: andi a2, a1, 64
2426 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
2427 ; RV64ZVE32F-NEXT: bnez a2, .LBB32_15
2428 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
2429 ; RV64ZVE32F-NEXT: andi a1, a1, -128
2430 ; RV64ZVE32F-NEXT: bnez a1, .LBB32_16
2431 ; RV64ZVE32F-NEXT: .LBB32_11: # %else14
2432 ; RV64ZVE32F-NEXT: ret
2433 ; RV64ZVE32F-NEXT: .LBB32_12: # %cond.store3
2434 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2435 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2436 ; RV64ZVE32F-NEXT: add a2, a0, a2
2437 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2438 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
2439 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2440 ; RV64ZVE32F-NEXT: andi a2, a1, 8
2441 ; RV64ZVE32F-NEXT: beqz a2, .LBB32_6
2442 ; RV64ZVE32F-NEXT: .LBB32_13: # %cond.store5
2443 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
2444 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
2445 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2446 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2447 ; RV64ZVE32F-NEXT: add a2, a0, a2
2448 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2449 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
2450 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
2451 ; RV64ZVE32F-NEXT: andi a2, a1, 16
2452 ; RV64ZVE32F-NEXT: beqz a2, .LBB32_7
2453 ; RV64ZVE32F-NEXT: .LBB32_14: # %cond.store7
2454 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
2455 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
2456 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2457 ; RV64ZVE32F-NEXT: add a2, a0, a2
2458 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2459 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
2460 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2461 ; RV64ZVE32F-NEXT: andi a2, a1, 32
2462 ; RV64ZVE32F-NEXT: bnez a2, .LBB32_8
2463 ; RV64ZVE32F-NEXT: j .LBB32_9
2464 ; RV64ZVE32F-NEXT: .LBB32_15: # %cond.store11
2465 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2466 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2467 ; RV64ZVE32F-NEXT: add a2, a0, a2
2468 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2469 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
2470 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2471 ; RV64ZVE32F-NEXT: andi a1, a1, -128
2472 ; RV64ZVE32F-NEXT: beqz a1, .LBB32_11
2473 ; RV64ZVE32F-NEXT: .LBB32_16: # %cond.store13
2474 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
2475 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
2476 ; RV64ZVE32F-NEXT: vmv.x.s a1, v10
2477 ; RV64ZVE32F-NEXT: slli a1, a1, 2
2478 ; RV64ZVE32F-NEXT: add a0, a0, a1
2479 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2480 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
2481 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
2482 ; RV64ZVE32F-NEXT: ret
2483 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i16> %idxs
2484 call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
; Masked scatter of <8 x i32> %val to %base with <8 x i16> indices that are
; explicitly sign-extended to i32 before the getelementptr. The expectations
; follow the same instruction sequences as the unextended i16-index test
; (vsext + vsll.vi + vsoxei for RV32 and RV64, per-lane branch-and-store for
; RV64ZVE32F); only the .LBB label numbers differ.
2488 define void @mscatter_baseidx_sext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
2489 ; RV32-LABEL: mscatter_baseidx_sext_v8i16_v8i32:
2491 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
2492 ; RV32-NEXT: vsext.vf2 v12, v10
2493 ; RV32-NEXT: vsll.vi v10, v12, 2
2494 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
2497 ; RV64-LABEL: mscatter_baseidx_sext_v8i16_v8i32:
2499 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
2500 ; RV64-NEXT: vsext.vf4 v12, v10
2501 ; RV64-NEXT: vsll.vi v12, v12, 2
2502 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2503 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
2506 ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i16_v8i32:
2507 ; RV64ZVE32F: # %bb.0:
2508 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2509 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
2510 ; RV64ZVE32F-NEXT: andi a2, a1, 1
2511 ; RV64ZVE32F-NEXT: beqz a2, .LBB33_2
2512 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
2513 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
2514 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2515 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2516 ; RV64ZVE32F-NEXT: add a2, a0, a2
2517 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2518 ; RV64ZVE32F-NEXT: vse32.v v8, (a2)
2519 ; RV64ZVE32F-NEXT: .LBB33_2: # %else
2520 ; RV64ZVE32F-NEXT: andi a2, a1, 2
2521 ; RV64ZVE32F-NEXT: beqz a2, .LBB33_4
2522 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
2523 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
2524 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
2525 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
2526 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2527 ; RV64ZVE32F-NEXT: add a2, a0, a2
2528 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2529 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
2530 ; RV64ZVE32F-NEXT: vse32.v v11, (a2)
2531 ; RV64ZVE32F-NEXT: .LBB33_4: # %else2
2532 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
2533 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
2534 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
2535 ; RV64ZVE32F-NEXT: andi a2, a1, 4
2536 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
2537 ; RV64ZVE32F-NEXT: bnez a2, .LBB33_12
2538 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
2539 ; RV64ZVE32F-NEXT: andi a2, a1, 8
2540 ; RV64ZVE32F-NEXT: bnez a2, .LBB33_13
2541 ; RV64ZVE32F-NEXT: .LBB33_6: # %else6
2542 ; RV64ZVE32F-NEXT: andi a2, a1, 16
2543 ; RV64ZVE32F-NEXT: bnez a2, .LBB33_14
2544 ; RV64ZVE32F-NEXT: .LBB33_7: # %else8
2545 ; RV64ZVE32F-NEXT: andi a2, a1, 32
2546 ; RV64ZVE32F-NEXT: beqz a2, .LBB33_9
2547 ; RV64ZVE32F-NEXT: .LBB33_8: # %cond.store9
2548 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
2549 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
2550 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2551 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2552 ; RV64ZVE32F-NEXT: add a2, a0, a2
2553 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2554 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
2555 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2556 ; RV64ZVE32F-NEXT: .LBB33_9: # %else10
2557 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
2558 ; RV64ZVE32F-NEXT: andi a2, a1, 64
2559 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
2560 ; RV64ZVE32F-NEXT: bnez a2, .LBB33_15
2561 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
2562 ; RV64ZVE32F-NEXT: andi a1, a1, -128
2563 ; RV64ZVE32F-NEXT: bnez a1, .LBB33_16
2564 ; RV64ZVE32F-NEXT: .LBB33_11: # %else14
2565 ; RV64ZVE32F-NEXT: ret
2566 ; RV64ZVE32F-NEXT: .LBB33_12: # %cond.store3
2567 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2568 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2569 ; RV64ZVE32F-NEXT: add a2, a0, a2
2570 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2571 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
2572 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2573 ; RV64ZVE32F-NEXT: andi a2, a1, 8
2574 ; RV64ZVE32F-NEXT: beqz a2, .LBB33_6
2575 ; RV64ZVE32F-NEXT: .LBB33_13: # %cond.store5
2576 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
2577 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
2578 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2579 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2580 ; RV64ZVE32F-NEXT: add a2, a0, a2
2581 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2582 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
2583 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
2584 ; RV64ZVE32F-NEXT: andi a2, a1, 16
2585 ; RV64ZVE32F-NEXT: beqz a2, .LBB33_7
2586 ; RV64ZVE32F-NEXT: .LBB33_14: # %cond.store7
2587 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
2588 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
2589 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2590 ; RV64ZVE32F-NEXT: add a2, a0, a2
2591 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2592 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
2593 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2594 ; RV64ZVE32F-NEXT: andi a2, a1, 32
2595 ; RV64ZVE32F-NEXT: bnez a2, .LBB33_8
2596 ; RV64ZVE32F-NEXT: j .LBB33_9
2597 ; RV64ZVE32F-NEXT: .LBB33_15: # %cond.store11
2598 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2599 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2600 ; RV64ZVE32F-NEXT: add a2, a0, a2
2601 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2602 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
2603 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2604 ; RV64ZVE32F-NEXT: andi a1, a1, -128
2605 ; RV64ZVE32F-NEXT: beqz a1, .LBB33_11
2606 ; RV64ZVE32F-NEXT: .LBB33_16: # %cond.store13
2607 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
2608 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
2609 ; RV64ZVE32F-NEXT: vmv.x.s a1, v10
2610 ; RV64ZVE32F-NEXT: slli a1, a1, 2
2611 ; RV64ZVE32F-NEXT: add a0, a0, a1
2612 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2613 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
2614 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
2615 ; RV64ZVE32F-NEXT: ret
2616 %eidxs = sext <8 x i16> %idxs to <8 x i32>
2617 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
2618 call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
; Masked scatter of <8 x i32> %val to %base with <8 x i16> indices that are
; zero-extended to i32 before the getelementptr. Both the RV32 and the RV64
; expectations widen with vzext.vf2 at e32 and use a 32-bit indexed store
; (vsoxei32.v); RV64 does not widen the indices to 64 bits here. The RV64ZVE32F
; expectations build the constant 0xffff in a1 (lui 16 then addiw -1) and AND
; every extracted index with it before scaling, then store lane by lane
; according to the mask bits held in a2.
2622 define void @mscatter_baseidx_zext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
2623 ; RV32-LABEL: mscatter_baseidx_zext_v8i16_v8i32:
2625 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
2626 ; RV32-NEXT: vzext.vf2 v12, v10
2627 ; RV32-NEXT: vsll.vi v10, v12, 2
2628 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
2631 ; RV64-LABEL: mscatter_baseidx_zext_v8i16_v8i32:
2633 ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
2634 ; RV64-NEXT: vzext.vf2 v12, v10
2635 ; RV64-NEXT: vsll.vi v10, v12, 2
2636 ; RV64-NEXT: vsoxei32.v v8, (a0), v10, v0.t
2639 ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8i32:
2640 ; RV64ZVE32F: # %bb.0:
2641 ; RV64ZVE32F-NEXT: lui a1, 16
2642 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2643 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
2644 ; RV64ZVE32F-NEXT: andi a3, a2, 1
2645 ; RV64ZVE32F-NEXT: addiw a1, a1, -1
2646 ; RV64ZVE32F-NEXT: beqz a3, .LBB34_2
2647 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
2648 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
2649 ; RV64ZVE32F-NEXT: vmv.x.s a3, v10
2650 ; RV64ZVE32F-NEXT: and a3, a3, a1
2651 ; RV64ZVE32F-NEXT: slli a3, a3, 2
2652 ; RV64ZVE32F-NEXT: add a3, a0, a3
2653 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2654 ; RV64ZVE32F-NEXT: vse32.v v8, (a3)
2655 ; RV64ZVE32F-NEXT: .LBB34_2: # %else
2656 ; RV64ZVE32F-NEXT: andi a3, a2, 2
2657 ; RV64ZVE32F-NEXT: beqz a3, .LBB34_4
2658 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
2659 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
2660 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
2661 ; RV64ZVE32F-NEXT: vmv.x.s a3, v11
2662 ; RV64ZVE32F-NEXT: and a3, a3, a1
2663 ; RV64ZVE32F-NEXT: slli a3, a3, 2
2664 ; RV64ZVE32F-NEXT: add a3, a0, a3
2665 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2666 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
2667 ; RV64ZVE32F-NEXT: vse32.v v11, (a3)
2668 ; RV64ZVE32F-NEXT: .LBB34_4: # %else2
2669 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
2670 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
2671 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
2672 ; RV64ZVE32F-NEXT: andi a3, a2, 4
2673 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
2674 ; RV64ZVE32F-NEXT: bnez a3, .LBB34_12
2675 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
2676 ; RV64ZVE32F-NEXT: andi a3, a2, 8
2677 ; RV64ZVE32F-NEXT: bnez a3, .LBB34_13
2678 ; RV64ZVE32F-NEXT: .LBB34_6: # %else6
2679 ; RV64ZVE32F-NEXT: andi a3, a2, 16
2680 ; RV64ZVE32F-NEXT: bnez a3, .LBB34_14
2681 ; RV64ZVE32F-NEXT: .LBB34_7: # %else8
2682 ; RV64ZVE32F-NEXT: andi a3, a2, 32
2683 ; RV64ZVE32F-NEXT: beqz a3, .LBB34_9
2684 ; RV64ZVE32F-NEXT: .LBB34_8: # %cond.store9
2685 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
2686 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
2687 ; RV64ZVE32F-NEXT: vmv.x.s a3, v10
2688 ; RV64ZVE32F-NEXT: and a3, a3, a1
2689 ; RV64ZVE32F-NEXT: slli a3, a3, 2
2690 ; RV64ZVE32F-NEXT: add a3, a0, a3
2691 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2692 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
2693 ; RV64ZVE32F-NEXT: vse32.v v12, (a3)
2694 ; RV64ZVE32F-NEXT: .LBB34_9: # %else10
2695 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
2696 ; RV64ZVE32F-NEXT: andi a3, a2, 64
2697 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
2698 ; RV64ZVE32F-NEXT: bnez a3, .LBB34_15
2699 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
2700 ; RV64ZVE32F-NEXT: andi a2, a2, -128
2701 ; RV64ZVE32F-NEXT: bnez a2, .LBB34_16
2702 ; RV64ZVE32F-NEXT: .LBB34_11: # %else14
2703 ; RV64ZVE32F-NEXT: ret
2704 ; RV64ZVE32F-NEXT: .LBB34_12: # %cond.store3
2705 ; RV64ZVE32F-NEXT: vmv.x.s a3, v10
2706 ; RV64ZVE32F-NEXT: and a3, a3, a1
2707 ; RV64ZVE32F-NEXT: slli a3, a3, 2
2708 ; RV64ZVE32F-NEXT: add a3, a0, a3
2709 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2710 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
2711 ; RV64ZVE32F-NEXT: vse32.v v12, (a3)
2712 ; RV64ZVE32F-NEXT: andi a3, a2, 8
2713 ; RV64ZVE32F-NEXT: beqz a3, .LBB34_6
2714 ; RV64ZVE32F-NEXT: .LBB34_13: # %cond.store5
2715 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
2716 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
2717 ; RV64ZVE32F-NEXT: vmv.x.s a3, v10
2718 ; RV64ZVE32F-NEXT: and a3, a3, a1
2719 ; RV64ZVE32F-NEXT: slli a3, a3, 2
2720 ; RV64ZVE32F-NEXT: add a3, a0, a3
2721 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2722 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
2723 ; RV64ZVE32F-NEXT: vse32.v v10, (a3)
2724 ; RV64ZVE32F-NEXT: andi a3, a2, 16
2725 ; RV64ZVE32F-NEXT: beqz a3, .LBB34_7
2726 ; RV64ZVE32F-NEXT: .LBB34_14: # %cond.store7
2727 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
2728 ; RV64ZVE32F-NEXT: vmv.x.s a3, v11
2729 ; RV64ZVE32F-NEXT: and a3, a3, a1
2730 ; RV64ZVE32F-NEXT: slli a3, a3, 2
2731 ; RV64ZVE32F-NEXT: add a3, a0, a3
2732 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2733 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
2734 ; RV64ZVE32F-NEXT: vse32.v v12, (a3)
2735 ; RV64ZVE32F-NEXT: andi a3, a2, 32
2736 ; RV64ZVE32F-NEXT: bnez a3, .LBB34_8
2737 ; RV64ZVE32F-NEXT: j .LBB34_9
2738 ; RV64ZVE32F-NEXT: .LBB34_15: # %cond.store11
2739 ; RV64ZVE32F-NEXT: vmv.x.s a3, v10
2740 ; RV64ZVE32F-NEXT: and a3, a3, a1
2741 ; RV64ZVE32F-NEXT: slli a3, a3, 2
2742 ; RV64ZVE32F-NEXT: add a3, a0, a3
2743 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2744 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
2745 ; RV64ZVE32F-NEXT: vse32.v v12, (a3)
2746 ; RV64ZVE32F-NEXT: andi a2, a2, -128
2747 ; RV64ZVE32F-NEXT: beqz a2, .LBB34_11
2748 ; RV64ZVE32F-NEXT: .LBB34_16: # %cond.store13
2749 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
2750 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
2751 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2752 ; RV64ZVE32F-NEXT: and a1, a2, a1
2753 ; RV64ZVE32F-NEXT: slli a1, a1, 2
2754 ; RV64ZVE32F-NEXT: add a0, a0, a1
2755 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2756 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
2757 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
2758 ; RV64ZVE32F-NEXT: ret
2759 %eidxs = zext <8 x i16> %idxs to <8 x i32>
2760 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
2761 call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
; Masked scatter of <8 x i32> %val to %base indexed by <8 x i32> %idxs, so the
; index elements already have the same width as the data elements.
; The RV32 expectations only scale the indices (vsll.vi by 2) before vsoxei32.v.
; The RV64 expectations first sign-extend them to e64 with vsext.vf2 and then use
; vsoxei64.v. The RV64ZVE32F expectations read the mask into a1 and perform a
; conditional single-element vse32.v per lane, extracting each index with
; vslidedown.vi / vmv.x.s.
2765 define void @mscatter_baseidx_v8i32(<8 x i32> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m) {
2766 ; RV32-LABEL: mscatter_baseidx_v8i32:
2768 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
2769 ; RV32-NEXT: vsll.vi v10, v10, 2
2770 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
2773 ; RV64-LABEL: mscatter_baseidx_v8i32:
2775 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
2776 ; RV64-NEXT: vsext.vf2 v12, v10
2777 ; RV64-NEXT: vsll.vi v12, v12, 2
2778 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2779 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
2782 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i32:
2783 ; RV64ZVE32F: # %bb.0:
2784 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2785 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
2786 ; RV64ZVE32F-NEXT: andi a2, a1, 1
2787 ; RV64ZVE32F-NEXT: beqz a2, .LBB35_2
2788 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
2789 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2790 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2791 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2792 ; RV64ZVE32F-NEXT: add a2, a0, a2
2793 ; RV64ZVE32F-NEXT: vse32.v v8, (a2)
2794 ; RV64ZVE32F-NEXT: .LBB35_2: # %else
2795 ; RV64ZVE32F-NEXT: andi a2, a1, 2
2796 ; RV64ZVE32F-NEXT: beqz a2, .LBB35_4
2797 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
2798 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2799 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 1
2800 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
2801 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2802 ; RV64ZVE32F-NEXT: add a2, a0, a2
2803 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1
2804 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2805 ; RV64ZVE32F-NEXT: .LBB35_4: # %else2
2806 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
2807 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 4
2808 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
2809 ; RV64ZVE32F-NEXT: andi a2, a1, 4
2810 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
2811 ; RV64ZVE32F-NEXT: bnez a2, .LBB35_12
2812 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
2813 ; RV64ZVE32F-NEXT: andi a2, a1, 8
2814 ; RV64ZVE32F-NEXT: bnez a2, .LBB35_13
2815 ; RV64ZVE32F-NEXT: .LBB35_6: # %else6
2816 ; RV64ZVE32F-NEXT: andi a2, a1, 16
2817 ; RV64ZVE32F-NEXT: bnez a2, .LBB35_14
2818 ; RV64ZVE32F-NEXT: .LBB35_7: # %else8
2819 ; RV64ZVE32F-NEXT: andi a2, a1, 32
2820 ; RV64ZVE32F-NEXT: beqz a2, .LBB35_9
2821 ; RV64ZVE32F-NEXT: .LBB35_8: # %cond.store9
2822 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2823 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v12, 1
2824 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2825 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2826 ; RV64ZVE32F-NEXT: add a2, a0, a2
2827 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2828 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5
2829 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
2830 ; RV64ZVE32F-NEXT: .LBB35_9: # %else10
2831 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
2832 ; RV64ZVE32F-NEXT: andi a2, a1, 64
2833 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v12, 2
2834 ; RV64ZVE32F-NEXT: bnez a2, .LBB35_15
2835 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
2836 ; RV64ZVE32F-NEXT: andi a1, a1, -128
2837 ; RV64ZVE32F-NEXT: bnez a1, .LBB35_16
2838 ; RV64ZVE32F-NEXT: .LBB35_11: # %else14
2839 ; RV64ZVE32F-NEXT: ret
2840 ; RV64ZVE32F-NEXT: .LBB35_12: # %cond.store3
2841 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2842 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2843 ; RV64ZVE32F-NEXT: add a2, a0, a2
2844 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
2845 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2846 ; RV64ZVE32F-NEXT: vse32.v v11, (a2)
2847 ; RV64ZVE32F-NEXT: andi a2, a1, 8
2848 ; RV64ZVE32F-NEXT: beqz a2, .LBB35_6
2849 ; RV64ZVE32F-NEXT: .LBB35_13: # %cond.store5
2850 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2851 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
2852 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2853 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2854 ; RV64ZVE32F-NEXT: add a2, a0, a2
2855 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
2856 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
2857 ; RV64ZVE32F-NEXT: andi a2, a1, 16
2858 ; RV64ZVE32F-NEXT: beqz a2, .LBB35_7
2859 ; RV64ZVE32F-NEXT: .LBB35_14: # %cond.store7
2860 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2861 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
2862 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2863 ; RV64ZVE32F-NEXT: add a2, a0, a2
2864 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
2865 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
2866 ; RV64ZVE32F-NEXT: andi a2, a1, 32
2867 ; RV64ZVE32F-NEXT: bnez a2, .LBB35_8
2868 ; RV64ZVE32F-NEXT: j .LBB35_9
2869 ; RV64ZVE32F-NEXT: .LBB35_15: # %cond.store11
2870 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2871 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2872 ; RV64ZVE32F-NEXT: add a2, a0, a2
2873 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2874 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
2875 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2876 ; RV64ZVE32F-NEXT: andi a1, a1, -128
2877 ; RV64ZVE32F-NEXT: beqz a1, .LBB35_11
2878 ; RV64ZVE32F-NEXT: .LBB35_16: # %cond.store13
2879 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2880 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
2881 ; RV64ZVE32F-NEXT: vmv.x.s a1, v10
2882 ; RV64ZVE32F-NEXT: slli a1, a1, 2
2883 ; RV64ZVE32F-NEXT: add a0, a0, a1
2884 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2885 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
2886 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
2887 ; RV64ZVE32F-NEXT: ret
2888 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %idxs
2889 call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
; Masked-scatter intrinsic for a single i64 element. Operands, in order: the
; value vector, the vector of destination pointers, an i32 (the alignment,
; per the LLVM LangRef), and the i1 mask vector.
2893 declare void @llvm.masked.scatter.v1i64.v1p0(<1 x i64>, <1 x ptr>, i32, <1 x i1>)
; Masked scatter of one i64 element through one pointer.
; The RV32V and RV64 expectations use a single masked indexed store at e64.
; Both ZVE32F expectations test the mask with vfirst.m and branch around a
; scalar store instead: RV32ZVE32F takes the address from v8 and writes the
; two 32-bit halves from a1/a0 with a pair of sw, while RV64ZVE32F does one
; sd of a0 to the address in a1.
2895 define void @mscatter_v1i64(<1 x i64> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
2896 ; RV32V-LABEL: mscatter_v1i64:
2898 ; RV32V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
2899 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
2902 ; RV64-LABEL: mscatter_v1i64:
2904 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
2905 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
2908 ; RV32ZVE32F-LABEL: mscatter_v1i64:
2909 ; RV32ZVE32F: # %bb.0:
2910 ; RV32ZVE32F-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
2911 ; RV32ZVE32F-NEXT: vfirst.m a2, v0
2912 ; RV32ZVE32F-NEXT: bnez a2, .LBB36_2
2913 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.store
2914 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2915 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8
2916 ; RV32ZVE32F-NEXT: sw a1, 4(a2)
2917 ; RV32ZVE32F-NEXT: sw a0, 0(a2)
2918 ; RV32ZVE32F-NEXT: .LBB36_2: # %else
2919 ; RV32ZVE32F-NEXT: ret
2921 ; RV64ZVE32F-LABEL: mscatter_v1i64:
2922 ; RV64ZVE32F: # %bb.0:
2923 ; RV64ZVE32F-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
2924 ; RV64ZVE32F-NEXT: vfirst.m a2, v0
2925 ; RV64ZVE32F-NEXT: bnez a2, .LBB36_2
2926 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
2927 ; RV64ZVE32F-NEXT: sd a0, 0(a1)
2928 ; RV64ZVE32F-NEXT: .LBB36_2: # %else
2929 ; RV64ZVE32F-NEXT: ret
2930 call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> %val, <1 x ptr> %ptrs, i32 8, <1 x i1> %m)
; Masked-scatter intrinsic for two i64 elements. Operands, in order: the value
; vector, the vector of destination pointers, an i32 (the alignment, per the
; LLVM LangRef), and the i1 mask vector.
2934 declare void @llvm.masked.scatter.v2i64.v2p0(<2 x i64>, <2 x ptr>, i32, <2 x i1>)
; Masked scatter of two i64 elements through two pointers.
; The RV32V and RV64 expectations use a single masked indexed store at e64.
; The ZVE32F expectations copy the mask to a scalar register and branch on
; bits 0 and 1. RV32ZVE32F loads the 32-bit halves of the values from memory
; at a0, takes each address from v8, and stores with sw pairs. RV64ZVE32F
; stores a0 and a1 with sd to the addresses in a2 and a3.
2936 define void @mscatter_v2i64(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
2937 ; RV32V-LABEL: mscatter_v2i64:
2939 ; RV32V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
2940 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
2943 ; RV64-LABEL: mscatter_v2i64:
2945 ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
2946 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
2949 ; RV32ZVE32F-LABEL: mscatter_v2i64:
2950 ; RV32ZVE32F: # %bb.0:
2951 ; RV32ZVE32F-NEXT: lw a2, 12(a0)
2952 ; RV32ZVE32F-NEXT: lw a1, 8(a0)
2953 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2954 ; RV32ZVE32F-NEXT: vmv.x.s a3, v0
2955 ; RV32ZVE32F-NEXT: andi a4, a3, 1
2956 ; RV32ZVE32F-NEXT: bnez a4, .LBB37_3
2957 ; RV32ZVE32F-NEXT: # %bb.1: # %else
2958 ; RV32ZVE32F-NEXT: andi a3, a3, 2
2959 ; RV32ZVE32F-NEXT: bnez a3, .LBB37_4
2960 ; RV32ZVE32F-NEXT: .LBB37_2: # %else2
2961 ; RV32ZVE32F-NEXT: ret
2962 ; RV32ZVE32F-NEXT: .LBB37_3: # %cond.store
2963 ; RV32ZVE32F-NEXT: lw a4, 4(a0)
2964 ; RV32ZVE32F-NEXT: lw a0, 0(a0)
2965 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2966 ; RV32ZVE32F-NEXT: vmv.x.s a5, v8
2967 ; RV32ZVE32F-NEXT: sw a4, 4(a5)
2968 ; RV32ZVE32F-NEXT: sw a0, 0(a5)
2969 ; RV32ZVE32F-NEXT: andi a3, a3, 2
2970 ; RV32ZVE32F-NEXT: beqz a3, .LBB37_2
2971 ; RV32ZVE32F-NEXT: .LBB37_4: # %cond.store1
2972 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2973 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 1
2974 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
2975 ; RV32ZVE32F-NEXT: sw a2, 4(a0)
2976 ; RV32ZVE32F-NEXT: sw a1, 0(a0)
2977 ; RV32ZVE32F-NEXT: ret
2979 ; RV64ZVE32F-LABEL: mscatter_v2i64:
2980 ; RV64ZVE32F: # %bb.0:
2981 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2982 ; RV64ZVE32F-NEXT: vmv.x.s a4, v0
2983 ; RV64ZVE32F-NEXT: andi a5, a4, 1
2984 ; RV64ZVE32F-NEXT: bnez a5, .LBB37_3
2985 ; RV64ZVE32F-NEXT: # %bb.1: # %else
2986 ; RV64ZVE32F-NEXT: andi a4, a4, 2
2987 ; RV64ZVE32F-NEXT: bnez a4, .LBB37_4
2988 ; RV64ZVE32F-NEXT: .LBB37_2: # %else2
2989 ; RV64ZVE32F-NEXT: ret
2990 ; RV64ZVE32F-NEXT: .LBB37_3: # %cond.store
2991 ; RV64ZVE32F-NEXT: sd a0, 0(a2)
2992 ; RV64ZVE32F-NEXT: andi a4, a4, 2
2993 ; RV64ZVE32F-NEXT: beqz a4, .LBB37_2
2994 ; RV64ZVE32F-NEXT: .LBB37_4: # %cond.store1
2995 ; RV64ZVE32F-NEXT: sd a1, 0(a3)
2996 ; RV64ZVE32F-NEXT: ret
2997 call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> %val, <2 x ptr> %ptrs, i32 8, <2 x i1> %m)
; Masked-scatter intrinsic for four i64 elements. Operands, in order: the value
; vector, the vector of destination pointers, an i32 (the alignment, per the
; LLVM LangRef), and the i1 mask vector.
3001 declare void @llvm.masked.scatter.v4i64.v4p0(<4 x i64>, <4 x ptr>, i32, <4 x i1>)
; Masked scatter of four i64 elements through four pointers.
; The RV32V and RV64 expectations use a single masked indexed store at e64, m2.
; The ZVE32F expectations copy the mask to a scalar register and branch on
; bits 0..3. RV32ZVE32F loads the value halves from memory at a0 and takes
; each address from v8 (vslidedown.vi + vmv.x.s), storing with sw pairs.
; RV64ZVE32F loads the values from memory at a0 and the pointers from memory
; at a1, then stores with sd. In both, element 0 is loaded only inside its
; cond.store block, while elements 1..3 are loaded up front.
3003 define void @mscatter_v4i64(<4 x i64> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
3004 ; RV32V-LABEL: mscatter_v4i64:
3006 ; RV32V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
3007 ; RV32V-NEXT: vsoxei32.v v8, (zero), v10, v0.t
3010 ; RV64-LABEL: mscatter_v4i64:
3012 ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
3013 ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
3016 ; RV32ZVE32F-LABEL: mscatter_v4i64:
3017 ; RV32ZVE32F: # %bb.0:
3018 ; RV32ZVE32F-NEXT: lw a1, 28(a0)
3019 ; RV32ZVE32F-NEXT: lw a2, 24(a0)
3020 ; RV32ZVE32F-NEXT: lw a3, 20(a0)
3021 ; RV32ZVE32F-NEXT: lw a4, 16(a0)
3022 ; RV32ZVE32F-NEXT: lw a7, 12(a0)
3023 ; RV32ZVE32F-NEXT: lw a6, 8(a0)
3024 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
3025 ; RV32ZVE32F-NEXT: vmv.x.s a5, v0
3026 ; RV32ZVE32F-NEXT: andi t0, a5, 1
3027 ; RV32ZVE32F-NEXT: bnez t0, .LBB38_5
3028 ; RV32ZVE32F-NEXT: # %bb.1: # %else
3029 ; RV32ZVE32F-NEXT: andi a0, a5, 2
3030 ; RV32ZVE32F-NEXT: bnez a0, .LBB38_6
3031 ; RV32ZVE32F-NEXT: .LBB38_2: # %else2
3032 ; RV32ZVE32F-NEXT: andi a0, a5, 4
3033 ; RV32ZVE32F-NEXT: bnez a0, .LBB38_7
3034 ; RV32ZVE32F-NEXT: .LBB38_3: # %else4
3035 ; RV32ZVE32F-NEXT: andi a5, a5, 8
3036 ; RV32ZVE32F-NEXT: bnez a5, .LBB38_8
3037 ; RV32ZVE32F-NEXT: .LBB38_4: # %else6
3038 ; RV32ZVE32F-NEXT: ret
3039 ; RV32ZVE32F-NEXT: .LBB38_5: # %cond.store
3040 ; RV32ZVE32F-NEXT: lw t0, 4(a0)
3041 ; RV32ZVE32F-NEXT: lw a0, 0(a0)
3042 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3043 ; RV32ZVE32F-NEXT: vmv.x.s t1, v8
3044 ; RV32ZVE32F-NEXT: sw t0, 4(t1)
3045 ; RV32ZVE32F-NEXT: sw a0, 0(t1)
3046 ; RV32ZVE32F-NEXT: andi a0, a5, 2
3047 ; RV32ZVE32F-NEXT: beqz a0, .LBB38_2
3048 ; RV32ZVE32F-NEXT: .LBB38_6: # %cond.store1
3049 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3050 ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1
3051 ; RV32ZVE32F-NEXT: vmv.x.s a0, v9
3052 ; RV32ZVE32F-NEXT: sw a7, 4(a0)
3053 ; RV32ZVE32F-NEXT: sw a6, 0(a0)
3054 ; RV32ZVE32F-NEXT: andi a0, a5, 4
3055 ; RV32ZVE32F-NEXT: beqz a0, .LBB38_3
3056 ; RV32ZVE32F-NEXT: .LBB38_7: # %cond.store3
3057 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3058 ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2
3059 ; RV32ZVE32F-NEXT: vmv.x.s a0, v9
3060 ; RV32ZVE32F-NEXT: sw a4, 0(a0)
3061 ; RV32ZVE32F-NEXT: sw a3, 4(a0)
3062 ; RV32ZVE32F-NEXT: andi a5, a5, 8
3063 ; RV32ZVE32F-NEXT: beqz a5, .LBB38_4
3064 ; RV32ZVE32F-NEXT: .LBB38_8: # %cond.store5
3065 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3066 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3
3067 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
3068 ; RV32ZVE32F-NEXT: sw a2, 0(a0)
3069 ; RV32ZVE32F-NEXT: sw a1, 4(a0)
3070 ; RV32ZVE32F-NEXT: ret
3072 ; RV64ZVE32F-LABEL: mscatter_v4i64:
3073 ; RV64ZVE32F: # %bb.0:
3074 ; RV64ZVE32F-NEXT: ld a2, 24(a1)
3075 ; RV64ZVE32F-NEXT: ld a4, 16(a1)
3076 ; RV64ZVE32F-NEXT: ld a7, 8(a1)
3077 ; RV64ZVE32F-NEXT: ld a3, 24(a0)
3078 ; RV64ZVE32F-NEXT: ld a5, 16(a0)
3079 ; RV64ZVE32F-NEXT: ld t0, 8(a0)
3080 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
3081 ; RV64ZVE32F-NEXT: vmv.x.s a6, v0
3082 ; RV64ZVE32F-NEXT: andi t1, a6, 1
3083 ; RV64ZVE32F-NEXT: bnez t1, .LBB38_5
3084 ; RV64ZVE32F-NEXT: # %bb.1: # %else
3085 ; RV64ZVE32F-NEXT: andi a0, a6, 2
3086 ; RV64ZVE32F-NEXT: bnez a0, .LBB38_6
3087 ; RV64ZVE32F-NEXT: .LBB38_2: # %else2
3088 ; RV64ZVE32F-NEXT: andi a0, a6, 4
3089 ; RV64ZVE32F-NEXT: bnez a0, .LBB38_7
3090 ; RV64ZVE32F-NEXT: .LBB38_3: # %else4
3091 ; RV64ZVE32F-NEXT: andi a0, a6, 8
3092 ; RV64ZVE32F-NEXT: bnez a0, .LBB38_8
3093 ; RV64ZVE32F-NEXT: .LBB38_4: # %else6
3094 ; RV64ZVE32F-NEXT: ret
3095 ; RV64ZVE32F-NEXT: .LBB38_5: # %cond.store
3096 ; RV64ZVE32F-NEXT: ld a1, 0(a1)
3097 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
3098 ; RV64ZVE32F-NEXT: sd a0, 0(a1)
3099 ; RV64ZVE32F-NEXT: andi a0, a6, 2
3100 ; RV64ZVE32F-NEXT: beqz a0, .LBB38_2
3101 ; RV64ZVE32F-NEXT: .LBB38_6: # %cond.store1
3102 ; RV64ZVE32F-NEXT: sd t0, 0(a7)
3103 ; RV64ZVE32F-NEXT: andi a0, a6, 4
3104 ; RV64ZVE32F-NEXT: beqz a0, .LBB38_3
3105 ; RV64ZVE32F-NEXT: .LBB38_7: # %cond.store3
3106 ; RV64ZVE32F-NEXT: sd a5, 0(a4)
3107 ; RV64ZVE32F-NEXT: andi a0, a6, 8
3108 ; RV64ZVE32F-NEXT: beqz a0, .LBB38_4
3109 ; RV64ZVE32F-NEXT: .LBB38_8: # %cond.store5
3110 ; RV64ZVE32F-NEXT: sd a3, 0(a2)
3111 ; RV64ZVE32F-NEXT: ret
3112 call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> %val, <4 x ptr> %ptrs, i32 8, <4 x i1> %m)
; Scatter of <4 x i64> through <4 x ptr> where the mask is a splat of i1 1,
; i.e. every lane is enabled.
;
; With the full V extension (the RV32V and RV64 prefixes) the expected code is
; a single unmasked indexed store: vsoxei32.v / vsoxei64.v with no v0.t operand.
;
; With only Zve32f the scatter is scalarized. The expectations show the mask
; being materialized with vmset.m and moved to a scalar, and every lane is
; still guarded by a test of its mask bit (the first guard is an always-taken
; "beqz zero"). These checks therefore record that the all-true mask is not
; folded away by that lowering.
;
; NOTE(review): in the Zve32f runs the i64 lanes are read with scalar loads
; from the address in a0 (and, on RV64, the pointers from the address in a1),
; presumably because the operands are passed indirectly when 64-bit vector
; elements are unavailable - confirm against the calling convention.
3116 define void @mscatter_truemask_v4i64(<4 x i64> %val, <4 x ptr> %ptrs) {
3117 ; RV32V-LABEL: mscatter_truemask_v4i64:
3119 ; RV32V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
3120 ; RV32V-NEXT: vsoxei32.v v8, (zero), v10
3123 ; RV64-LABEL: mscatter_truemask_v4i64:
3125 ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
3126 ; RV64-NEXT: vsoxei64.v v8, (zero), v10
3129 ; RV32ZVE32F-LABEL: mscatter_truemask_v4i64:
3130 ; RV32ZVE32F: # %bb.0:
3131 ; RV32ZVE32F-NEXT: lw a1, 28(a0)
3132 ; RV32ZVE32F-NEXT: lw a2, 24(a0)
3133 ; RV32ZVE32F-NEXT: lw a3, 20(a0)
3134 ; RV32ZVE32F-NEXT: lw a4, 16(a0)
3135 ; RV32ZVE32F-NEXT: lw a7, 12(a0)
3136 ; RV32ZVE32F-NEXT: lw a6, 8(a0)
3137 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
3138 ; RV32ZVE32F-NEXT: vmset.m v9
3139 ; RV32ZVE32F-NEXT: vmv.x.s a5, v9
3140 ; RV32ZVE32F-NEXT: beqz zero, .LBB39_5
3141 ; RV32ZVE32F-NEXT: # %bb.1: # %else
3142 ; RV32ZVE32F-NEXT: andi a0, a5, 2
3143 ; RV32ZVE32F-NEXT: bnez a0, .LBB39_6
3144 ; RV32ZVE32F-NEXT: .LBB39_2: # %else2
3145 ; RV32ZVE32F-NEXT: andi a0, a5, 4
3146 ; RV32ZVE32F-NEXT: bnez a0, .LBB39_7
3147 ; RV32ZVE32F-NEXT: .LBB39_3: # %else4
3148 ; RV32ZVE32F-NEXT: andi a5, a5, 8
3149 ; RV32ZVE32F-NEXT: bnez a5, .LBB39_8
3150 ; RV32ZVE32F-NEXT: .LBB39_4: # %else6
3151 ; RV32ZVE32F-NEXT: ret
3152 ; RV32ZVE32F-NEXT: .LBB39_5: # %cond.store
3153 ; RV32ZVE32F-NEXT: lw t0, 4(a0)
3154 ; RV32ZVE32F-NEXT: lw a0, 0(a0)
3155 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3156 ; RV32ZVE32F-NEXT: vmv.x.s t1, v8
3157 ; RV32ZVE32F-NEXT: sw t0, 4(t1)
3158 ; RV32ZVE32F-NEXT: sw a0, 0(t1)
3159 ; RV32ZVE32F-NEXT: andi a0, a5, 2
3160 ; RV32ZVE32F-NEXT: beqz a0, .LBB39_2
3161 ; RV32ZVE32F-NEXT: .LBB39_6: # %cond.store1
3162 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3163 ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1
3164 ; RV32ZVE32F-NEXT: vmv.x.s a0, v9
3165 ; RV32ZVE32F-NEXT: sw a7, 4(a0)
3166 ; RV32ZVE32F-NEXT: sw a6, 0(a0)
3167 ; RV32ZVE32F-NEXT: andi a0, a5, 4
3168 ; RV32ZVE32F-NEXT: beqz a0, .LBB39_3
3169 ; RV32ZVE32F-NEXT: .LBB39_7: # %cond.store3
3170 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3171 ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2
3172 ; RV32ZVE32F-NEXT: vmv.x.s a0, v9
3173 ; RV32ZVE32F-NEXT: sw a4, 0(a0)
3174 ; RV32ZVE32F-NEXT: sw a3, 4(a0)
3175 ; RV32ZVE32F-NEXT: andi a5, a5, 8
3176 ; RV32ZVE32F-NEXT: beqz a5, .LBB39_4
3177 ; RV32ZVE32F-NEXT: .LBB39_8: # %cond.store5
3178 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3179 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3
3180 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
3181 ; RV32ZVE32F-NEXT: sw a2, 0(a0)
3182 ; RV32ZVE32F-NEXT: sw a1, 4(a0)
3183 ; RV32ZVE32F-NEXT: ret
3185 ; RV64ZVE32F-LABEL: mscatter_truemask_v4i64:
3186 ; RV64ZVE32F: # %bb.0:
3187 ; RV64ZVE32F-NEXT: ld a2, 24(a1)
3188 ; RV64ZVE32F-NEXT: ld a4, 16(a1)
3189 ; RV64ZVE32F-NEXT: ld a7, 8(a1)
3190 ; RV64ZVE32F-NEXT: ld a3, 24(a0)
3191 ; RV64ZVE32F-NEXT: ld a5, 16(a0)
3192 ; RV64ZVE32F-NEXT: ld t0, 8(a0)
3193 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
3194 ; RV64ZVE32F-NEXT: vmset.m v8
3195 ; RV64ZVE32F-NEXT: vmv.x.s a6, v8
3196 ; RV64ZVE32F-NEXT: beqz zero, .LBB39_5
3197 ; RV64ZVE32F-NEXT: # %bb.1: # %else
3198 ; RV64ZVE32F-NEXT: andi a0, a6, 2
3199 ; RV64ZVE32F-NEXT: bnez a0, .LBB39_6
3200 ; RV64ZVE32F-NEXT: .LBB39_2: # %else2
3201 ; RV64ZVE32F-NEXT: andi a0, a6, 4
3202 ; RV64ZVE32F-NEXT: bnez a0, .LBB39_7
3203 ; RV64ZVE32F-NEXT: .LBB39_3: # %else4
3204 ; RV64ZVE32F-NEXT: andi a0, a6, 8
3205 ; RV64ZVE32F-NEXT: bnez a0, .LBB39_8
3206 ; RV64ZVE32F-NEXT: .LBB39_4: # %else6
3207 ; RV64ZVE32F-NEXT: ret
3208 ; RV64ZVE32F-NEXT: .LBB39_5: # %cond.store
3209 ; RV64ZVE32F-NEXT: ld a1, 0(a1)
3210 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
3211 ; RV64ZVE32F-NEXT: sd a0, 0(a1)
3212 ; RV64ZVE32F-NEXT: andi a0, a6, 2
3213 ; RV64ZVE32F-NEXT: beqz a0, .LBB39_2
3214 ; RV64ZVE32F-NEXT: .LBB39_6: # %cond.store1
3215 ; RV64ZVE32F-NEXT: sd t0, 0(a7)
3216 ; RV64ZVE32F-NEXT: andi a0, a6, 4
3217 ; RV64ZVE32F-NEXT: beqz a0, .LBB39_3
3218 ; RV64ZVE32F-NEXT: .LBB39_7: # %cond.store3
3219 ; RV64ZVE32F-NEXT: sd a5, 0(a4)
3220 ; RV64ZVE32F-NEXT: andi a0, a6, 8
3221 ; RV64ZVE32F-NEXT: beqz a0, .LBB39_4
3222 ; RV64ZVE32F-NEXT: .LBB39_8: # %cond.store5
3223 ; RV64ZVE32F-NEXT: sd a3, 0(a2)
3224 ; RV64ZVE32F-NEXT: ret
; Build the all-true <4 x i1> mask: put i1 1 in lane 0, then broadcast lane 0
; to every lane with a zeroinitializer shuffle mask.
3225 %mhead = insertelement <4 x i1> poison, i1 1, i32 0
3226 %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer
3227 call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> %val, <4 x ptr> %ptrs, i32 8, <4 x i1> %mtrue)
; Scatter of <4 x i64> with a constant all-false mask (zeroinitializer), so no
; lane is enabled. Unlike the neighbouring tests, the label below uses the
; common CHECK prefix, which every RUN line enables; the expected output is
; therefore the same for all four target configurations.
3231 define void @mscatter_falsemask_v4i64(<4 x i64> %val, <4 x ptr> %ptrs) {
3232 ; CHECK-LABEL: mscatter_falsemask_v4i64:
3235 call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> %val, <4 x ptr> %ptrs, i32 8, <4 x i1> zeroinitializer)
3239 declare void @llvm.masked.scatter.v8i64.v8p0(<8 x i64>, <8 x ptr>, i32, <8 x i1>)
; General <8 x i64> masked scatter: runtime mask %m, explicit vector of
; pointers, alignment argument 8.
;
; Full V (the RV32V and RV64 prefixes): one masked indexed store at e64/m4,
; vsoxei32.v or vsoxei64.v with v0.t.
;
; Zve32f only: the scatter is scalarized. The mask is moved to a scalar with
; vmv.x.s from v0 and its bits are tested one lane at a time (andi with 1, 2,
; 4, ... 64, and -128 for the last lane); each enabled lane performs its own
; scalar store.
;   - RV32: each i64 is written as two sw (offsets 0 and 4) through a pointer
;     extracted from v8 with vslidedown.vi + vmv.x.s.
;   - RV64: pointers (from the address in a1) and values (from the address in
;     a0) are loaded into scalar registers up front and stored with sd.
; Both Zve32f variants spill and reload s0-s2 around the sequence.
3241 define void @mscatter_v8i64(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
3242 ; RV32V-LABEL: mscatter_v8i64:
3244 ; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
3245 ; RV32V-NEXT: vsoxei32.v v8, (zero), v12, v0.t
3248 ; RV64-LABEL: mscatter_v8i64:
3250 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
3251 ; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
3254 ; RV32ZVE32F-LABEL: mscatter_v8i64:
3255 ; RV32ZVE32F: # %bb.0:
3256 ; RV32ZVE32F-NEXT: addi sp, sp, -16
3257 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
3258 ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
3259 ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
3260 ; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
3261 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
3262 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
3263 ; RV32ZVE32F-NEXT: .cfi_offset s2, -12
3264 ; RV32ZVE32F-NEXT: lw a1, 60(a0)
3265 ; RV32ZVE32F-NEXT: lw a2, 56(a0)
3266 ; RV32ZVE32F-NEXT: lw a3, 52(a0)
3267 ; RV32ZVE32F-NEXT: lw a4, 48(a0)
3268 ; RV32ZVE32F-NEXT: lw a5, 44(a0)
3269 ; RV32ZVE32F-NEXT: lw a7, 40(a0)
3270 ; RV32ZVE32F-NEXT: lw t0, 36(a0)
3271 ; RV32ZVE32F-NEXT: lw t1, 32(a0)
3272 ; RV32ZVE32F-NEXT: lw t2, 28(a0)
3273 ; RV32ZVE32F-NEXT: lw t3, 24(a0)
3274 ; RV32ZVE32F-NEXT: lw t4, 20(a0)
3275 ; RV32ZVE32F-NEXT: lw t5, 16(a0)
3276 ; RV32ZVE32F-NEXT: lw s0, 12(a0)
3277 ; RV32ZVE32F-NEXT: lw t6, 8(a0)
3278 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
3279 ; RV32ZVE32F-NEXT: vmv.x.s a6, v0
3280 ; RV32ZVE32F-NEXT: andi s1, a6, 1
3281 ; RV32ZVE32F-NEXT: bnez s1, .LBB41_10
3282 ; RV32ZVE32F-NEXT: # %bb.1: # %else
3283 ; RV32ZVE32F-NEXT: andi a0, a6, 2
3284 ; RV32ZVE32F-NEXT: bnez a0, .LBB41_11
3285 ; RV32ZVE32F-NEXT: .LBB41_2: # %else2
3286 ; RV32ZVE32F-NEXT: andi a0, a6, 4
3287 ; RV32ZVE32F-NEXT: bnez a0, .LBB41_12
3288 ; RV32ZVE32F-NEXT: .LBB41_3: # %else4
3289 ; RV32ZVE32F-NEXT: andi a0, a6, 8
3290 ; RV32ZVE32F-NEXT: bnez a0, .LBB41_13
3291 ; RV32ZVE32F-NEXT: .LBB41_4: # %else6
3292 ; RV32ZVE32F-NEXT: andi a0, a6, 16
3293 ; RV32ZVE32F-NEXT: bnez a0, .LBB41_14
3294 ; RV32ZVE32F-NEXT: .LBB41_5: # %else8
3295 ; RV32ZVE32F-NEXT: andi a0, a6, 32
3296 ; RV32ZVE32F-NEXT: bnez a0, .LBB41_15
3297 ; RV32ZVE32F-NEXT: .LBB41_6: # %else10
3298 ; RV32ZVE32F-NEXT: andi a0, a6, 64
3299 ; RV32ZVE32F-NEXT: bnez a0, .LBB41_16
3300 ; RV32ZVE32F-NEXT: .LBB41_7: # %else12
3301 ; RV32ZVE32F-NEXT: andi a0, a6, -128
3302 ; RV32ZVE32F-NEXT: beqz a0, .LBB41_9
3303 ; RV32ZVE32F-NEXT: .LBB41_8: # %cond.store13
3304 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
3305 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
3306 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
3307 ; RV32ZVE32F-NEXT: sw a2, 0(a0)
3308 ; RV32ZVE32F-NEXT: sw a1, 4(a0)
3309 ; RV32ZVE32F-NEXT: .LBB41_9: # %else14
3310 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
3311 ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
3312 ; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
3313 ; RV32ZVE32F-NEXT: addi sp, sp, 16
3314 ; RV32ZVE32F-NEXT: ret
3315 ; RV32ZVE32F-NEXT: .LBB41_10: # %cond.store
3316 ; RV32ZVE32F-NEXT: lw s1, 4(a0)
3317 ; RV32ZVE32F-NEXT: lw a0, 0(a0)
3318 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3319 ; RV32ZVE32F-NEXT: vmv.x.s s2, v8
3320 ; RV32ZVE32F-NEXT: sw s1, 4(s2)
3321 ; RV32ZVE32F-NEXT: sw a0, 0(s2)
3322 ; RV32ZVE32F-NEXT: andi a0, a6, 2
3323 ; RV32ZVE32F-NEXT: beqz a0, .LBB41_2
3324 ; RV32ZVE32F-NEXT: .LBB41_11: # %cond.store1
3325 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3326 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
3327 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
3328 ; RV32ZVE32F-NEXT: sw s0, 4(a0)
3329 ; RV32ZVE32F-NEXT: sw t6, 0(a0)
3330 ; RV32ZVE32F-NEXT: andi a0, a6, 4
3331 ; RV32ZVE32F-NEXT: beqz a0, .LBB41_3
3332 ; RV32ZVE32F-NEXT: .LBB41_12: # %cond.store3
3333 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3334 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
3335 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
3336 ; RV32ZVE32F-NEXT: sw t5, 0(a0)
3337 ; RV32ZVE32F-NEXT: sw t4, 4(a0)
3338 ; RV32ZVE32F-NEXT: andi a0, a6, 8
3339 ; RV32ZVE32F-NEXT: beqz a0, .LBB41_4
3340 ; RV32ZVE32F-NEXT: .LBB41_13: # %cond.store5
3341 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3342 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
3343 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
3344 ; RV32ZVE32F-NEXT: sw t3, 0(a0)
3345 ; RV32ZVE32F-NEXT: sw t2, 4(a0)
3346 ; RV32ZVE32F-NEXT: andi a0, a6, 16
3347 ; RV32ZVE32F-NEXT: beqz a0, .LBB41_5
3348 ; RV32ZVE32F-NEXT: .LBB41_14: # %cond.store7
3349 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
3350 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
3351 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
3352 ; RV32ZVE32F-NEXT: sw t1, 0(a0)
3353 ; RV32ZVE32F-NEXT: sw t0, 4(a0)
3354 ; RV32ZVE32F-NEXT: andi a0, a6, 32
3355 ; RV32ZVE32F-NEXT: beqz a0, .LBB41_6
3356 ; RV32ZVE32F-NEXT: .LBB41_15: # %cond.store9
3357 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
3358 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
3359 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
3360 ; RV32ZVE32F-NEXT: sw a7, 0(a0)
3361 ; RV32ZVE32F-NEXT: sw a5, 4(a0)
3362 ; RV32ZVE32F-NEXT: andi a0, a6, 64
3363 ; RV32ZVE32F-NEXT: beqz a0, .LBB41_7
3364 ; RV32ZVE32F-NEXT: .LBB41_16: # %cond.store11
3365 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
3366 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
3367 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
3368 ; RV32ZVE32F-NEXT: sw a4, 0(a0)
3369 ; RV32ZVE32F-NEXT: sw a3, 4(a0)
3370 ; RV32ZVE32F-NEXT: andi a0, a6, -128
3371 ; RV32ZVE32F-NEXT: bnez a0, .LBB41_8
3372 ; RV32ZVE32F-NEXT: j .LBB41_9
3374 ; RV64ZVE32F-LABEL: mscatter_v8i64:
3375 ; RV64ZVE32F: # %bb.0:
3376 ; RV64ZVE32F-NEXT: addi sp, sp, -32
3377 ; RV64ZVE32F-NEXT: .cfi_def_cfa_offset 32
3378 ; RV64ZVE32F-NEXT: sd s0, 24(sp) # 8-byte Folded Spill
3379 ; RV64ZVE32F-NEXT: sd s1, 16(sp) # 8-byte Folded Spill
3380 ; RV64ZVE32F-NEXT: sd s2, 8(sp) # 8-byte Folded Spill
3381 ; RV64ZVE32F-NEXT: .cfi_offset s0, -8
3382 ; RV64ZVE32F-NEXT: .cfi_offset s1, -16
3383 ; RV64ZVE32F-NEXT: .cfi_offset s2, -24
3384 ; RV64ZVE32F-NEXT: ld a2, 56(a1)
3385 ; RV64ZVE32F-NEXT: ld a4, 48(a1)
3386 ; RV64ZVE32F-NEXT: ld a6, 40(a1)
3387 ; RV64ZVE32F-NEXT: ld t1, 32(a1)
3388 ; RV64ZVE32F-NEXT: ld t3, 24(a1)
3389 ; RV64ZVE32F-NEXT: ld t5, 16(a1)
3390 ; RV64ZVE32F-NEXT: ld s0, 8(a1)
3391 ; RV64ZVE32F-NEXT: ld a3, 56(a0)
3392 ; RV64ZVE32F-NEXT: ld a5, 48(a0)
3393 ; RV64ZVE32F-NEXT: ld t0, 40(a0)
3394 ; RV64ZVE32F-NEXT: ld t2, 32(a0)
3395 ; RV64ZVE32F-NEXT: ld t4, 24(a0)
3396 ; RV64ZVE32F-NEXT: ld t6, 16(a0)
3397 ; RV64ZVE32F-NEXT: ld s1, 8(a0)
3398 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
3399 ; RV64ZVE32F-NEXT: vmv.x.s a7, v0
3400 ; RV64ZVE32F-NEXT: andi s2, a7, 1
3401 ; RV64ZVE32F-NEXT: bnez s2, .LBB41_10
3402 ; RV64ZVE32F-NEXT: # %bb.1: # %else
3403 ; RV64ZVE32F-NEXT: andi a0, a7, 2
3404 ; RV64ZVE32F-NEXT: bnez a0, .LBB41_11
3405 ; RV64ZVE32F-NEXT: .LBB41_2: # %else2
3406 ; RV64ZVE32F-NEXT: andi a0, a7, 4
3407 ; RV64ZVE32F-NEXT: bnez a0, .LBB41_12
3408 ; RV64ZVE32F-NEXT: .LBB41_3: # %else4
3409 ; RV64ZVE32F-NEXT: andi a0, a7, 8
3410 ; RV64ZVE32F-NEXT: bnez a0, .LBB41_13
3411 ; RV64ZVE32F-NEXT: .LBB41_4: # %else6
3412 ; RV64ZVE32F-NEXT: andi a0, a7, 16
3413 ; RV64ZVE32F-NEXT: bnez a0, .LBB41_14
3414 ; RV64ZVE32F-NEXT: .LBB41_5: # %else8
3415 ; RV64ZVE32F-NEXT: andi a0, a7, 32
3416 ; RV64ZVE32F-NEXT: bnez a0, .LBB41_15
3417 ; RV64ZVE32F-NEXT: .LBB41_6: # %else10
3418 ; RV64ZVE32F-NEXT: andi a0, a7, 64
3419 ; RV64ZVE32F-NEXT: bnez a0, .LBB41_16
3420 ; RV64ZVE32F-NEXT: .LBB41_7: # %else12
3421 ; RV64ZVE32F-NEXT: andi a0, a7, -128
3422 ; RV64ZVE32F-NEXT: beqz a0, .LBB41_9
3423 ; RV64ZVE32F-NEXT: .LBB41_8: # %cond.store13
3424 ; RV64ZVE32F-NEXT: sd a3, 0(a2)
3425 ; RV64ZVE32F-NEXT: .LBB41_9: # %else14
3426 ; RV64ZVE32F-NEXT: ld s0, 24(sp) # 8-byte Folded Reload
3427 ; RV64ZVE32F-NEXT: ld s1, 16(sp) # 8-byte Folded Reload
3428 ; RV64ZVE32F-NEXT: ld s2, 8(sp) # 8-byte Folded Reload
3429 ; RV64ZVE32F-NEXT: addi sp, sp, 32
3430 ; RV64ZVE32F-NEXT: ret
3431 ; RV64ZVE32F-NEXT: .LBB41_10: # %cond.store
3432 ; RV64ZVE32F-NEXT: ld a1, 0(a1)
3433 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
3434 ; RV64ZVE32F-NEXT: sd a0, 0(a1)
3435 ; RV64ZVE32F-NEXT: andi a0, a7, 2
3436 ; RV64ZVE32F-NEXT: beqz a0, .LBB41_2
3437 ; RV64ZVE32F-NEXT: .LBB41_11: # %cond.store1
3438 ; RV64ZVE32F-NEXT: sd s1, 0(s0)
3439 ; RV64ZVE32F-NEXT: andi a0, a7, 4
3440 ; RV64ZVE32F-NEXT: beqz a0, .LBB41_3
3441 ; RV64ZVE32F-NEXT: .LBB41_12: # %cond.store3
3442 ; RV64ZVE32F-NEXT: sd t6, 0(t5)
3443 ; RV64ZVE32F-NEXT: andi a0, a7, 8
3444 ; RV64ZVE32F-NEXT: beqz a0, .LBB41_4
3445 ; RV64ZVE32F-NEXT: .LBB41_13: # %cond.store5
3446 ; RV64ZVE32F-NEXT: sd t4, 0(t3)
3447 ; RV64ZVE32F-NEXT: andi a0, a7, 16
3448 ; RV64ZVE32F-NEXT: beqz a0, .LBB41_5
3449 ; RV64ZVE32F-NEXT: .LBB41_14: # %cond.store7
3450 ; RV64ZVE32F-NEXT: sd t2, 0(t1)
3451 ; RV64ZVE32F-NEXT: andi a0, a7, 32
3452 ; RV64ZVE32F-NEXT: beqz a0, .LBB41_6
3453 ; RV64ZVE32F-NEXT: .LBB41_15: # %cond.store9
3454 ; RV64ZVE32F-NEXT: sd t0, 0(a6)
3455 ; RV64ZVE32F-NEXT: andi a0, a7, 64
3456 ; RV64ZVE32F-NEXT: beqz a0, .LBB41_7
3457 ; RV64ZVE32F-NEXT: .LBB41_16: # %cond.store11
3458 ; RV64ZVE32F-NEXT: sd a5, 0(a4)
3459 ; RV64ZVE32F-NEXT: andi a0, a7, -128
3460 ; RV64ZVE32F-NEXT: bnez a0, .LBB41_8
3461 ; RV64ZVE32F-NEXT: j .LBB41_9
3462 call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
; Masked <8 x i64> scatter whose pointers come from a getelementptr of i64 off
; a scalar %base with <8 x i8> indices (no explicit extension in the IR).
;
; Full V: the indices are sign-extended (vsext.vf4 to e32 on RV32, vsext.vf8
; to e64 on RV64), scaled by 8 with "vsll.vi ..., 3", and used as byte offsets
; of a single masked vsoxei32.v / vsoxei64.v off the base held in a0.
;
; RV32 with Zve32f only: the whole pointer vector is formed at e32 (vsext.vf4,
; vsll.vi 3, vadd.vx with the base in a1), then lanes are stored one at a time
; under per-bit mask tests, two sw per i64.
;
; RV64 with Zve32f only: each enabled lane extracts its i8 index with vmv.x.s
; (after vslidedown.vi where needed), computes base + (index << 3) in scalar
; registers (slli + add with a1) and stores with sd.
3466 define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
3467 ; RV32V-LABEL: mscatter_baseidx_v8i8_v8i64:
3469 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
3470 ; RV32V-NEXT: vsext.vf4 v14, v12
3471 ; RV32V-NEXT: vsll.vi v12, v14, 3
3472 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
3473 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
3476 ; RV64-LABEL: mscatter_baseidx_v8i8_v8i64:
3478 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
3479 ; RV64-NEXT: vsext.vf8 v16, v12
3480 ; RV64-NEXT: vsll.vi v12, v16, 3
3481 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
3484 ; RV32ZVE32F-LABEL: mscatter_baseidx_v8i8_v8i64:
3485 ; RV32ZVE32F: # %bb.0:
3486 ; RV32ZVE32F-NEXT: addi sp, sp, -16
3487 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
3488 ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
3489 ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
3490 ; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
3491 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
3492 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
3493 ; RV32ZVE32F-NEXT: .cfi_offset s2, -12
3494 ; RV32ZVE32F-NEXT: lw a2, 60(a0)
3495 ; RV32ZVE32F-NEXT: lw a3, 56(a0)
3496 ; RV32ZVE32F-NEXT: lw a4, 52(a0)
3497 ; RV32ZVE32F-NEXT: lw a5, 48(a0)
3498 ; RV32ZVE32F-NEXT: lw a6, 44(a0)
3499 ; RV32ZVE32F-NEXT: lw a7, 40(a0)
3500 ; RV32ZVE32F-NEXT: lw t0, 36(a0)
3501 ; RV32ZVE32F-NEXT: lw t1, 32(a0)
3502 ; RV32ZVE32F-NEXT: lw t2, 28(a0)
3503 ; RV32ZVE32F-NEXT: lw t3, 24(a0)
3504 ; RV32ZVE32F-NEXT: lw t4, 20(a0)
3505 ; RV32ZVE32F-NEXT: lw t5, 16(a0)
3506 ; RV32ZVE32F-NEXT: lw s0, 12(a0)
3507 ; RV32ZVE32F-NEXT: lw t6, 8(a0)
3508 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
3509 ; RV32ZVE32F-NEXT: vsext.vf4 v10, v8
3510 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
3511 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
3512 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
3513 ; RV32ZVE32F-NEXT: vmv.x.s a1, v0
3514 ; RV32ZVE32F-NEXT: andi s1, a1, 1
3515 ; RV32ZVE32F-NEXT: bnez s1, .LBB42_10
3516 ; RV32ZVE32F-NEXT: # %bb.1: # %else
3517 ; RV32ZVE32F-NEXT: andi a0, a1, 2
3518 ; RV32ZVE32F-NEXT: bnez a0, .LBB42_11
3519 ; RV32ZVE32F-NEXT: .LBB42_2: # %else2
3520 ; RV32ZVE32F-NEXT: andi a0, a1, 4
3521 ; RV32ZVE32F-NEXT: bnez a0, .LBB42_12
3522 ; RV32ZVE32F-NEXT: .LBB42_3: # %else4
3523 ; RV32ZVE32F-NEXT: andi a0, a1, 8
3524 ; RV32ZVE32F-NEXT: bnez a0, .LBB42_13
3525 ; RV32ZVE32F-NEXT: .LBB42_4: # %else6
3526 ; RV32ZVE32F-NEXT: andi a0, a1, 16
3527 ; RV32ZVE32F-NEXT: bnez a0, .LBB42_14
3528 ; RV32ZVE32F-NEXT: .LBB42_5: # %else8
3529 ; RV32ZVE32F-NEXT: andi a0, a1, 32
3530 ; RV32ZVE32F-NEXT: bnez a0, .LBB42_15
3531 ; RV32ZVE32F-NEXT: .LBB42_6: # %else10
3532 ; RV32ZVE32F-NEXT: andi a0, a1, 64
3533 ; RV32ZVE32F-NEXT: bnez a0, .LBB42_16
3534 ; RV32ZVE32F-NEXT: .LBB42_7: # %else12
3535 ; RV32ZVE32F-NEXT: andi a0, a1, -128
3536 ; RV32ZVE32F-NEXT: beqz a0, .LBB42_9
3537 ; RV32ZVE32F-NEXT: .LBB42_8: # %cond.store13
3538 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
3539 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
3540 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
3541 ; RV32ZVE32F-NEXT: sw a3, 0(a0)
3542 ; RV32ZVE32F-NEXT: sw a2, 4(a0)
3543 ; RV32ZVE32F-NEXT: .LBB42_9: # %else14
3544 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
3545 ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
3546 ; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
3547 ; RV32ZVE32F-NEXT: addi sp, sp, 16
3548 ; RV32ZVE32F-NEXT: ret
3549 ; RV32ZVE32F-NEXT: .LBB42_10: # %cond.store
3550 ; RV32ZVE32F-NEXT: lw s1, 4(a0)
3551 ; RV32ZVE32F-NEXT: lw a0, 0(a0)
3552 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3553 ; RV32ZVE32F-NEXT: vmv.x.s s2, v8
3554 ; RV32ZVE32F-NEXT: sw s1, 4(s2)
3555 ; RV32ZVE32F-NEXT: sw a0, 0(s2)
3556 ; RV32ZVE32F-NEXT: andi a0, a1, 2
3557 ; RV32ZVE32F-NEXT: beqz a0, .LBB42_2
3558 ; RV32ZVE32F-NEXT: .LBB42_11: # %cond.store1
3559 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3560 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
3561 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
3562 ; RV32ZVE32F-NEXT: sw s0, 4(a0)
3563 ; RV32ZVE32F-NEXT: sw t6, 0(a0)
3564 ; RV32ZVE32F-NEXT: andi a0, a1, 4
3565 ; RV32ZVE32F-NEXT: beqz a0, .LBB42_3
3566 ; RV32ZVE32F-NEXT: .LBB42_12: # %cond.store3
3567 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3568 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
3569 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
3570 ; RV32ZVE32F-NEXT: sw t5, 0(a0)
3571 ; RV32ZVE32F-NEXT: sw t4, 4(a0)
3572 ; RV32ZVE32F-NEXT: andi a0, a1, 8
3573 ; RV32ZVE32F-NEXT: beqz a0, .LBB42_4
3574 ; RV32ZVE32F-NEXT: .LBB42_13: # %cond.store5
3575 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3576 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
3577 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
3578 ; RV32ZVE32F-NEXT: sw t3, 0(a0)
3579 ; RV32ZVE32F-NEXT: sw t2, 4(a0)
3580 ; RV32ZVE32F-NEXT: andi a0, a1, 16
3581 ; RV32ZVE32F-NEXT: beqz a0, .LBB42_5
3582 ; RV32ZVE32F-NEXT: .LBB42_14: # %cond.store7
3583 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
3584 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
3585 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
3586 ; RV32ZVE32F-NEXT: sw t1, 0(a0)
3587 ; RV32ZVE32F-NEXT: sw t0, 4(a0)
3588 ; RV32ZVE32F-NEXT: andi a0, a1, 32
3589 ; RV32ZVE32F-NEXT: beqz a0, .LBB42_6
3590 ; RV32ZVE32F-NEXT: .LBB42_15: # %cond.store9
3591 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
3592 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
3593 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
3594 ; RV32ZVE32F-NEXT: sw a7, 0(a0)
3595 ; RV32ZVE32F-NEXT: sw a6, 4(a0)
3596 ; RV32ZVE32F-NEXT: andi a0, a1, 64
3597 ; RV32ZVE32F-NEXT: beqz a0, .LBB42_7
3598 ; RV32ZVE32F-NEXT: .LBB42_16: # %cond.store11
3599 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
3600 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
3601 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
3602 ; RV32ZVE32F-NEXT: sw a5, 0(a0)
3603 ; RV32ZVE32F-NEXT: sw a4, 4(a0)
3604 ; RV32ZVE32F-NEXT: andi a0, a1, -128
3605 ; RV32ZVE32F-NEXT: bnez a0, .LBB42_8
3606 ; RV32ZVE32F-NEXT: j .LBB42_9
3608 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8_v8i64:
3609 ; RV64ZVE32F: # %bb.0:
3610 ; RV64ZVE32F-NEXT: ld a2, 56(a0)
3611 ; RV64ZVE32F-NEXT: ld a3, 48(a0)
3612 ; RV64ZVE32F-NEXT: ld a5, 40(a0)
3613 ; RV64ZVE32F-NEXT: ld a6, 32(a0)
3614 ; RV64ZVE32F-NEXT: ld a7, 24(a0)
3615 ; RV64ZVE32F-NEXT: ld t0, 16(a0)
3616 ; RV64ZVE32F-NEXT: ld t1, 8(a0)
3617 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
3618 ; RV64ZVE32F-NEXT: vmv.x.s a4, v0
3619 ; RV64ZVE32F-NEXT: andi t2, a4, 1
3620 ; RV64ZVE32F-NEXT: beqz t2, .LBB42_2
3621 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
3622 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
3623 ; RV64ZVE32F-NEXT: vmv.x.s t2, v8
3624 ; RV64ZVE32F-NEXT: slli t2, t2, 3
3625 ; RV64ZVE32F-NEXT: add t2, a1, t2
3626 ; RV64ZVE32F-NEXT: sd a0, 0(t2)
3627 ; RV64ZVE32F-NEXT: .LBB42_2: # %else
3628 ; RV64ZVE32F-NEXT: andi a0, a4, 2
3629 ; RV64ZVE32F-NEXT: beqz a0, .LBB42_4
3630 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
3631 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
3632 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
3633 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9
3634 ; RV64ZVE32F-NEXT: slli a0, a0, 3
3635 ; RV64ZVE32F-NEXT: add a0, a1, a0
3636 ; RV64ZVE32F-NEXT: sd t1, 0(a0)
3637 ; RV64ZVE32F-NEXT: .LBB42_4: # %else2
3638 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
3639 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
3640 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
3641 ; RV64ZVE32F-NEXT: andi a0, a4, 4
3642 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
3643 ; RV64ZVE32F-NEXT: bnez a0, .LBB42_12
3644 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
3645 ; RV64ZVE32F-NEXT: andi a0, a4, 8
3646 ; RV64ZVE32F-NEXT: bnez a0, .LBB42_13
3647 ; RV64ZVE32F-NEXT: .LBB42_6: # %else6
3648 ; RV64ZVE32F-NEXT: andi a0, a4, 16
3649 ; RV64ZVE32F-NEXT: bnez a0, .LBB42_14
3650 ; RV64ZVE32F-NEXT: .LBB42_7: # %else8
3651 ; RV64ZVE32F-NEXT: andi a0, a4, 32
3652 ; RV64ZVE32F-NEXT: beqz a0, .LBB42_9
3653 ; RV64ZVE32F-NEXT: .LBB42_8: # %cond.store9
3654 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
3655 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
3656 ; RV64ZVE32F-NEXT: slli a0, a0, 3
3657 ; RV64ZVE32F-NEXT: add a0, a1, a0
3658 ; RV64ZVE32F-NEXT: sd a5, 0(a0)
3659 ; RV64ZVE32F-NEXT: .LBB42_9: # %else10
3660 ; RV64ZVE32F-NEXT: andi a0, a4, 64
3661 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
3662 ; RV64ZVE32F-NEXT: bnez a0, .LBB42_15
3663 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
3664 ; RV64ZVE32F-NEXT: andi a0, a4, -128
3665 ; RV64ZVE32F-NEXT: bnez a0, .LBB42_16
3666 ; RV64ZVE32F-NEXT: .LBB42_11: # %else14
3667 ; RV64ZVE32F-NEXT: ret
3668 ; RV64ZVE32F-NEXT: .LBB42_12: # %cond.store3
3669 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
3670 ; RV64ZVE32F-NEXT: slli a0, a0, 3
3671 ; RV64ZVE32F-NEXT: add a0, a1, a0
3672 ; RV64ZVE32F-NEXT: sd t0, 0(a0)
3673 ; RV64ZVE32F-NEXT: andi a0, a4, 8
3674 ; RV64ZVE32F-NEXT: beqz a0, .LBB42_6
3675 ; RV64ZVE32F-NEXT: .LBB42_13: # %cond.store5
3676 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
3677 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
3678 ; RV64ZVE32F-NEXT: slli a0, a0, 3
3679 ; RV64ZVE32F-NEXT: add a0, a1, a0
3680 ; RV64ZVE32F-NEXT: sd a7, 0(a0)
3681 ; RV64ZVE32F-NEXT: andi a0, a4, 16
3682 ; RV64ZVE32F-NEXT: beqz a0, .LBB42_7
3683 ; RV64ZVE32F-NEXT: .LBB42_14: # %cond.store7
3684 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9
3685 ; RV64ZVE32F-NEXT: slli a0, a0, 3
3686 ; RV64ZVE32F-NEXT: add a0, a1, a0
3687 ; RV64ZVE32F-NEXT: sd a6, 0(a0)
3688 ; RV64ZVE32F-NEXT: andi a0, a4, 32
3689 ; RV64ZVE32F-NEXT: bnez a0, .LBB42_8
3690 ; RV64ZVE32F-NEXT: j .LBB42_9
3691 ; RV64ZVE32F-NEXT: .LBB42_15: # %cond.store11
3692 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
3693 ; RV64ZVE32F-NEXT: slli a0, a0, 3
3694 ; RV64ZVE32F-NEXT: add a0, a1, a0
3695 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
3696 ; RV64ZVE32F-NEXT: andi a0, a4, -128
3697 ; RV64ZVE32F-NEXT: beqz a0, .LBB42_11
3698 ; RV64ZVE32F-NEXT: .LBB42_16: # %cond.store13
3699 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
3700 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
3701 ; RV64ZVE32F-NEXT: slli a0, a0, 3
3702 ; RV64ZVE32F-NEXT: add a0, a1, a0
3703 ; RV64ZVE32F-NEXT: sd a2, 0(a0)
3704 ; RV64ZVE32F-NEXT: ret
3705 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i8> %idxs
3706 call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
; Variant of mscatter_baseidx_v8i8_v8i64 in which the <8 x i8> indices are
; explicitly sign-extended to <8 x i64> (sext) before the getelementptr.
;
; The expected code for every configuration mirrors the implicit-extension
; test: with full V the indices are widened with vsext (vf4 at e32 on RV32,
; vf8 at e64 on RV64), scaled by "vsll.vi ..., 3" and fed to one masked
; vsoxei32.v / vsoxei64.v; with only Zve32f the store is scalarized lane by
; lane under per-bit mask tests (vadd.vx pointer vector on RV32, scalar
; slli + add address computation on RV64). Only the .LBB label numbers differ.
3710 define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
3711 ; RV32V-LABEL: mscatter_baseidx_sext_v8i8_v8i64:
3713 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
3714 ; RV32V-NEXT: vsext.vf4 v14, v12
3715 ; RV32V-NEXT: vsll.vi v12, v14, 3
3716 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
3717 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
3720 ; RV64-LABEL: mscatter_baseidx_sext_v8i8_v8i64:
3722 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
3723 ; RV64-NEXT: vsext.vf8 v16, v12
3724 ; RV64-NEXT: vsll.vi v12, v16, 3
3725 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
3728 ; RV32ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8i64:
3729 ; RV32ZVE32F: # %bb.0:
3730 ; RV32ZVE32F-NEXT: addi sp, sp, -16
3731 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
3732 ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
3733 ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
3734 ; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
3735 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
3736 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
3737 ; RV32ZVE32F-NEXT: .cfi_offset s2, -12
3738 ; RV32ZVE32F-NEXT: lw a2, 60(a0)
3739 ; RV32ZVE32F-NEXT: lw a3, 56(a0)
3740 ; RV32ZVE32F-NEXT: lw a4, 52(a0)
3741 ; RV32ZVE32F-NEXT: lw a5, 48(a0)
3742 ; RV32ZVE32F-NEXT: lw a6, 44(a0)
3743 ; RV32ZVE32F-NEXT: lw a7, 40(a0)
3744 ; RV32ZVE32F-NEXT: lw t0, 36(a0)
3745 ; RV32ZVE32F-NEXT: lw t1, 32(a0)
3746 ; RV32ZVE32F-NEXT: lw t2, 28(a0)
3747 ; RV32ZVE32F-NEXT: lw t3, 24(a0)
3748 ; RV32ZVE32F-NEXT: lw t4, 20(a0)
3749 ; RV32ZVE32F-NEXT: lw t5, 16(a0)
3750 ; RV32ZVE32F-NEXT: lw s0, 12(a0)
3751 ; RV32ZVE32F-NEXT: lw t6, 8(a0)
3752 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
3753 ; RV32ZVE32F-NEXT: vsext.vf4 v10, v8
3754 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
3755 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
3756 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
3757 ; RV32ZVE32F-NEXT: vmv.x.s a1, v0
3758 ; RV32ZVE32F-NEXT: andi s1, a1, 1
3759 ; RV32ZVE32F-NEXT: bnez s1, .LBB43_10
3760 ; RV32ZVE32F-NEXT: # %bb.1: # %else
3761 ; RV32ZVE32F-NEXT: andi a0, a1, 2
3762 ; RV32ZVE32F-NEXT: bnez a0, .LBB43_11
3763 ; RV32ZVE32F-NEXT: .LBB43_2: # %else2
3764 ; RV32ZVE32F-NEXT: andi a0, a1, 4
3765 ; RV32ZVE32F-NEXT: bnez a0, .LBB43_12
3766 ; RV32ZVE32F-NEXT: .LBB43_3: # %else4
3767 ; RV32ZVE32F-NEXT: andi a0, a1, 8
3768 ; RV32ZVE32F-NEXT: bnez a0, .LBB43_13
3769 ; RV32ZVE32F-NEXT: .LBB43_4: # %else6
3770 ; RV32ZVE32F-NEXT: andi a0, a1, 16
3771 ; RV32ZVE32F-NEXT: bnez a0, .LBB43_14
3772 ; RV32ZVE32F-NEXT: .LBB43_5: # %else8
3773 ; RV32ZVE32F-NEXT: andi a0, a1, 32
3774 ; RV32ZVE32F-NEXT: bnez a0, .LBB43_15
3775 ; RV32ZVE32F-NEXT: .LBB43_6: # %else10
3776 ; RV32ZVE32F-NEXT: andi a0, a1, 64
3777 ; RV32ZVE32F-NEXT: bnez a0, .LBB43_16
3778 ; RV32ZVE32F-NEXT: .LBB43_7: # %else12
3779 ; RV32ZVE32F-NEXT: andi a0, a1, -128
3780 ; RV32ZVE32F-NEXT: beqz a0, .LBB43_9
3781 ; RV32ZVE32F-NEXT: .LBB43_8: # %cond.store13
3782 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
3783 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
3784 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
3785 ; RV32ZVE32F-NEXT: sw a3, 0(a0)
3786 ; RV32ZVE32F-NEXT: sw a2, 4(a0)
3787 ; RV32ZVE32F-NEXT: .LBB43_9: # %else14
3788 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
3789 ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
3790 ; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
3791 ; RV32ZVE32F-NEXT: addi sp, sp, 16
3792 ; RV32ZVE32F-NEXT: ret
3793 ; RV32ZVE32F-NEXT: .LBB43_10: # %cond.store
3794 ; RV32ZVE32F-NEXT: lw s1, 4(a0)
3795 ; RV32ZVE32F-NEXT: lw a0, 0(a0)
3796 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3797 ; RV32ZVE32F-NEXT: vmv.x.s s2, v8
3798 ; RV32ZVE32F-NEXT: sw s1, 4(s2)
3799 ; RV32ZVE32F-NEXT: sw a0, 0(s2)
3800 ; RV32ZVE32F-NEXT: andi a0, a1, 2
3801 ; RV32ZVE32F-NEXT: beqz a0, .LBB43_2
3802 ; RV32ZVE32F-NEXT: .LBB43_11: # %cond.store1
3803 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3804 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
3805 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
3806 ; RV32ZVE32F-NEXT: sw s0, 4(a0)
3807 ; RV32ZVE32F-NEXT: sw t6, 0(a0)
3808 ; RV32ZVE32F-NEXT: andi a0, a1, 4
3809 ; RV32ZVE32F-NEXT: beqz a0, .LBB43_3
3810 ; RV32ZVE32F-NEXT: .LBB43_12: # %cond.store3
3811 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3812 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
3813 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
3814 ; RV32ZVE32F-NEXT: sw t5, 0(a0)
3815 ; RV32ZVE32F-NEXT: sw t4, 4(a0)
3816 ; RV32ZVE32F-NEXT: andi a0, a1, 8
3817 ; RV32ZVE32F-NEXT: beqz a0, .LBB43_4
3818 ; RV32ZVE32F-NEXT: .LBB43_13: # %cond.store5
3819 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3820 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
3821 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
3822 ; RV32ZVE32F-NEXT: sw t3, 0(a0)
3823 ; RV32ZVE32F-NEXT: sw t2, 4(a0)
3824 ; RV32ZVE32F-NEXT: andi a0, a1, 16
3825 ; RV32ZVE32F-NEXT: beqz a0, .LBB43_5
3826 ; RV32ZVE32F-NEXT: .LBB43_14: # %cond.store7
3827 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
3828 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
3829 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
3830 ; RV32ZVE32F-NEXT: sw t1, 0(a0)
3831 ; RV32ZVE32F-NEXT: sw t0, 4(a0)
3832 ; RV32ZVE32F-NEXT: andi a0, a1, 32
3833 ; RV32ZVE32F-NEXT: beqz a0, .LBB43_6
3834 ; RV32ZVE32F-NEXT: .LBB43_15: # %cond.store9
3835 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
3836 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
3837 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
3838 ; RV32ZVE32F-NEXT: sw a7, 0(a0)
3839 ; RV32ZVE32F-NEXT: sw a6, 4(a0)
3840 ; RV32ZVE32F-NEXT: andi a0, a1, 64
3841 ; RV32ZVE32F-NEXT: beqz a0, .LBB43_7
3842 ; RV32ZVE32F-NEXT: .LBB43_16: # %cond.store11
3843 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
3844 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
3845 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
3846 ; RV32ZVE32F-NEXT: sw a5, 0(a0)
3847 ; RV32ZVE32F-NEXT: sw a4, 4(a0)
3848 ; RV32ZVE32F-NEXT: andi a0, a1, -128
3849 ; RV32ZVE32F-NEXT: bnez a0, .LBB43_8
3850 ; RV32ZVE32F-NEXT: j .LBB43_9
3852 ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8i64:
3853 ; RV64ZVE32F: # %bb.0:
3854 ; RV64ZVE32F-NEXT: ld a2, 56(a0)
3855 ; RV64ZVE32F-NEXT: ld a3, 48(a0)
3856 ; RV64ZVE32F-NEXT: ld a5, 40(a0)
3857 ; RV64ZVE32F-NEXT: ld a6, 32(a0)
3858 ; RV64ZVE32F-NEXT: ld a7, 24(a0)
3859 ; RV64ZVE32F-NEXT: ld t0, 16(a0)
3860 ; RV64ZVE32F-NEXT: ld t1, 8(a0)
3861 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
3862 ; RV64ZVE32F-NEXT: vmv.x.s a4, v0
3863 ; RV64ZVE32F-NEXT: andi t2, a4, 1
3864 ; RV64ZVE32F-NEXT: beqz t2, .LBB43_2
3865 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
3866 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
3867 ; RV64ZVE32F-NEXT: vmv.x.s t2, v8
3868 ; RV64ZVE32F-NEXT: slli t2, t2, 3
3869 ; RV64ZVE32F-NEXT: add t2, a1, t2
3870 ; RV64ZVE32F-NEXT: sd a0, 0(t2)
3871 ; RV64ZVE32F-NEXT: .LBB43_2: # %else
3872 ; RV64ZVE32F-NEXT: andi a0, a4, 2
3873 ; RV64ZVE32F-NEXT: beqz a0, .LBB43_4
3874 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
3875 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
3876 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
3877 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9
3878 ; RV64ZVE32F-NEXT: slli a0, a0, 3
3879 ; RV64ZVE32F-NEXT: add a0, a1, a0
3880 ; RV64ZVE32F-NEXT: sd t1, 0(a0)
3881 ; RV64ZVE32F-NEXT: .LBB43_4: # %else2
3882 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
3883 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
3884 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
3885 ; RV64ZVE32F-NEXT: andi a0, a4, 4
3886 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
3887 ; RV64ZVE32F-NEXT: bnez a0, .LBB43_12
3888 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
3889 ; RV64ZVE32F-NEXT: andi a0, a4, 8
3890 ; RV64ZVE32F-NEXT: bnez a0, .LBB43_13
3891 ; RV64ZVE32F-NEXT: .LBB43_6: # %else6
3892 ; RV64ZVE32F-NEXT: andi a0, a4, 16
3893 ; RV64ZVE32F-NEXT: bnez a0, .LBB43_14
3894 ; RV64ZVE32F-NEXT: .LBB43_7: # %else8
3895 ; RV64ZVE32F-NEXT: andi a0, a4, 32
3896 ; RV64ZVE32F-NEXT: beqz a0, .LBB43_9
3897 ; RV64ZVE32F-NEXT: .LBB43_8: # %cond.store9
3898 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
3899 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
3900 ; RV64ZVE32F-NEXT: slli a0, a0, 3
3901 ; RV64ZVE32F-NEXT: add a0, a1, a0
3902 ; RV64ZVE32F-NEXT: sd a5, 0(a0)
3903 ; RV64ZVE32F-NEXT: .LBB43_9: # %else10
3904 ; RV64ZVE32F-NEXT: andi a0, a4, 64
3905 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
3906 ; RV64ZVE32F-NEXT: bnez a0, .LBB43_15
3907 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
3908 ; RV64ZVE32F-NEXT: andi a0, a4, -128
3909 ; RV64ZVE32F-NEXT: bnez a0, .LBB43_16
3910 ; RV64ZVE32F-NEXT: .LBB43_11: # %else14
3911 ; RV64ZVE32F-NEXT: ret
3912 ; RV64ZVE32F-NEXT: .LBB43_12: # %cond.store3
3913 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
3914 ; RV64ZVE32F-NEXT: slli a0, a0, 3
3915 ; RV64ZVE32F-NEXT: add a0, a1, a0
3916 ; RV64ZVE32F-NEXT: sd t0, 0(a0)
3917 ; RV64ZVE32F-NEXT: andi a0, a4, 8
3918 ; RV64ZVE32F-NEXT: beqz a0, .LBB43_6
3919 ; RV64ZVE32F-NEXT: .LBB43_13: # %cond.store5
3920 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
3921 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
3922 ; RV64ZVE32F-NEXT: slli a0, a0, 3
3923 ; RV64ZVE32F-NEXT: add a0, a1, a0
3924 ; RV64ZVE32F-NEXT: sd a7, 0(a0)
3925 ; RV64ZVE32F-NEXT: andi a0, a4, 16
3926 ; RV64ZVE32F-NEXT: beqz a0, .LBB43_7
3927 ; RV64ZVE32F-NEXT: .LBB43_14: # %cond.store7
3928 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9
3929 ; RV64ZVE32F-NEXT: slli a0, a0, 3
3930 ; RV64ZVE32F-NEXT: add a0, a1, a0
3931 ; RV64ZVE32F-NEXT: sd a6, 0(a0)
3932 ; RV64ZVE32F-NEXT: andi a0, a4, 32
3933 ; RV64ZVE32F-NEXT: bnez a0, .LBB43_8
3934 ; RV64ZVE32F-NEXT: j .LBB43_9
3935 ; RV64ZVE32F-NEXT: .LBB43_15: # %cond.store11
3936 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
3937 ; RV64ZVE32F-NEXT: slli a0, a0, 3
3938 ; RV64ZVE32F-NEXT: add a0, a1, a0
3939 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
3940 ; RV64ZVE32F-NEXT: andi a0, a4, -128
3941 ; RV64ZVE32F-NEXT: beqz a0, .LBB43_11
3942 ; RV64ZVE32F-NEXT: .LBB43_16: # %cond.store13
3943 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
3944 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
3945 ; RV64ZVE32F-NEXT: slli a0, a0, 3
3946 ; RV64ZVE32F-NEXT: add a0, a1, a0
3947 ; RV64ZVE32F-NEXT: sd a2, 0(a0)
3948 ; RV64ZVE32F-NEXT: ret
3949 %eidxs = sext <8 x i8> %idxs to <8 x i64>
3950 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
3951 call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
; Test mscatter_baseidx_zext_v8i8_v8i64
; Masked scatter of the eight i64 elements of %val. The <8 x i8> %idxs are
; zero-extended to i64 and used as getelementptr indices over i64 elements of
; %base; the scatter is issued with alignment 8 and lanes are enabled by %m.
;
; What the autogenerated assertions below pin down, per run configuration
;  * RV32V and RV64 runs - the indices are widened only to e16 (vzext.vf2),
;    scaled by 8 (vsll.vi 3) and consumed by one masked vsoxei16.v.
;  * RV32ZVE32F run - the i64 values are read from memory through a0 as pairs
;    of 32-bit words (lw) and a1 carries the base. The addresses are built as
;    an e32 vector (vzext.vf4, vsll.vi 3, vadd.vx with a1) and every lane is
;    stored with two sw instructions behind a branch on its mask bit. s0-s2
;    are spilled and reloaded around the sequence.
;    (presumably the by-memory value passing is because <8 x i64> is not a
;    legal vector type with zve32f - confirm against the calling convention)
;  * RV64ZVE32F run - the values are read with ld through a0. Each index is
;    extracted with vmv.x.s, masked with andi 255 (the zero-extension),
;    shifted left by 3, added to a1 and stored with sd behind a branch on its
;    mask bit.
;
; Do not edit the assertion lines by hand; regenerate them with
; utils/update_llc_test_checks.py.
3955 define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
3956 ; RV32V-LABEL: mscatter_baseidx_zext_v8i8_v8i64:
3958 ; RV32V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
3959 ; RV32V-NEXT: vzext.vf2 v13, v12
3960 ; RV32V-NEXT: vsll.vi v12, v13, 3
3961 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
3962 ; RV32V-NEXT: vsoxei16.v v8, (a0), v12, v0.t
3965 ; RV64-LABEL: mscatter_baseidx_zext_v8i8_v8i64:
3967 ; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
3968 ; RV64-NEXT: vzext.vf2 v13, v12
3969 ; RV64-NEXT: vsll.vi v12, v13, 3
3970 ; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, ma
3971 ; RV64-NEXT: vsoxei16.v v8, (a0), v12, v0.t
3974 ; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8i64:
3975 ; RV32ZVE32F: # %bb.0:
3976 ; RV32ZVE32F-NEXT: addi sp, sp, -16
3977 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
3978 ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
3979 ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
3980 ; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
3981 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
3982 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
3983 ; RV32ZVE32F-NEXT: .cfi_offset s2, -12
3984 ; RV32ZVE32F-NEXT: lw a2, 60(a0)
3985 ; RV32ZVE32F-NEXT: lw a3, 56(a0)
3986 ; RV32ZVE32F-NEXT: lw a4, 52(a0)
3987 ; RV32ZVE32F-NEXT: lw a5, 48(a0)
3988 ; RV32ZVE32F-NEXT: lw a6, 44(a0)
3989 ; RV32ZVE32F-NEXT: lw a7, 40(a0)
3990 ; RV32ZVE32F-NEXT: lw t0, 36(a0)
3991 ; RV32ZVE32F-NEXT: lw t1, 32(a0)
3992 ; RV32ZVE32F-NEXT: lw t2, 28(a0)
3993 ; RV32ZVE32F-NEXT: lw t3, 24(a0)
3994 ; RV32ZVE32F-NEXT: lw t4, 20(a0)
3995 ; RV32ZVE32F-NEXT: lw t5, 16(a0)
3996 ; RV32ZVE32F-NEXT: lw s0, 12(a0)
3997 ; RV32ZVE32F-NEXT: lw t6, 8(a0)
3998 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
3999 ; RV32ZVE32F-NEXT: vzext.vf4 v10, v8
4000 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
4001 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
4002 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
4003 ; RV32ZVE32F-NEXT: vmv.x.s a1, v0
4004 ; RV32ZVE32F-NEXT: andi s1, a1, 1
4005 ; RV32ZVE32F-NEXT: bnez s1, .LBB44_10
4006 ; RV32ZVE32F-NEXT: # %bb.1: # %else
4007 ; RV32ZVE32F-NEXT: andi a0, a1, 2
4008 ; RV32ZVE32F-NEXT: bnez a0, .LBB44_11
4009 ; RV32ZVE32F-NEXT: .LBB44_2: # %else2
4010 ; RV32ZVE32F-NEXT: andi a0, a1, 4
4011 ; RV32ZVE32F-NEXT: bnez a0, .LBB44_12
4012 ; RV32ZVE32F-NEXT: .LBB44_3: # %else4
4013 ; RV32ZVE32F-NEXT: andi a0, a1, 8
4014 ; RV32ZVE32F-NEXT: bnez a0, .LBB44_13
4015 ; RV32ZVE32F-NEXT: .LBB44_4: # %else6
4016 ; RV32ZVE32F-NEXT: andi a0, a1, 16
4017 ; RV32ZVE32F-NEXT: bnez a0, .LBB44_14
4018 ; RV32ZVE32F-NEXT: .LBB44_5: # %else8
4019 ; RV32ZVE32F-NEXT: andi a0, a1, 32
4020 ; RV32ZVE32F-NEXT: bnez a0, .LBB44_15
4021 ; RV32ZVE32F-NEXT: .LBB44_6: # %else10
4022 ; RV32ZVE32F-NEXT: andi a0, a1, 64
4023 ; RV32ZVE32F-NEXT: bnez a0, .LBB44_16
4024 ; RV32ZVE32F-NEXT: .LBB44_7: # %else12
4025 ; RV32ZVE32F-NEXT: andi a0, a1, -128
4026 ; RV32ZVE32F-NEXT: beqz a0, .LBB44_9
4027 ; RV32ZVE32F-NEXT: .LBB44_8: # %cond.store13
4028 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4029 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
4030 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
4031 ; RV32ZVE32F-NEXT: sw a3, 0(a0)
4032 ; RV32ZVE32F-NEXT: sw a2, 4(a0)
4033 ; RV32ZVE32F-NEXT: .LBB44_9: # %else14
4034 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
4035 ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
4036 ; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
4037 ; RV32ZVE32F-NEXT: addi sp, sp, 16
4038 ; RV32ZVE32F-NEXT: ret
4039 ; RV32ZVE32F-NEXT: .LBB44_10: # %cond.store
4040 ; RV32ZVE32F-NEXT: lw s1, 4(a0)
4041 ; RV32ZVE32F-NEXT: lw a0, 0(a0)
4042 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
4043 ; RV32ZVE32F-NEXT: vmv.x.s s2, v8
4044 ; RV32ZVE32F-NEXT: sw s1, 4(s2)
4045 ; RV32ZVE32F-NEXT: sw a0, 0(s2)
4046 ; RV32ZVE32F-NEXT: andi a0, a1, 2
4047 ; RV32ZVE32F-NEXT: beqz a0, .LBB44_2
4048 ; RV32ZVE32F-NEXT: .LBB44_11: # %cond.store1
4049 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4050 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
4051 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4052 ; RV32ZVE32F-NEXT: sw s0, 4(a0)
4053 ; RV32ZVE32F-NEXT: sw t6, 0(a0)
4054 ; RV32ZVE32F-NEXT: andi a0, a1, 4
4055 ; RV32ZVE32F-NEXT: beqz a0, .LBB44_3
4056 ; RV32ZVE32F-NEXT: .LBB44_12: # %cond.store3
4057 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4058 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
4059 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4060 ; RV32ZVE32F-NEXT: sw t5, 0(a0)
4061 ; RV32ZVE32F-NEXT: sw t4, 4(a0)
4062 ; RV32ZVE32F-NEXT: andi a0, a1, 8
4063 ; RV32ZVE32F-NEXT: beqz a0, .LBB44_4
4064 ; RV32ZVE32F-NEXT: .LBB44_13: # %cond.store5
4065 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4066 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
4067 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4068 ; RV32ZVE32F-NEXT: sw t3, 0(a0)
4069 ; RV32ZVE32F-NEXT: sw t2, 4(a0)
4070 ; RV32ZVE32F-NEXT: andi a0, a1, 16
4071 ; RV32ZVE32F-NEXT: beqz a0, .LBB44_5
4072 ; RV32ZVE32F-NEXT: .LBB44_14: # %cond.store7
4073 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4074 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
4075 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4076 ; RV32ZVE32F-NEXT: sw t1, 0(a0)
4077 ; RV32ZVE32F-NEXT: sw t0, 4(a0)
4078 ; RV32ZVE32F-NEXT: andi a0, a1, 32
4079 ; RV32ZVE32F-NEXT: beqz a0, .LBB44_6
4080 ; RV32ZVE32F-NEXT: .LBB44_15: # %cond.store9
4081 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4082 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
4083 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4084 ; RV32ZVE32F-NEXT: sw a7, 0(a0)
4085 ; RV32ZVE32F-NEXT: sw a6, 4(a0)
4086 ; RV32ZVE32F-NEXT: andi a0, a1, 64
4087 ; RV32ZVE32F-NEXT: beqz a0, .LBB44_7
4088 ; RV32ZVE32F-NEXT: .LBB44_16: # %cond.store11
4089 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4090 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
4091 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4092 ; RV32ZVE32F-NEXT: sw a5, 0(a0)
4093 ; RV32ZVE32F-NEXT: sw a4, 4(a0)
4094 ; RV32ZVE32F-NEXT: andi a0, a1, -128
4095 ; RV32ZVE32F-NEXT: bnez a0, .LBB44_8
4096 ; RV32ZVE32F-NEXT: j .LBB44_9
4098 ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8i64:
4099 ; RV64ZVE32F: # %bb.0:
4100 ; RV64ZVE32F-NEXT: ld a2, 56(a0)
4101 ; RV64ZVE32F-NEXT: ld a3, 48(a0)
4102 ; RV64ZVE32F-NEXT: ld a5, 40(a0)
4103 ; RV64ZVE32F-NEXT: ld a6, 32(a0)
4104 ; RV64ZVE32F-NEXT: ld a7, 24(a0)
4105 ; RV64ZVE32F-NEXT: ld t0, 16(a0)
4106 ; RV64ZVE32F-NEXT: ld t1, 8(a0)
4107 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
4108 ; RV64ZVE32F-NEXT: vmv.x.s a4, v0
4109 ; RV64ZVE32F-NEXT: andi t2, a4, 1
4110 ; RV64ZVE32F-NEXT: beqz t2, .LBB44_2
4111 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
4112 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
4113 ; RV64ZVE32F-NEXT: vmv.x.s t2, v8
4114 ; RV64ZVE32F-NEXT: andi t2, t2, 255
4115 ; RV64ZVE32F-NEXT: slli t2, t2, 3
4116 ; RV64ZVE32F-NEXT: add t2, a1, t2
4117 ; RV64ZVE32F-NEXT: sd a0, 0(t2)
4118 ; RV64ZVE32F-NEXT: .LBB44_2: # %else
4119 ; RV64ZVE32F-NEXT: andi a0, a4, 2
4120 ; RV64ZVE32F-NEXT: beqz a0, .LBB44_4
4121 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
4122 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
4123 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
4124 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9
4125 ; RV64ZVE32F-NEXT: andi a0, a0, 255
4126 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4127 ; RV64ZVE32F-NEXT: add a0, a1, a0
4128 ; RV64ZVE32F-NEXT: sd t1, 0(a0)
4129 ; RV64ZVE32F-NEXT: .LBB44_4: # %else2
4130 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
4131 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
4132 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
4133 ; RV64ZVE32F-NEXT: andi a0, a4, 4
4134 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
4135 ; RV64ZVE32F-NEXT: bnez a0, .LBB44_12
4136 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
4137 ; RV64ZVE32F-NEXT: andi a0, a4, 8
4138 ; RV64ZVE32F-NEXT: bnez a0, .LBB44_13
4139 ; RV64ZVE32F-NEXT: .LBB44_6: # %else6
4140 ; RV64ZVE32F-NEXT: andi a0, a4, 16
4141 ; RV64ZVE32F-NEXT: bnez a0, .LBB44_14
4142 ; RV64ZVE32F-NEXT: .LBB44_7: # %else8
4143 ; RV64ZVE32F-NEXT: andi a0, a4, 32
4144 ; RV64ZVE32F-NEXT: beqz a0, .LBB44_9
4145 ; RV64ZVE32F-NEXT: .LBB44_8: # %cond.store9
4146 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
4147 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4148 ; RV64ZVE32F-NEXT: andi a0, a0, 255
4149 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4150 ; RV64ZVE32F-NEXT: add a0, a1, a0
4151 ; RV64ZVE32F-NEXT: sd a5, 0(a0)
4152 ; RV64ZVE32F-NEXT: .LBB44_9: # %else10
4153 ; RV64ZVE32F-NEXT: andi a0, a4, 64
4154 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
4155 ; RV64ZVE32F-NEXT: bnez a0, .LBB44_15
4156 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
4157 ; RV64ZVE32F-NEXT: andi a0, a4, -128
4158 ; RV64ZVE32F-NEXT: bnez a0, .LBB44_16
4159 ; RV64ZVE32F-NEXT: .LBB44_11: # %else14
4160 ; RV64ZVE32F-NEXT: ret
4161 ; RV64ZVE32F-NEXT: .LBB44_12: # %cond.store3
4162 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4163 ; RV64ZVE32F-NEXT: andi a0, a0, 255
4164 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4165 ; RV64ZVE32F-NEXT: add a0, a1, a0
4166 ; RV64ZVE32F-NEXT: sd t0, 0(a0)
4167 ; RV64ZVE32F-NEXT: andi a0, a4, 8
4168 ; RV64ZVE32F-NEXT: beqz a0, .LBB44_6
4169 ; RV64ZVE32F-NEXT: .LBB44_13: # %cond.store5
4170 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
4171 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4172 ; RV64ZVE32F-NEXT: andi a0, a0, 255
4173 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4174 ; RV64ZVE32F-NEXT: add a0, a1, a0
4175 ; RV64ZVE32F-NEXT: sd a7, 0(a0)
4176 ; RV64ZVE32F-NEXT: andi a0, a4, 16
4177 ; RV64ZVE32F-NEXT: beqz a0, .LBB44_7
4178 ; RV64ZVE32F-NEXT: .LBB44_14: # %cond.store7
4179 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9
4180 ; RV64ZVE32F-NEXT: andi a0, a0, 255
4181 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4182 ; RV64ZVE32F-NEXT: add a0, a1, a0
4183 ; RV64ZVE32F-NEXT: sd a6, 0(a0)
4184 ; RV64ZVE32F-NEXT: andi a0, a4, 32
4185 ; RV64ZVE32F-NEXT: bnez a0, .LBB44_8
4186 ; RV64ZVE32F-NEXT: j .LBB44_9
4187 ; RV64ZVE32F-NEXT: .LBB44_15: # %cond.store11
4188 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4189 ; RV64ZVE32F-NEXT: andi a0, a0, 255
4190 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4191 ; RV64ZVE32F-NEXT: add a0, a1, a0
4192 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
4193 ; RV64ZVE32F-NEXT: andi a0, a4, -128
4194 ; RV64ZVE32F-NEXT: beqz a0, .LBB44_11
4195 ; RV64ZVE32F-NEXT: .LBB44_16: # %cond.store13
4196 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
4197 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4198 ; RV64ZVE32F-NEXT: andi a0, a0, 255
4199 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4200 ; RV64ZVE32F-NEXT: add a0, a1, a0
4201 ; RV64ZVE32F-NEXT: sd a2, 0(a0)
4202 ; RV64ZVE32F-NEXT: ret
4203 %eidxs = zext <8 x i8> %idxs to <8 x i64>
4204 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
4205 call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
; Test mscatter_baseidx_v8i16_v8i64
; Masked scatter of the eight i64 elements of %val. The <8 x i16> %idxs are
; passed straight to the getelementptr over i64 elements of %base, with no
; explicit extension in the IR; the scatter is issued with alignment 8 and
; lanes are enabled by %m.
;
; What the autogenerated assertions below pin down, per run configuration
;  * RV32V run - the indices are sign-extended to e32 (vsext.vf2), scaled by
;    8 (vsll.vi 3) and consumed by one masked vsoxei32.v.
;  * RV64 run - the indices are sign-extended to e64 (vsext.vf4), scaled by 8
;    and consumed by one masked vsoxei64.v.
;  * RV32ZVE32F run - the i64 values are read from memory through a0 as pairs
;    of 32-bit words (lw) and a1 carries the base. The addresses are built as
;    an e32 vector (vsext.vf2, vsll.vi 3, vadd.vx with a1) and every lane is
;    stored with two sw instructions behind a branch on its mask bit. s0-s2
;    are spilled and reloaded around the sequence.
;  * RV64ZVE32F run - the values are read with ld through a0. Each index is
;    extracted at e16 with vmv.x.s and used without any masking instruction,
;    then shifted left by 3, added to a1 and stored with sd behind a branch
;    on its mask bit.
;
; Do not edit the assertion lines by hand; regenerate them with
; utils/update_llc_test_checks.py.
4209 define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
4210 ; RV32V-LABEL: mscatter_baseidx_v8i16_v8i64:
4212 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
4213 ; RV32V-NEXT: vsext.vf2 v14, v12
4214 ; RV32V-NEXT: vsll.vi v12, v14, 3
4215 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
4216 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
4219 ; RV64-LABEL: mscatter_baseidx_v8i16_v8i64:
4221 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
4222 ; RV64-NEXT: vsext.vf4 v16, v12
4223 ; RV64-NEXT: vsll.vi v12, v16, 3
4224 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
4227 ; RV32ZVE32F-LABEL: mscatter_baseidx_v8i16_v8i64:
4228 ; RV32ZVE32F: # %bb.0:
4229 ; RV32ZVE32F-NEXT: addi sp, sp, -16
4230 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
4231 ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
4232 ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
4233 ; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
4234 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
4235 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
4236 ; RV32ZVE32F-NEXT: .cfi_offset s2, -12
4237 ; RV32ZVE32F-NEXT: lw a2, 60(a0)
4238 ; RV32ZVE32F-NEXT: lw a3, 56(a0)
4239 ; RV32ZVE32F-NEXT: lw a4, 52(a0)
4240 ; RV32ZVE32F-NEXT: lw a5, 48(a0)
4241 ; RV32ZVE32F-NEXT: lw a6, 44(a0)
4242 ; RV32ZVE32F-NEXT: lw a7, 40(a0)
4243 ; RV32ZVE32F-NEXT: lw t0, 36(a0)
4244 ; RV32ZVE32F-NEXT: lw t1, 32(a0)
4245 ; RV32ZVE32F-NEXT: lw t2, 28(a0)
4246 ; RV32ZVE32F-NEXT: lw t3, 24(a0)
4247 ; RV32ZVE32F-NEXT: lw t4, 20(a0)
4248 ; RV32ZVE32F-NEXT: lw t5, 16(a0)
4249 ; RV32ZVE32F-NEXT: lw s0, 12(a0)
4250 ; RV32ZVE32F-NEXT: lw t6, 8(a0)
4251 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
4252 ; RV32ZVE32F-NEXT: vsext.vf2 v10, v8
4253 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
4254 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
4255 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
4256 ; RV32ZVE32F-NEXT: vmv.x.s a1, v0
4257 ; RV32ZVE32F-NEXT: andi s1, a1, 1
4258 ; RV32ZVE32F-NEXT: bnez s1, .LBB45_10
4259 ; RV32ZVE32F-NEXT: # %bb.1: # %else
4260 ; RV32ZVE32F-NEXT: andi a0, a1, 2
4261 ; RV32ZVE32F-NEXT: bnez a0, .LBB45_11
4262 ; RV32ZVE32F-NEXT: .LBB45_2: # %else2
4263 ; RV32ZVE32F-NEXT: andi a0, a1, 4
4264 ; RV32ZVE32F-NEXT: bnez a0, .LBB45_12
4265 ; RV32ZVE32F-NEXT: .LBB45_3: # %else4
4266 ; RV32ZVE32F-NEXT: andi a0, a1, 8
4267 ; RV32ZVE32F-NEXT: bnez a0, .LBB45_13
4268 ; RV32ZVE32F-NEXT: .LBB45_4: # %else6
4269 ; RV32ZVE32F-NEXT: andi a0, a1, 16
4270 ; RV32ZVE32F-NEXT: bnez a0, .LBB45_14
4271 ; RV32ZVE32F-NEXT: .LBB45_5: # %else8
4272 ; RV32ZVE32F-NEXT: andi a0, a1, 32
4273 ; RV32ZVE32F-NEXT: bnez a0, .LBB45_15
4274 ; RV32ZVE32F-NEXT: .LBB45_6: # %else10
4275 ; RV32ZVE32F-NEXT: andi a0, a1, 64
4276 ; RV32ZVE32F-NEXT: bnez a0, .LBB45_16
4277 ; RV32ZVE32F-NEXT: .LBB45_7: # %else12
4278 ; RV32ZVE32F-NEXT: andi a0, a1, -128
4279 ; RV32ZVE32F-NEXT: beqz a0, .LBB45_9
4280 ; RV32ZVE32F-NEXT: .LBB45_8: # %cond.store13
4281 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4282 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
4283 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
4284 ; RV32ZVE32F-NEXT: sw a3, 0(a0)
4285 ; RV32ZVE32F-NEXT: sw a2, 4(a0)
4286 ; RV32ZVE32F-NEXT: .LBB45_9: # %else14
4287 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
4288 ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
4289 ; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
4290 ; RV32ZVE32F-NEXT: addi sp, sp, 16
4291 ; RV32ZVE32F-NEXT: ret
4292 ; RV32ZVE32F-NEXT: .LBB45_10: # %cond.store
4293 ; RV32ZVE32F-NEXT: lw s1, 4(a0)
4294 ; RV32ZVE32F-NEXT: lw a0, 0(a0)
4295 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
4296 ; RV32ZVE32F-NEXT: vmv.x.s s2, v8
4297 ; RV32ZVE32F-NEXT: sw s1, 4(s2)
4298 ; RV32ZVE32F-NEXT: sw a0, 0(s2)
4299 ; RV32ZVE32F-NEXT: andi a0, a1, 2
4300 ; RV32ZVE32F-NEXT: beqz a0, .LBB45_2
4301 ; RV32ZVE32F-NEXT: .LBB45_11: # %cond.store1
4302 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4303 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
4304 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4305 ; RV32ZVE32F-NEXT: sw s0, 4(a0)
4306 ; RV32ZVE32F-NEXT: sw t6, 0(a0)
4307 ; RV32ZVE32F-NEXT: andi a0, a1, 4
4308 ; RV32ZVE32F-NEXT: beqz a0, .LBB45_3
4309 ; RV32ZVE32F-NEXT: .LBB45_12: # %cond.store3
4310 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4311 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
4312 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4313 ; RV32ZVE32F-NEXT: sw t5, 0(a0)
4314 ; RV32ZVE32F-NEXT: sw t4, 4(a0)
4315 ; RV32ZVE32F-NEXT: andi a0, a1, 8
4316 ; RV32ZVE32F-NEXT: beqz a0, .LBB45_4
4317 ; RV32ZVE32F-NEXT: .LBB45_13: # %cond.store5
4318 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4319 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
4320 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4321 ; RV32ZVE32F-NEXT: sw t3, 0(a0)
4322 ; RV32ZVE32F-NEXT: sw t2, 4(a0)
4323 ; RV32ZVE32F-NEXT: andi a0, a1, 16
4324 ; RV32ZVE32F-NEXT: beqz a0, .LBB45_5
4325 ; RV32ZVE32F-NEXT: .LBB45_14: # %cond.store7
4326 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4327 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
4328 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4329 ; RV32ZVE32F-NEXT: sw t1, 0(a0)
4330 ; RV32ZVE32F-NEXT: sw t0, 4(a0)
4331 ; RV32ZVE32F-NEXT: andi a0, a1, 32
4332 ; RV32ZVE32F-NEXT: beqz a0, .LBB45_6
4333 ; RV32ZVE32F-NEXT: .LBB45_15: # %cond.store9
4334 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4335 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
4336 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4337 ; RV32ZVE32F-NEXT: sw a7, 0(a0)
4338 ; RV32ZVE32F-NEXT: sw a6, 4(a0)
4339 ; RV32ZVE32F-NEXT: andi a0, a1, 64
4340 ; RV32ZVE32F-NEXT: beqz a0, .LBB45_7
4341 ; RV32ZVE32F-NEXT: .LBB45_16: # %cond.store11
4342 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4343 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
4344 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4345 ; RV32ZVE32F-NEXT: sw a5, 0(a0)
4346 ; RV32ZVE32F-NEXT: sw a4, 4(a0)
4347 ; RV32ZVE32F-NEXT: andi a0, a1, -128
4348 ; RV32ZVE32F-NEXT: bnez a0, .LBB45_8
4349 ; RV32ZVE32F-NEXT: j .LBB45_9
4351 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i16_v8i64:
4352 ; RV64ZVE32F: # %bb.0:
4353 ; RV64ZVE32F-NEXT: ld a2, 56(a0)
4354 ; RV64ZVE32F-NEXT: ld a3, 48(a0)
4355 ; RV64ZVE32F-NEXT: ld a5, 40(a0)
4356 ; RV64ZVE32F-NEXT: ld a6, 32(a0)
4357 ; RV64ZVE32F-NEXT: ld a7, 24(a0)
4358 ; RV64ZVE32F-NEXT: ld t0, 16(a0)
4359 ; RV64ZVE32F-NEXT: ld t1, 8(a0)
4360 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
4361 ; RV64ZVE32F-NEXT: vmv.x.s a4, v0
4362 ; RV64ZVE32F-NEXT: andi t2, a4, 1
4363 ; RV64ZVE32F-NEXT: beqz t2, .LBB45_2
4364 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
4365 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
4366 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4367 ; RV64ZVE32F-NEXT: vmv.x.s t2, v8
4368 ; RV64ZVE32F-NEXT: slli t2, t2, 3
4369 ; RV64ZVE32F-NEXT: add t2, a1, t2
4370 ; RV64ZVE32F-NEXT: sd a0, 0(t2)
4371 ; RV64ZVE32F-NEXT: .LBB45_2: # %else
4372 ; RV64ZVE32F-NEXT: andi a0, a4, 2
4373 ; RV64ZVE32F-NEXT: beqz a0, .LBB45_4
4374 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
4375 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
4376 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
4377 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9
4378 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4379 ; RV64ZVE32F-NEXT: add a0, a1, a0
4380 ; RV64ZVE32F-NEXT: sd t1, 0(a0)
4381 ; RV64ZVE32F-NEXT: .LBB45_4: # %else2
4382 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
4383 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
4384 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
4385 ; RV64ZVE32F-NEXT: andi a0, a4, 4
4386 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
4387 ; RV64ZVE32F-NEXT: bnez a0, .LBB45_12
4388 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
4389 ; RV64ZVE32F-NEXT: andi a0, a4, 8
4390 ; RV64ZVE32F-NEXT: bnez a0, .LBB45_13
4391 ; RV64ZVE32F-NEXT: .LBB45_6: # %else6
4392 ; RV64ZVE32F-NEXT: andi a0, a4, 16
4393 ; RV64ZVE32F-NEXT: bnez a0, .LBB45_14
4394 ; RV64ZVE32F-NEXT: .LBB45_7: # %else8
4395 ; RV64ZVE32F-NEXT: andi a0, a4, 32
4396 ; RV64ZVE32F-NEXT: beqz a0, .LBB45_9
4397 ; RV64ZVE32F-NEXT: .LBB45_8: # %cond.store9
4398 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
4399 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4400 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4401 ; RV64ZVE32F-NEXT: add a0, a1, a0
4402 ; RV64ZVE32F-NEXT: sd a5, 0(a0)
4403 ; RV64ZVE32F-NEXT: .LBB45_9: # %else10
4404 ; RV64ZVE32F-NEXT: andi a0, a4, 64
4405 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
4406 ; RV64ZVE32F-NEXT: bnez a0, .LBB45_15
4407 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
4408 ; RV64ZVE32F-NEXT: andi a0, a4, -128
4409 ; RV64ZVE32F-NEXT: bnez a0, .LBB45_16
4410 ; RV64ZVE32F-NEXT: .LBB45_11: # %else14
4411 ; RV64ZVE32F-NEXT: ret
4412 ; RV64ZVE32F-NEXT: .LBB45_12: # %cond.store3
4413 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4414 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4415 ; RV64ZVE32F-NEXT: add a0, a1, a0
4416 ; RV64ZVE32F-NEXT: sd t0, 0(a0)
4417 ; RV64ZVE32F-NEXT: andi a0, a4, 8
4418 ; RV64ZVE32F-NEXT: beqz a0, .LBB45_6
4419 ; RV64ZVE32F-NEXT: .LBB45_13: # %cond.store5
4420 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
4421 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4422 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4423 ; RV64ZVE32F-NEXT: add a0, a1, a0
4424 ; RV64ZVE32F-NEXT: sd a7, 0(a0)
4425 ; RV64ZVE32F-NEXT: andi a0, a4, 16
4426 ; RV64ZVE32F-NEXT: beqz a0, .LBB45_7
4427 ; RV64ZVE32F-NEXT: .LBB45_14: # %cond.store7
4428 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9
4429 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4430 ; RV64ZVE32F-NEXT: add a0, a1, a0
4431 ; RV64ZVE32F-NEXT: sd a6, 0(a0)
4432 ; RV64ZVE32F-NEXT: andi a0, a4, 32
4433 ; RV64ZVE32F-NEXT: bnez a0, .LBB45_8
4434 ; RV64ZVE32F-NEXT: j .LBB45_9
4435 ; RV64ZVE32F-NEXT: .LBB45_15: # %cond.store11
4436 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4437 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4438 ; RV64ZVE32F-NEXT: add a0, a1, a0
4439 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
4440 ; RV64ZVE32F-NEXT: andi a0, a4, -128
4441 ; RV64ZVE32F-NEXT: beqz a0, .LBB45_11
4442 ; RV64ZVE32F-NEXT: .LBB45_16: # %cond.store13
4443 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
4444 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4445 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4446 ; RV64ZVE32F-NEXT: add a0, a1, a0
4447 ; RV64ZVE32F-NEXT: sd a2, 0(a0)
4448 ; RV64ZVE32F-NEXT: ret
4449 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i16> %idxs
4450 call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
; Test mscatter_baseidx_sext_v8i16_v8i64
; Masked scatter of the eight i64 elements of %val. The <8 x i16> %idxs are
; explicitly sign-extended to i64 in the IR and used as getelementptr indices
; over i64 elements of %base; the scatter is issued with alignment 8 and
; lanes are enabled by %m.
;
; What the autogenerated assertions below pin down, per run configuration
;  * RV32V run - the indices are sign-extended only to e32 (vsext.vf2),
;    scaled by 8 (vsll.vi 3) and consumed by one masked vsoxei32.v.
;  * RV64 run - the indices are sign-extended to e64 (vsext.vf4), scaled by 8
;    and consumed by one masked vsoxei64.v.
;  * RV32ZVE32F run - the i64 values are read from memory through a0 as pairs
;    of 32-bit words (lw) and a1 carries the base. The addresses are built as
;    an e32 vector (vsext.vf2, vsll.vi 3, vadd.vx with a1) and every lane is
;    stored with two sw instructions behind a branch on its mask bit. s0-s2
;    are spilled and reloaded around the sequence.
;  * RV64ZVE32F run - the values are read with ld through a0. Each index is
;    extracted at e16 with vmv.x.s and used without any masking instruction,
;    then shifted left by 3, added to a1 and stored with sd behind a branch
;    on its mask bit.
;
; Do not edit the assertion lines by hand; regenerate them with
; utils/update_llc_test_checks.py.
4454 define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
4455 ; RV32V-LABEL: mscatter_baseidx_sext_v8i16_v8i64:
4457 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
4458 ; RV32V-NEXT: vsext.vf2 v14, v12
4459 ; RV32V-NEXT: vsll.vi v12, v14, 3
4460 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
4461 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
4464 ; RV64-LABEL: mscatter_baseidx_sext_v8i16_v8i64:
4466 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
4467 ; RV64-NEXT: vsext.vf4 v16, v12
4468 ; RV64-NEXT: vsll.vi v12, v16, 3
4469 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
4472 ; RV32ZVE32F-LABEL: mscatter_baseidx_sext_v8i16_v8i64:
4473 ; RV32ZVE32F: # %bb.0:
4474 ; RV32ZVE32F-NEXT: addi sp, sp, -16
4475 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
4476 ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
4477 ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
4478 ; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
4479 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
4480 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
4481 ; RV32ZVE32F-NEXT: .cfi_offset s2, -12
4482 ; RV32ZVE32F-NEXT: lw a2, 60(a0)
4483 ; RV32ZVE32F-NEXT: lw a3, 56(a0)
4484 ; RV32ZVE32F-NEXT: lw a4, 52(a0)
4485 ; RV32ZVE32F-NEXT: lw a5, 48(a0)
4486 ; RV32ZVE32F-NEXT: lw a6, 44(a0)
4487 ; RV32ZVE32F-NEXT: lw a7, 40(a0)
4488 ; RV32ZVE32F-NEXT: lw t0, 36(a0)
4489 ; RV32ZVE32F-NEXT: lw t1, 32(a0)
4490 ; RV32ZVE32F-NEXT: lw t2, 28(a0)
4491 ; RV32ZVE32F-NEXT: lw t3, 24(a0)
4492 ; RV32ZVE32F-NEXT: lw t4, 20(a0)
4493 ; RV32ZVE32F-NEXT: lw t5, 16(a0)
4494 ; RV32ZVE32F-NEXT: lw s0, 12(a0)
4495 ; RV32ZVE32F-NEXT: lw t6, 8(a0)
4496 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
4497 ; RV32ZVE32F-NEXT: vsext.vf2 v10, v8
4498 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
4499 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
4500 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
4501 ; RV32ZVE32F-NEXT: vmv.x.s a1, v0
4502 ; RV32ZVE32F-NEXT: andi s1, a1, 1
4503 ; RV32ZVE32F-NEXT: bnez s1, .LBB46_10
4504 ; RV32ZVE32F-NEXT: # %bb.1: # %else
4505 ; RV32ZVE32F-NEXT: andi a0, a1, 2
4506 ; RV32ZVE32F-NEXT: bnez a0, .LBB46_11
4507 ; RV32ZVE32F-NEXT: .LBB46_2: # %else2
4508 ; RV32ZVE32F-NEXT: andi a0, a1, 4
4509 ; RV32ZVE32F-NEXT: bnez a0, .LBB46_12
4510 ; RV32ZVE32F-NEXT: .LBB46_3: # %else4
4511 ; RV32ZVE32F-NEXT: andi a0, a1, 8
4512 ; RV32ZVE32F-NEXT: bnez a0, .LBB46_13
4513 ; RV32ZVE32F-NEXT: .LBB46_4: # %else6
4514 ; RV32ZVE32F-NEXT: andi a0, a1, 16
4515 ; RV32ZVE32F-NEXT: bnez a0, .LBB46_14
4516 ; RV32ZVE32F-NEXT: .LBB46_5: # %else8
4517 ; RV32ZVE32F-NEXT: andi a0, a1, 32
4518 ; RV32ZVE32F-NEXT: bnez a0, .LBB46_15
4519 ; RV32ZVE32F-NEXT: .LBB46_6: # %else10
4520 ; RV32ZVE32F-NEXT: andi a0, a1, 64
4521 ; RV32ZVE32F-NEXT: bnez a0, .LBB46_16
4522 ; RV32ZVE32F-NEXT: .LBB46_7: # %else12
4523 ; RV32ZVE32F-NEXT: andi a0, a1, -128
4524 ; RV32ZVE32F-NEXT: beqz a0, .LBB46_9
4525 ; RV32ZVE32F-NEXT: .LBB46_8: # %cond.store13
4526 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4527 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
4528 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
4529 ; RV32ZVE32F-NEXT: sw a3, 0(a0)
4530 ; RV32ZVE32F-NEXT: sw a2, 4(a0)
4531 ; RV32ZVE32F-NEXT: .LBB46_9: # %else14
4532 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
4533 ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
4534 ; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
4535 ; RV32ZVE32F-NEXT: addi sp, sp, 16
4536 ; RV32ZVE32F-NEXT: ret
4537 ; RV32ZVE32F-NEXT: .LBB46_10: # %cond.store
4538 ; RV32ZVE32F-NEXT: lw s1, 4(a0)
4539 ; RV32ZVE32F-NEXT: lw a0, 0(a0)
4540 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
4541 ; RV32ZVE32F-NEXT: vmv.x.s s2, v8
4542 ; RV32ZVE32F-NEXT: sw s1, 4(s2)
4543 ; RV32ZVE32F-NEXT: sw a0, 0(s2)
4544 ; RV32ZVE32F-NEXT: andi a0, a1, 2
4545 ; RV32ZVE32F-NEXT: beqz a0, .LBB46_2
4546 ; RV32ZVE32F-NEXT: .LBB46_11: # %cond.store1
4547 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4548 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
4549 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4550 ; RV32ZVE32F-NEXT: sw s0, 4(a0)
4551 ; RV32ZVE32F-NEXT: sw t6, 0(a0)
4552 ; RV32ZVE32F-NEXT: andi a0, a1, 4
4553 ; RV32ZVE32F-NEXT: beqz a0, .LBB46_3
4554 ; RV32ZVE32F-NEXT: .LBB46_12: # %cond.store3
4555 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4556 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
4557 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4558 ; RV32ZVE32F-NEXT: sw t5, 0(a0)
4559 ; RV32ZVE32F-NEXT: sw t4, 4(a0)
4560 ; RV32ZVE32F-NEXT: andi a0, a1, 8
4561 ; RV32ZVE32F-NEXT: beqz a0, .LBB46_4
4562 ; RV32ZVE32F-NEXT: .LBB46_13: # %cond.store5
4563 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4564 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
4565 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4566 ; RV32ZVE32F-NEXT: sw t3, 0(a0)
4567 ; RV32ZVE32F-NEXT: sw t2, 4(a0)
4568 ; RV32ZVE32F-NEXT: andi a0, a1, 16
4569 ; RV32ZVE32F-NEXT: beqz a0, .LBB46_5
4570 ; RV32ZVE32F-NEXT: .LBB46_14: # %cond.store7
4571 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4572 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
4573 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4574 ; RV32ZVE32F-NEXT: sw t1, 0(a0)
4575 ; RV32ZVE32F-NEXT: sw t0, 4(a0)
4576 ; RV32ZVE32F-NEXT: andi a0, a1, 32
4577 ; RV32ZVE32F-NEXT: beqz a0, .LBB46_6
4578 ; RV32ZVE32F-NEXT: .LBB46_15: # %cond.store9
4579 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4580 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
4581 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4582 ; RV32ZVE32F-NEXT: sw a7, 0(a0)
4583 ; RV32ZVE32F-NEXT: sw a6, 4(a0)
4584 ; RV32ZVE32F-NEXT: andi a0, a1, 64
4585 ; RV32ZVE32F-NEXT: beqz a0, .LBB46_7
4586 ; RV32ZVE32F-NEXT: .LBB46_16: # %cond.store11
4587 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4588 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
4589 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4590 ; RV32ZVE32F-NEXT: sw a5, 0(a0)
4591 ; RV32ZVE32F-NEXT: sw a4, 4(a0)
4592 ; RV32ZVE32F-NEXT: andi a0, a1, -128
4593 ; RV32ZVE32F-NEXT: bnez a0, .LBB46_8
4594 ; RV32ZVE32F-NEXT: j .LBB46_9
4596 ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i16_v8i64:
4597 ; RV64ZVE32F: # %bb.0:
4598 ; RV64ZVE32F-NEXT: ld a2, 56(a0)
4599 ; RV64ZVE32F-NEXT: ld a3, 48(a0)
4600 ; RV64ZVE32F-NEXT: ld a5, 40(a0)
4601 ; RV64ZVE32F-NEXT: ld a6, 32(a0)
4602 ; RV64ZVE32F-NEXT: ld a7, 24(a0)
4603 ; RV64ZVE32F-NEXT: ld t0, 16(a0)
4604 ; RV64ZVE32F-NEXT: ld t1, 8(a0)
4605 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
4606 ; RV64ZVE32F-NEXT: vmv.x.s a4, v0
4607 ; RV64ZVE32F-NEXT: andi t2, a4, 1
4608 ; RV64ZVE32F-NEXT: beqz t2, .LBB46_2
4609 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
4610 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
4611 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4612 ; RV64ZVE32F-NEXT: vmv.x.s t2, v8
4613 ; RV64ZVE32F-NEXT: slli t2, t2, 3
4614 ; RV64ZVE32F-NEXT: add t2, a1, t2
4615 ; RV64ZVE32F-NEXT: sd a0, 0(t2)
4616 ; RV64ZVE32F-NEXT: .LBB46_2: # %else
4617 ; RV64ZVE32F-NEXT: andi a0, a4, 2
4618 ; RV64ZVE32F-NEXT: beqz a0, .LBB46_4
4619 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
4620 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
4621 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
4622 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9
4623 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4624 ; RV64ZVE32F-NEXT: add a0, a1, a0
4625 ; RV64ZVE32F-NEXT: sd t1, 0(a0)
4626 ; RV64ZVE32F-NEXT: .LBB46_4: # %else2
4627 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
4628 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
4629 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
4630 ; RV64ZVE32F-NEXT: andi a0, a4, 4
4631 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
4632 ; RV64ZVE32F-NEXT: bnez a0, .LBB46_12
4633 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
4634 ; RV64ZVE32F-NEXT: andi a0, a4, 8
4635 ; RV64ZVE32F-NEXT: bnez a0, .LBB46_13
4636 ; RV64ZVE32F-NEXT: .LBB46_6: # %else6
4637 ; RV64ZVE32F-NEXT: andi a0, a4, 16
4638 ; RV64ZVE32F-NEXT: bnez a0, .LBB46_14
4639 ; RV64ZVE32F-NEXT: .LBB46_7: # %else8
4640 ; RV64ZVE32F-NEXT: andi a0, a4, 32
4641 ; RV64ZVE32F-NEXT: beqz a0, .LBB46_9
4642 ; RV64ZVE32F-NEXT: .LBB46_8: # %cond.store9
4643 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
4644 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4645 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4646 ; RV64ZVE32F-NEXT: add a0, a1, a0
4647 ; RV64ZVE32F-NEXT: sd a5, 0(a0)
4648 ; RV64ZVE32F-NEXT: .LBB46_9: # %else10
4649 ; RV64ZVE32F-NEXT: andi a0, a4, 64
4650 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
4651 ; RV64ZVE32F-NEXT: bnez a0, .LBB46_15
4652 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
4653 ; RV64ZVE32F-NEXT: andi a0, a4, -128
4654 ; RV64ZVE32F-NEXT: bnez a0, .LBB46_16
4655 ; RV64ZVE32F-NEXT: .LBB46_11: # %else14
4656 ; RV64ZVE32F-NEXT: ret
4657 ; RV64ZVE32F-NEXT: .LBB46_12: # %cond.store3
4658 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4659 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4660 ; RV64ZVE32F-NEXT: add a0, a1, a0
4661 ; RV64ZVE32F-NEXT: sd t0, 0(a0)
4662 ; RV64ZVE32F-NEXT: andi a0, a4, 8
4663 ; RV64ZVE32F-NEXT: beqz a0, .LBB46_6
4664 ; RV64ZVE32F-NEXT: .LBB46_13: # %cond.store5
4665 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
4666 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4667 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4668 ; RV64ZVE32F-NEXT: add a0, a1, a0
4669 ; RV64ZVE32F-NEXT: sd a7, 0(a0)
4670 ; RV64ZVE32F-NEXT: andi a0, a4, 16
4671 ; RV64ZVE32F-NEXT: beqz a0, .LBB46_7
4672 ; RV64ZVE32F-NEXT: .LBB46_14: # %cond.store7
4673 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9
4674 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4675 ; RV64ZVE32F-NEXT: add a0, a1, a0
4676 ; RV64ZVE32F-NEXT: sd a6, 0(a0)
4677 ; RV64ZVE32F-NEXT: andi a0, a4, 32
4678 ; RV64ZVE32F-NEXT: bnez a0, .LBB46_8
4679 ; RV64ZVE32F-NEXT: j .LBB46_9
4680 ; RV64ZVE32F-NEXT: .LBB46_15: # %cond.store11
4681 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4682 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4683 ; RV64ZVE32F-NEXT: add a0, a1, a0
4684 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
4685 ; RV64ZVE32F-NEXT: andi a0, a4, -128
4686 ; RV64ZVE32F-NEXT: beqz a0, .LBB46_11
4687 ; RV64ZVE32F-NEXT: .LBB46_16: # %cond.store13
4688 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
4689 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4690 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4691 ; RV64ZVE32F-NEXT: add a0, a1, a0
4692 ; RV64ZVE32F-NEXT: sd a2, 0(a0)
4693 ; RV64ZVE32F-NEXT: ret
4694 %eidxs = sext <8 x i16> %idxs to <8 x i64>
4695 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
4696 call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
; Masked scatter of <8 x i64> to base + zext(<8 x i16> index) * 8, under %m.
; As recorded in the check lines below, the two full-V configurations
; zero-extend the indices to e32 (vzext.vf2), shift them left by 3 and issue a
; single masked vsoxei32.v. The two ZVE32F configurations instead test the mask
; bits one at a time and perform a scalar store for each active lane; on RV64
; every extracted index is ANDed with 0xffff (built by lui 16 / addiw -1) to
; carry out the zero extension in scalar registers.
4700 define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
4701 ; RV32V-LABEL: mscatter_baseidx_zext_v8i16_v8i64:
4703 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
4704 ; RV32V-NEXT: vzext.vf2 v14, v12
4705 ; RV32V-NEXT: vsll.vi v12, v14, 3
4706 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
4707 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
4710 ; RV64-LABEL: mscatter_baseidx_zext_v8i16_v8i64:
4712 ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
4713 ; RV64-NEXT: vzext.vf2 v14, v12
4714 ; RV64-NEXT: vsll.vi v12, v14, 3
4715 ; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, ma
4716 ; RV64-NEXT: vsoxei32.v v8, (a0), v12, v0.t
4719 ; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8i64:
4720 ; RV32ZVE32F: # %bb.0:
4721 ; RV32ZVE32F-NEXT: addi sp, sp, -16
4722 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
4723 ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
4724 ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
4725 ; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
4726 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
4727 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
4728 ; RV32ZVE32F-NEXT: .cfi_offset s2, -12
4729 ; RV32ZVE32F-NEXT: lw a2, 60(a0)
4730 ; RV32ZVE32F-NEXT: lw a3, 56(a0)
4731 ; RV32ZVE32F-NEXT: lw a4, 52(a0)
4732 ; RV32ZVE32F-NEXT: lw a5, 48(a0)
4733 ; RV32ZVE32F-NEXT: lw a6, 44(a0)
4734 ; RV32ZVE32F-NEXT: lw a7, 40(a0)
4735 ; RV32ZVE32F-NEXT: lw t0, 36(a0)
4736 ; RV32ZVE32F-NEXT: lw t1, 32(a0)
4737 ; RV32ZVE32F-NEXT: lw t2, 28(a0)
4738 ; RV32ZVE32F-NEXT: lw t3, 24(a0)
4739 ; RV32ZVE32F-NEXT: lw t4, 20(a0)
4740 ; RV32ZVE32F-NEXT: lw t5, 16(a0)
4741 ; RV32ZVE32F-NEXT: lw s0, 12(a0)
4742 ; RV32ZVE32F-NEXT: lw t6, 8(a0)
4743 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
4744 ; RV32ZVE32F-NEXT: vzext.vf2 v10, v8
4745 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
4746 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
4747 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
4748 ; RV32ZVE32F-NEXT: vmv.x.s a1, v0
4749 ; RV32ZVE32F-NEXT: andi s1, a1, 1
4750 ; RV32ZVE32F-NEXT: bnez s1, .LBB47_10
4751 ; RV32ZVE32F-NEXT: # %bb.1: # %else
4752 ; RV32ZVE32F-NEXT: andi a0, a1, 2
4753 ; RV32ZVE32F-NEXT: bnez a0, .LBB47_11
4754 ; RV32ZVE32F-NEXT: .LBB47_2: # %else2
4755 ; RV32ZVE32F-NEXT: andi a0, a1, 4
4756 ; RV32ZVE32F-NEXT: bnez a0, .LBB47_12
4757 ; RV32ZVE32F-NEXT: .LBB47_3: # %else4
4758 ; RV32ZVE32F-NEXT: andi a0, a1, 8
4759 ; RV32ZVE32F-NEXT: bnez a0, .LBB47_13
4760 ; RV32ZVE32F-NEXT: .LBB47_4: # %else6
4761 ; RV32ZVE32F-NEXT: andi a0, a1, 16
4762 ; RV32ZVE32F-NEXT: bnez a0, .LBB47_14
4763 ; RV32ZVE32F-NEXT: .LBB47_5: # %else8
4764 ; RV32ZVE32F-NEXT: andi a0, a1, 32
4765 ; RV32ZVE32F-NEXT: bnez a0, .LBB47_15
4766 ; RV32ZVE32F-NEXT: .LBB47_6: # %else10
4767 ; RV32ZVE32F-NEXT: andi a0, a1, 64
4768 ; RV32ZVE32F-NEXT: bnez a0, .LBB47_16
4769 ; RV32ZVE32F-NEXT: .LBB47_7: # %else12
4770 ; RV32ZVE32F-NEXT: andi a0, a1, -128
4771 ; RV32ZVE32F-NEXT: beqz a0, .LBB47_9
4772 ; RV32ZVE32F-NEXT: .LBB47_8: # %cond.store13
4773 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4774 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
4775 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
4776 ; RV32ZVE32F-NEXT: sw a3, 0(a0)
4777 ; RV32ZVE32F-NEXT: sw a2, 4(a0)
4778 ; RV32ZVE32F-NEXT: .LBB47_9: # %else14
4779 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
4780 ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
4781 ; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
4782 ; RV32ZVE32F-NEXT: addi sp, sp, 16
4783 ; RV32ZVE32F-NEXT: ret
4784 ; RV32ZVE32F-NEXT: .LBB47_10: # %cond.store
4785 ; RV32ZVE32F-NEXT: lw s1, 4(a0)
4786 ; RV32ZVE32F-NEXT: lw a0, 0(a0)
4787 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
4788 ; RV32ZVE32F-NEXT: vmv.x.s s2, v8
4789 ; RV32ZVE32F-NEXT: sw s1, 4(s2)
4790 ; RV32ZVE32F-NEXT: sw a0, 0(s2)
4791 ; RV32ZVE32F-NEXT: andi a0, a1, 2
4792 ; RV32ZVE32F-NEXT: beqz a0, .LBB47_2
4793 ; RV32ZVE32F-NEXT: .LBB47_11: # %cond.store1
4794 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4795 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
4796 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4797 ; RV32ZVE32F-NEXT: sw s0, 4(a0)
4798 ; RV32ZVE32F-NEXT: sw t6, 0(a0)
4799 ; RV32ZVE32F-NEXT: andi a0, a1, 4
4800 ; RV32ZVE32F-NEXT: beqz a0, .LBB47_3
4801 ; RV32ZVE32F-NEXT: .LBB47_12: # %cond.store3
4802 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4803 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
4804 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4805 ; RV32ZVE32F-NEXT: sw t5, 0(a0)
4806 ; RV32ZVE32F-NEXT: sw t4, 4(a0)
4807 ; RV32ZVE32F-NEXT: andi a0, a1, 8
4808 ; RV32ZVE32F-NEXT: beqz a0, .LBB47_4
4809 ; RV32ZVE32F-NEXT: .LBB47_13: # %cond.store5
4810 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4811 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
4812 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4813 ; RV32ZVE32F-NEXT: sw t3, 0(a0)
4814 ; RV32ZVE32F-NEXT: sw t2, 4(a0)
4815 ; RV32ZVE32F-NEXT: andi a0, a1, 16
4816 ; RV32ZVE32F-NEXT: beqz a0, .LBB47_5
4817 ; RV32ZVE32F-NEXT: .LBB47_14: # %cond.store7
4818 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4819 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
4820 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4821 ; RV32ZVE32F-NEXT: sw t1, 0(a0)
4822 ; RV32ZVE32F-NEXT: sw t0, 4(a0)
4823 ; RV32ZVE32F-NEXT: andi a0, a1, 32
4824 ; RV32ZVE32F-NEXT: beqz a0, .LBB47_6
4825 ; RV32ZVE32F-NEXT: .LBB47_15: # %cond.store9
4826 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4827 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
4828 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4829 ; RV32ZVE32F-NEXT: sw a7, 0(a0)
4830 ; RV32ZVE32F-NEXT: sw a6, 4(a0)
4831 ; RV32ZVE32F-NEXT: andi a0, a1, 64
4832 ; RV32ZVE32F-NEXT: beqz a0, .LBB47_7
4833 ; RV32ZVE32F-NEXT: .LBB47_16: # %cond.store11
4834 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4835 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
4836 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4837 ; RV32ZVE32F-NEXT: sw a5, 0(a0)
4838 ; RV32ZVE32F-NEXT: sw a4, 4(a0)
4839 ; RV32ZVE32F-NEXT: andi a0, a1, -128
4840 ; RV32ZVE32F-NEXT: bnez a0, .LBB47_8
4841 ; RV32ZVE32F-NEXT: j .LBB47_9
4843 ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8i64:
4844 ; RV64ZVE32F: # %bb.0:
4845 ; RV64ZVE32F-NEXT: ld a2, 56(a0)
4846 ; RV64ZVE32F-NEXT: ld a3, 48(a0)
4847 ; RV64ZVE32F-NEXT: ld a6, 40(a0)
4848 ; RV64ZVE32F-NEXT: ld a7, 32(a0)
4849 ; RV64ZVE32F-NEXT: ld t0, 24(a0)
4850 ; RV64ZVE32F-NEXT: ld t1, 16(a0)
4851 ; RV64ZVE32F-NEXT: ld t2, 8(a0)
4852 ; RV64ZVE32F-NEXT: lui a4, 16
4853 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
4854 ; RV64ZVE32F-NEXT: vmv.x.s a5, v0
4855 ; RV64ZVE32F-NEXT: andi t3, a5, 1
4856 ; RV64ZVE32F-NEXT: addiw a4, a4, -1
4857 ; RV64ZVE32F-NEXT: beqz t3, .LBB47_2
4858 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
4859 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
4860 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
4861 ; RV64ZVE32F-NEXT: vmv.x.s t3, v8
4862 ; RV64ZVE32F-NEXT: and t3, t3, a4
4863 ; RV64ZVE32F-NEXT: slli t3, t3, 3
4864 ; RV64ZVE32F-NEXT: add t3, a1, t3
4865 ; RV64ZVE32F-NEXT: sd a0, 0(t3)
4866 ; RV64ZVE32F-NEXT: .LBB47_2: # %else
4867 ; RV64ZVE32F-NEXT: andi a0, a5, 2
4868 ; RV64ZVE32F-NEXT: beqz a0, .LBB47_4
4869 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
4870 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
4871 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
4872 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9
4873 ; RV64ZVE32F-NEXT: and a0, a0, a4
4874 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4875 ; RV64ZVE32F-NEXT: add a0, a1, a0
4876 ; RV64ZVE32F-NEXT: sd t2, 0(a0)
4877 ; RV64ZVE32F-NEXT: .LBB47_4: # %else2
4878 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
4879 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
4880 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
4881 ; RV64ZVE32F-NEXT: andi a0, a5, 4
4882 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
4883 ; RV64ZVE32F-NEXT: bnez a0, .LBB47_12
4884 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
4885 ; RV64ZVE32F-NEXT: andi a0, a5, 8
4886 ; RV64ZVE32F-NEXT: bnez a0, .LBB47_13
4887 ; RV64ZVE32F-NEXT: .LBB47_6: # %else6
4888 ; RV64ZVE32F-NEXT: andi a0, a5, 16
4889 ; RV64ZVE32F-NEXT: bnez a0, .LBB47_14
4890 ; RV64ZVE32F-NEXT: .LBB47_7: # %else8
4891 ; RV64ZVE32F-NEXT: andi a0, a5, 32
4892 ; RV64ZVE32F-NEXT: beqz a0, .LBB47_9
4893 ; RV64ZVE32F-NEXT: .LBB47_8: # %cond.store9
4894 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
4895 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4896 ; RV64ZVE32F-NEXT: and a0, a0, a4
4897 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4898 ; RV64ZVE32F-NEXT: add a0, a1, a0
4899 ; RV64ZVE32F-NEXT: sd a6, 0(a0)
4900 ; RV64ZVE32F-NEXT: .LBB47_9: # %else10
4901 ; RV64ZVE32F-NEXT: andi a0, a5, 64
4902 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
4903 ; RV64ZVE32F-NEXT: bnez a0, .LBB47_15
4904 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
4905 ; RV64ZVE32F-NEXT: andi a0, a5, -128
4906 ; RV64ZVE32F-NEXT: bnez a0, .LBB47_16
4907 ; RV64ZVE32F-NEXT: .LBB47_11: # %else14
4908 ; RV64ZVE32F-NEXT: ret
4909 ; RV64ZVE32F-NEXT: .LBB47_12: # %cond.store3
4910 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4911 ; RV64ZVE32F-NEXT: and a0, a0, a4
4912 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4913 ; RV64ZVE32F-NEXT: add a0, a1, a0
4914 ; RV64ZVE32F-NEXT: sd t1, 0(a0)
4915 ; RV64ZVE32F-NEXT: andi a0, a5, 8
4916 ; RV64ZVE32F-NEXT: beqz a0, .LBB47_6
4917 ; RV64ZVE32F-NEXT: .LBB47_13: # %cond.store5
4918 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
4919 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4920 ; RV64ZVE32F-NEXT: and a0, a0, a4
4921 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4922 ; RV64ZVE32F-NEXT: add a0, a1, a0
4923 ; RV64ZVE32F-NEXT: sd t0, 0(a0)
4924 ; RV64ZVE32F-NEXT: andi a0, a5, 16
4925 ; RV64ZVE32F-NEXT: beqz a0, .LBB47_7
4926 ; RV64ZVE32F-NEXT: .LBB47_14: # %cond.store7
4927 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9
4928 ; RV64ZVE32F-NEXT: and a0, a0, a4
4929 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4930 ; RV64ZVE32F-NEXT: add a0, a1, a0
4931 ; RV64ZVE32F-NEXT: sd a7, 0(a0)
4932 ; RV64ZVE32F-NEXT: andi a0, a5, 32
4933 ; RV64ZVE32F-NEXT: bnez a0, .LBB47_8
4934 ; RV64ZVE32F-NEXT: j .LBB47_9
4935 ; RV64ZVE32F-NEXT: .LBB47_15: # %cond.store11
4936 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4937 ; RV64ZVE32F-NEXT: and a0, a0, a4
4938 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4939 ; RV64ZVE32F-NEXT: add a0, a1, a0
4940 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
4941 ; RV64ZVE32F-NEXT: andi a0, a5, -128
4942 ; RV64ZVE32F-NEXT: beqz a0, .LBB47_11
4943 ; RV64ZVE32F-NEXT: .LBB47_16: # %cond.store13
4944 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
4945 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4946 ; RV64ZVE32F-NEXT: and a0, a0, a4
4947 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4948 ; RV64ZVE32F-NEXT: add a0, a1, a0
4949 ; RV64ZVE32F-NEXT: sd a2, 0(a0)
4950 ; RV64ZVE32F-NEXT: ret
; IR under test: zero-extend the i16 indices to i64, form one i64-element
; address per lane from %base, and scatter %val through them under mask %m
; (the i32 8 operand is the alignment argument of the intrinsic).
4951 %eidxs = zext <8 x i16> %idxs to <8 x i64>
4952 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
4953 call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
; Masked scatter of <8 x i64> where the <8 x i32> indices feed the GEP
; directly, with no explicit extension in the IR.
; As recorded in the check lines below, RV32 with full V shifts the 32-bit
; indices left by 3 and issues a masked vsoxei32.v, whereas RV64 with full V
; first sign-extends them to e64 (vsext.vf2) and issues a masked vsoxei64.v.
; The two ZVE32F configurations test the mask bits one at a time and perform a
; scalar store for each active lane.
4957 define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m) {
4958 ; RV32V-LABEL: mscatter_baseidx_v8i32_v8i64:
4960 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
4961 ; RV32V-NEXT: vsll.vi v12, v12, 3
4962 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
4963 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
4966 ; RV64-LABEL: mscatter_baseidx_v8i32_v8i64:
4968 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
4969 ; RV64-NEXT: vsext.vf2 v16, v12
4970 ; RV64-NEXT: vsll.vi v12, v16, 3
4971 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
4974 ; RV32ZVE32F-LABEL: mscatter_baseidx_v8i32_v8i64:
4975 ; RV32ZVE32F: # %bb.0:
4976 ; RV32ZVE32F-NEXT: addi sp, sp, -16
4977 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
4978 ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
4979 ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
4980 ; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
4981 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
4982 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
4983 ; RV32ZVE32F-NEXT: .cfi_offset s2, -12
4984 ; RV32ZVE32F-NEXT: lw a2, 60(a0)
4985 ; RV32ZVE32F-NEXT: lw a3, 56(a0)
4986 ; RV32ZVE32F-NEXT: lw a4, 52(a0)
4987 ; RV32ZVE32F-NEXT: lw a5, 48(a0)
4988 ; RV32ZVE32F-NEXT: lw a6, 44(a0)
4989 ; RV32ZVE32F-NEXT: lw a7, 40(a0)
4990 ; RV32ZVE32F-NEXT: lw t0, 36(a0)
4991 ; RV32ZVE32F-NEXT: lw t1, 32(a0)
4992 ; RV32ZVE32F-NEXT: lw t2, 28(a0)
4993 ; RV32ZVE32F-NEXT: lw t3, 24(a0)
4994 ; RV32ZVE32F-NEXT: lw t4, 20(a0)
4995 ; RV32ZVE32F-NEXT: lw t5, 16(a0)
4996 ; RV32ZVE32F-NEXT: lw s0, 12(a0)
4997 ; RV32ZVE32F-NEXT: lw t6, 8(a0)
4998 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
4999 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
5000 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
5001 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
5002 ; RV32ZVE32F-NEXT: vmv.x.s a1, v0
5003 ; RV32ZVE32F-NEXT: andi s1, a1, 1
5004 ; RV32ZVE32F-NEXT: bnez s1, .LBB48_10
5005 ; RV32ZVE32F-NEXT: # %bb.1: # %else
5006 ; RV32ZVE32F-NEXT: andi a0, a1, 2
5007 ; RV32ZVE32F-NEXT: bnez a0, .LBB48_11
5008 ; RV32ZVE32F-NEXT: .LBB48_2: # %else2
5009 ; RV32ZVE32F-NEXT: andi a0, a1, 4
5010 ; RV32ZVE32F-NEXT: bnez a0, .LBB48_12
5011 ; RV32ZVE32F-NEXT: .LBB48_3: # %else4
5012 ; RV32ZVE32F-NEXT: andi a0, a1, 8
5013 ; RV32ZVE32F-NEXT: bnez a0, .LBB48_13
5014 ; RV32ZVE32F-NEXT: .LBB48_4: # %else6
5015 ; RV32ZVE32F-NEXT: andi a0, a1, 16
5016 ; RV32ZVE32F-NEXT: bnez a0, .LBB48_14
5017 ; RV32ZVE32F-NEXT: .LBB48_5: # %else8
5018 ; RV32ZVE32F-NEXT: andi a0, a1, 32
5019 ; RV32ZVE32F-NEXT: bnez a0, .LBB48_15
5020 ; RV32ZVE32F-NEXT: .LBB48_6: # %else10
5021 ; RV32ZVE32F-NEXT: andi a0, a1, 64
5022 ; RV32ZVE32F-NEXT: bnez a0, .LBB48_16
5023 ; RV32ZVE32F-NEXT: .LBB48_7: # %else12
5024 ; RV32ZVE32F-NEXT: andi a0, a1, -128
5025 ; RV32ZVE32F-NEXT: beqz a0, .LBB48_9
5026 ; RV32ZVE32F-NEXT: .LBB48_8: # %cond.store13
5027 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5028 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
5029 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
5030 ; RV32ZVE32F-NEXT: sw a3, 0(a0)
5031 ; RV32ZVE32F-NEXT: sw a2, 4(a0)
5032 ; RV32ZVE32F-NEXT: .LBB48_9: # %else14
5033 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
5034 ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
5035 ; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
5036 ; RV32ZVE32F-NEXT: addi sp, sp, 16
5037 ; RV32ZVE32F-NEXT: ret
5038 ; RV32ZVE32F-NEXT: .LBB48_10: # %cond.store
5039 ; RV32ZVE32F-NEXT: lw s1, 4(a0)
5040 ; RV32ZVE32F-NEXT: lw a0, 0(a0)
5041 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
5042 ; RV32ZVE32F-NEXT: vmv.x.s s2, v8
5043 ; RV32ZVE32F-NEXT: sw s1, 4(s2)
5044 ; RV32ZVE32F-NEXT: sw a0, 0(s2)
5045 ; RV32ZVE32F-NEXT: andi a0, a1, 2
5046 ; RV32ZVE32F-NEXT: beqz a0, .LBB48_2
5047 ; RV32ZVE32F-NEXT: .LBB48_11: # %cond.store1
5048 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5049 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
5050 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5051 ; RV32ZVE32F-NEXT: sw s0, 4(a0)
5052 ; RV32ZVE32F-NEXT: sw t6, 0(a0)
5053 ; RV32ZVE32F-NEXT: andi a0, a1, 4
5054 ; RV32ZVE32F-NEXT: beqz a0, .LBB48_3
5055 ; RV32ZVE32F-NEXT: .LBB48_12: # %cond.store3
5056 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5057 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
5058 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5059 ; RV32ZVE32F-NEXT: sw t5, 0(a0)
5060 ; RV32ZVE32F-NEXT: sw t4, 4(a0)
5061 ; RV32ZVE32F-NEXT: andi a0, a1, 8
5062 ; RV32ZVE32F-NEXT: beqz a0, .LBB48_4
5063 ; RV32ZVE32F-NEXT: .LBB48_13: # %cond.store5
5064 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5065 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
5066 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5067 ; RV32ZVE32F-NEXT: sw t3, 0(a0)
5068 ; RV32ZVE32F-NEXT: sw t2, 4(a0)
5069 ; RV32ZVE32F-NEXT: andi a0, a1, 16
5070 ; RV32ZVE32F-NEXT: beqz a0, .LBB48_5
5071 ; RV32ZVE32F-NEXT: .LBB48_14: # %cond.store7
5072 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5073 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
5074 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5075 ; RV32ZVE32F-NEXT: sw t1, 0(a0)
5076 ; RV32ZVE32F-NEXT: sw t0, 4(a0)
5077 ; RV32ZVE32F-NEXT: andi a0, a1, 32
5078 ; RV32ZVE32F-NEXT: beqz a0, .LBB48_6
5079 ; RV32ZVE32F-NEXT: .LBB48_15: # %cond.store9
5080 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5081 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
5082 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5083 ; RV32ZVE32F-NEXT: sw a7, 0(a0)
5084 ; RV32ZVE32F-NEXT: sw a6, 4(a0)
5085 ; RV32ZVE32F-NEXT: andi a0, a1, 64
5086 ; RV32ZVE32F-NEXT: beqz a0, .LBB48_7
5087 ; RV32ZVE32F-NEXT: .LBB48_16: # %cond.store11
5088 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5089 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
5090 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5091 ; RV32ZVE32F-NEXT: sw a5, 0(a0)
5092 ; RV32ZVE32F-NEXT: sw a4, 4(a0)
5093 ; RV32ZVE32F-NEXT: andi a0, a1, -128
5094 ; RV32ZVE32F-NEXT: bnez a0, .LBB48_8
5095 ; RV32ZVE32F-NEXT: j .LBB48_9
5097 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i32_v8i64:
5098 ; RV64ZVE32F: # %bb.0:
5099 ; RV64ZVE32F-NEXT: ld a2, 56(a0)
5100 ; RV64ZVE32F-NEXT: ld a3, 48(a0)
5101 ; RV64ZVE32F-NEXT: ld a5, 40(a0)
5102 ; RV64ZVE32F-NEXT: ld a6, 32(a0)
5103 ; RV64ZVE32F-NEXT: ld a7, 24(a0)
5104 ; RV64ZVE32F-NEXT: ld t0, 16(a0)
5105 ; RV64ZVE32F-NEXT: ld t1, 8(a0)
5106 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
5107 ; RV64ZVE32F-NEXT: vmv.x.s a4, v0
5108 ; RV64ZVE32F-NEXT: andi t2, a4, 1
5109 ; RV64ZVE32F-NEXT: beqz t2, .LBB48_2
5110 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
5111 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
5112 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
5113 ; RV64ZVE32F-NEXT: vmv.x.s t2, v8
5114 ; RV64ZVE32F-NEXT: slli t2, t2, 3
5115 ; RV64ZVE32F-NEXT: add t2, a1, t2
5116 ; RV64ZVE32F-NEXT: sd a0, 0(t2)
5117 ; RV64ZVE32F-NEXT: .LBB48_2: # %else
5118 ; RV64ZVE32F-NEXT: andi a0, a4, 2
5119 ; RV64ZVE32F-NEXT: beqz a0, .LBB48_4
5120 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
5121 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5122 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
5123 ; RV64ZVE32F-NEXT: vmv.x.s a0, v10
5124 ; RV64ZVE32F-NEXT: slli a0, a0, 3
5125 ; RV64ZVE32F-NEXT: add a0, a1, a0
5126 ; RV64ZVE32F-NEXT: sd t1, 0(a0)
5127 ; RV64ZVE32F-NEXT: .LBB48_4: # %else2
5128 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
5129 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
5130 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
5131 ; RV64ZVE32F-NEXT: andi a0, a4, 4
5132 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
5133 ; RV64ZVE32F-NEXT: bnez a0, .LBB48_12
5134 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
5135 ; RV64ZVE32F-NEXT: andi a0, a4, 8
5136 ; RV64ZVE32F-NEXT: bnez a0, .LBB48_13
5137 ; RV64ZVE32F-NEXT: .LBB48_6: # %else6
5138 ; RV64ZVE32F-NEXT: andi a0, a4, 16
5139 ; RV64ZVE32F-NEXT: bnez a0, .LBB48_14
5140 ; RV64ZVE32F-NEXT: .LBB48_7: # %else8
5141 ; RV64ZVE32F-NEXT: andi a0, a4, 32
5142 ; RV64ZVE32F-NEXT: beqz a0, .LBB48_9
5143 ; RV64ZVE32F-NEXT: .LBB48_8: # %cond.store9
5144 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
5145 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
5146 ; RV64ZVE32F-NEXT: slli a0, a0, 3
5147 ; RV64ZVE32F-NEXT: add a0, a1, a0
5148 ; RV64ZVE32F-NEXT: sd a5, 0(a0)
5149 ; RV64ZVE32F-NEXT: .LBB48_9: # %else10
5150 ; RV64ZVE32F-NEXT: andi a0, a4, 64
5151 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
5152 ; RV64ZVE32F-NEXT: bnez a0, .LBB48_15
5153 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
5154 ; RV64ZVE32F-NEXT: andi a0, a4, -128
5155 ; RV64ZVE32F-NEXT: bnez a0, .LBB48_16
5156 ; RV64ZVE32F-NEXT: .LBB48_11: # %else14
5157 ; RV64ZVE32F-NEXT: ret
5158 ; RV64ZVE32F-NEXT: .LBB48_12: # %cond.store3
5159 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
5160 ; RV64ZVE32F-NEXT: slli a0, a0, 3
5161 ; RV64ZVE32F-NEXT: add a0, a1, a0
5162 ; RV64ZVE32F-NEXT: sd t0, 0(a0)
5163 ; RV64ZVE32F-NEXT: andi a0, a4, 8
5164 ; RV64ZVE32F-NEXT: beqz a0, .LBB48_6
5165 ; RV64ZVE32F-NEXT: .LBB48_13: # %cond.store5
5166 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
5167 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
5168 ; RV64ZVE32F-NEXT: slli a0, a0, 3
5169 ; RV64ZVE32F-NEXT: add a0, a1, a0
5170 ; RV64ZVE32F-NEXT: sd a7, 0(a0)
5171 ; RV64ZVE32F-NEXT: andi a0, a4, 16
5172 ; RV64ZVE32F-NEXT: beqz a0, .LBB48_7
5173 ; RV64ZVE32F-NEXT: .LBB48_14: # %cond.store7
5174 ; RV64ZVE32F-NEXT: vmv.x.s a0, v10
5175 ; RV64ZVE32F-NEXT: slli a0, a0, 3
5176 ; RV64ZVE32F-NEXT: add a0, a1, a0
5177 ; RV64ZVE32F-NEXT: sd a6, 0(a0)
5178 ; RV64ZVE32F-NEXT: andi a0, a4, 32
5179 ; RV64ZVE32F-NEXT: bnez a0, .LBB48_8
5180 ; RV64ZVE32F-NEXT: j .LBB48_9
5181 ; RV64ZVE32F-NEXT: .LBB48_15: # %cond.store11
5182 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
5183 ; RV64ZVE32F-NEXT: slli a0, a0, 3
5184 ; RV64ZVE32F-NEXT: add a0, a1, a0
5185 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
5186 ; RV64ZVE32F-NEXT: andi a0, a4, -128
5187 ; RV64ZVE32F-NEXT: beqz a0, .LBB48_11
5188 ; RV64ZVE32F-NEXT: .LBB48_16: # %cond.store13
5189 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
5190 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
5191 ; RV64ZVE32F-NEXT: slli a0, a0, 3
5192 ; RV64ZVE32F-NEXT: add a0, a1, a0
5193 ; RV64ZVE32F-NEXT: sd a2, 0(a0)
5194 ; RV64ZVE32F-NEXT: ret
; IR under test: index %base as an array of i64 with the raw i32 indices, then
; scatter %val through the resulting pointers under mask %m (the i32 8 operand
; is the alignment argument of the intrinsic).
5195 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i32> %idxs
5196 call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
; Masked scatter of <8 x i64> to base + sext(<8 x i32> index) * 8, under %m.
; As recorded in the check lines below, RV32 with full V shifts the 32-bit
; indices left by 3 and issues a masked vsoxei32.v, whereas RV64 with full V
; sign-extends them to e64 (vsext.vf2) before shifting and issues a masked
; vsoxei64.v. The two ZVE32F configurations test the mask bits one at a time
; and perform a scalar store for each active lane.
5200 define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m) {
5201 ; RV32V-LABEL: mscatter_baseidx_sext_v8i32_v8i64:
5203 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
5204 ; RV32V-NEXT: vsll.vi v12, v12, 3
5205 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
5206 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
5209 ; RV64-LABEL: mscatter_baseidx_sext_v8i32_v8i64:
5211 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
5212 ; RV64-NEXT: vsext.vf2 v16, v12
5213 ; RV64-NEXT: vsll.vi v12, v16, 3
5214 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
5217 ; RV32ZVE32F-LABEL: mscatter_baseidx_sext_v8i32_v8i64:
5218 ; RV32ZVE32F: # %bb.0:
5219 ; RV32ZVE32F-NEXT: addi sp, sp, -16
5220 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
5221 ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
5222 ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
5223 ; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
5224 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
5225 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
5226 ; RV32ZVE32F-NEXT: .cfi_offset s2, -12
5227 ; RV32ZVE32F-NEXT: lw a2, 60(a0)
5228 ; RV32ZVE32F-NEXT: lw a3, 56(a0)
5229 ; RV32ZVE32F-NEXT: lw a4, 52(a0)
5230 ; RV32ZVE32F-NEXT: lw a5, 48(a0)
5231 ; RV32ZVE32F-NEXT: lw a6, 44(a0)
5232 ; RV32ZVE32F-NEXT: lw a7, 40(a0)
5233 ; RV32ZVE32F-NEXT: lw t0, 36(a0)
5234 ; RV32ZVE32F-NEXT: lw t1, 32(a0)
5235 ; RV32ZVE32F-NEXT: lw t2, 28(a0)
5236 ; RV32ZVE32F-NEXT: lw t3, 24(a0)
5237 ; RV32ZVE32F-NEXT: lw t4, 20(a0)
5238 ; RV32ZVE32F-NEXT: lw t5, 16(a0)
5239 ; RV32ZVE32F-NEXT: lw s0, 12(a0)
5240 ; RV32ZVE32F-NEXT: lw t6, 8(a0)
5241 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
5242 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
5243 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
5244 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
5245 ; RV32ZVE32F-NEXT: vmv.x.s a1, v0
5246 ; RV32ZVE32F-NEXT: andi s1, a1, 1
5247 ; RV32ZVE32F-NEXT: bnez s1, .LBB49_10
5248 ; RV32ZVE32F-NEXT: # %bb.1: # %else
5249 ; RV32ZVE32F-NEXT: andi a0, a1, 2
5250 ; RV32ZVE32F-NEXT: bnez a0, .LBB49_11
5251 ; RV32ZVE32F-NEXT: .LBB49_2: # %else2
5252 ; RV32ZVE32F-NEXT: andi a0, a1, 4
5253 ; RV32ZVE32F-NEXT: bnez a0, .LBB49_12
5254 ; RV32ZVE32F-NEXT: .LBB49_3: # %else4
5255 ; RV32ZVE32F-NEXT: andi a0, a1, 8
5256 ; RV32ZVE32F-NEXT: bnez a0, .LBB49_13
5257 ; RV32ZVE32F-NEXT: .LBB49_4: # %else6
5258 ; RV32ZVE32F-NEXT: andi a0, a1, 16
5259 ; RV32ZVE32F-NEXT: bnez a0, .LBB49_14
5260 ; RV32ZVE32F-NEXT: .LBB49_5: # %else8
5261 ; RV32ZVE32F-NEXT: andi a0, a1, 32
5262 ; RV32ZVE32F-NEXT: bnez a0, .LBB49_15
5263 ; RV32ZVE32F-NEXT: .LBB49_6: # %else10
5264 ; RV32ZVE32F-NEXT: andi a0, a1, 64
5265 ; RV32ZVE32F-NEXT: bnez a0, .LBB49_16
5266 ; RV32ZVE32F-NEXT: .LBB49_7: # %else12
5267 ; RV32ZVE32F-NEXT: andi a0, a1, -128
5268 ; RV32ZVE32F-NEXT: beqz a0, .LBB49_9
5269 ; RV32ZVE32F-NEXT: .LBB49_8: # %cond.store13
5270 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5271 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
5272 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
5273 ; RV32ZVE32F-NEXT: sw a3, 0(a0)
5274 ; RV32ZVE32F-NEXT: sw a2, 4(a0)
5275 ; RV32ZVE32F-NEXT: .LBB49_9: # %else14
5276 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
5277 ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
5278 ; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
5279 ; RV32ZVE32F-NEXT: addi sp, sp, 16
5280 ; RV32ZVE32F-NEXT: ret
5281 ; RV32ZVE32F-NEXT: .LBB49_10: # %cond.store
5282 ; RV32ZVE32F-NEXT: lw s1, 4(a0)
5283 ; RV32ZVE32F-NEXT: lw a0, 0(a0)
5284 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
5285 ; RV32ZVE32F-NEXT: vmv.x.s s2, v8
5286 ; RV32ZVE32F-NEXT: sw s1, 4(s2)
5287 ; RV32ZVE32F-NEXT: sw a0, 0(s2)
5288 ; RV32ZVE32F-NEXT: andi a0, a1, 2
5289 ; RV32ZVE32F-NEXT: beqz a0, .LBB49_2
5290 ; RV32ZVE32F-NEXT: .LBB49_11: # %cond.store1
5291 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5292 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
5293 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5294 ; RV32ZVE32F-NEXT: sw s0, 4(a0)
5295 ; RV32ZVE32F-NEXT: sw t6, 0(a0)
5296 ; RV32ZVE32F-NEXT: andi a0, a1, 4
5297 ; RV32ZVE32F-NEXT: beqz a0, .LBB49_3
5298 ; RV32ZVE32F-NEXT: .LBB49_12: # %cond.store3
5299 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5300 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
5301 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5302 ; RV32ZVE32F-NEXT: sw t5, 0(a0)
5303 ; RV32ZVE32F-NEXT: sw t4, 4(a0)
5304 ; RV32ZVE32F-NEXT: andi a0, a1, 8
5305 ; RV32ZVE32F-NEXT: beqz a0, .LBB49_4
5306 ; RV32ZVE32F-NEXT: .LBB49_13: # %cond.store5
5307 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5308 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
5309 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5310 ; RV32ZVE32F-NEXT: sw t3, 0(a0)
5311 ; RV32ZVE32F-NEXT: sw t2, 4(a0)
5312 ; RV32ZVE32F-NEXT: andi a0, a1, 16
5313 ; RV32ZVE32F-NEXT: beqz a0, .LBB49_5
5314 ; RV32ZVE32F-NEXT: .LBB49_14: # %cond.store7
5315 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5316 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
5317 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5318 ; RV32ZVE32F-NEXT: sw t1, 0(a0)
5319 ; RV32ZVE32F-NEXT: sw t0, 4(a0)
5320 ; RV32ZVE32F-NEXT: andi a0, a1, 32
5321 ; RV32ZVE32F-NEXT: beqz a0, .LBB49_6
5322 ; RV32ZVE32F-NEXT: .LBB49_15: # %cond.store9
5323 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5324 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
5325 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5326 ; RV32ZVE32F-NEXT: sw a7, 0(a0)
5327 ; RV32ZVE32F-NEXT: sw a6, 4(a0)
5328 ; RV32ZVE32F-NEXT: andi a0, a1, 64
5329 ; RV32ZVE32F-NEXT: beqz a0, .LBB49_7
5330 ; RV32ZVE32F-NEXT: .LBB49_16: # %cond.store11
5331 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5332 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
5333 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5334 ; RV32ZVE32F-NEXT: sw a5, 0(a0)
5335 ; RV32ZVE32F-NEXT: sw a4, 4(a0)
5336 ; RV32ZVE32F-NEXT: andi a0, a1, -128
5337 ; RV32ZVE32F-NEXT: bnez a0, .LBB49_8
5338 ; RV32ZVE32F-NEXT: j .LBB49_9
5340 ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i32_v8i64:
5341 ; RV64ZVE32F: # %bb.0:
5342 ; RV64ZVE32F-NEXT: ld a2, 56(a0)
5343 ; RV64ZVE32F-NEXT: ld a3, 48(a0)
5344 ; RV64ZVE32F-NEXT: ld a5, 40(a0)
5345 ; RV64ZVE32F-NEXT: ld a6, 32(a0)
5346 ; RV64ZVE32F-NEXT: ld a7, 24(a0)
5347 ; RV64ZVE32F-NEXT: ld t0, 16(a0)
5348 ; RV64ZVE32F-NEXT: ld t1, 8(a0)
5349 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
5350 ; RV64ZVE32F-NEXT: vmv.x.s a4, v0
5351 ; RV64ZVE32F-NEXT: andi t2, a4, 1
5352 ; RV64ZVE32F-NEXT: beqz t2, .LBB49_2
5353 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
5354 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
5355 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
5356 ; RV64ZVE32F-NEXT: vmv.x.s t2, v8
5357 ; RV64ZVE32F-NEXT: slli t2, t2, 3
5358 ; RV64ZVE32F-NEXT: add t2, a1, t2
5359 ; RV64ZVE32F-NEXT: sd a0, 0(t2)
5360 ; RV64ZVE32F-NEXT: .LBB49_2: # %else
5361 ; RV64ZVE32F-NEXT: andi a0, a4, 2
5362 ; RV64ZVE32F-NEXT: beqz a0, .LBB49_4
5363 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
5364 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5365 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
5366 ; RV64ZVE32F-NEXT: vmv.x.s a0, v10
5367 ; RV64ZVE32F-NEXT: slli a0, a0, 3
5368 ; RV64ZVE32F-NEXT: add a0, a1, a0
5369 ; RV64ZVE32F-NEXT: sd t1, 0(a0)
5370 ; RV64ZVE32F-NEXT: .LBB49_4: # %else2
5371 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
5372 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
5373 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
5374 ; RV64ZVE32F-NEXT: andi a0, a4, 4
5375 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
5376 ; RV64ZVE32F-NEXT: bnez a0, .LBB49_12
5377 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
5378 ; RV64ZVE32F-NEXT: andi a0, a4, 8
5379 ; RV64ZVE32F-NEXT: bnez a0, .LBB49_13
5380 ; RV64ZVE32F-NEXT: .LBB49_6: # %else6
5381 ; RV64ZVE32F-NEXT: andi a0, a4, 16
5382 ; RV64ZVE32F-NEXT: bnez a0, .LBB49_14
5383 ; RV64ZVE32F-NEXT: .LBB49_7: # %else8
5384 ; RV64ZVE32F-NEXT: andi a0, a4, 32
5385 ; RV64ZVE32F-NEXT: beqz a0, .LBB49_9
5386 ; RV64ZVE32F-NEXT: .LBB49_8: # %cond.store9
5387 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
5388 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
5389 ; RV64ZVE32F-NEXT: slli a0, a0, 3
5390 ; RV64ZVE32F-NEXT: add a0, a1, a0
5391 ; RV64ZVE32F-NEXT: sd a5, 0(a0)
5392 ; RV64ZVE32F-NEXT: .LBB49_9: # %else10
5393 ; RV64ZVE32F-NEXT: andi a0, a4, 64
5394 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
5395 ; RV64ZVE32F-NEXT: bnez a0, .LBB49_15
5396 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
5397 ; RV64ZVE32F-NEXT: andi a0, a4, -128
5398 ; RV64ZVE32F-NEXT: bnez a0, .LBB49_16
5399 ; RV64ZVE32F-NEXT: .LBB49_11: # %else14
5400 ; RV64ZVE32F-NEXT: ret
5401 ; RV64ZVE32F-NEXT: .LBB49_12: # %cond.store3
5402 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
5403 ; RV64ZVE32F-NEXT: slli a0, a0, 3
5404 ; RV64ZVE32F-NEXT: add a0, a1, a0
5405 ; RV64ZVE32F-NEXT: sd t0, 0(a0)
5406 ; RV64ZVE32F-NEXT: andi a0, a4, 8
5407 ; RV64ZVE32F-NEXT: beqz a0, .LBB49_6
5408 ; RV64ZVE32F-NEXT: .LBB49_13: # %cond.store5
5409 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
5410 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
5411 ; RV64ZVE32F-NEXT: slli a0, a0, 3
5412 ; RV64ZVE32F-NEXT: add a0, a1, a0
5413 ; RV64ZVE32F-NEXT: sd a7, 0(a0)
5414 ; RV64ZVE32F-NEXT: andi a0, a4, 16
5415 ; RV64ZVE32F-NEXT: beqz a0, .LBB49_7
5416 ; RV64ZVE32F-NEXT: .LBB49_14: # %cond.store7
5417 ; RV64ZVE32F-NEXT: vmv.x.s a0, v10
5418 ; RV64ZVE32F-NEXT: slli a0, a0, 3
5419 ; RV64ZVE32F-NEXT: add a0, a1, a0
5420 ; RV64ZVE32F-NEXT: sd a6, 0(a0)
5421 ; RV64ZVE32F-NEXT: andi a0, a4, 32
5422 ; RV64ZVE32F-NEXT: bnez a0, .LBB49_8
5423 ; RV64ZVE32F-NEXT: j .LBB49_9
5424 ; RV64ZVE32F-NEXT: .LBB49_15: # %cond.store11
5425 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
5426 ; RV64ZVE32F-NEXT: slli a0, a0, 3
5427 ; RV64ZVE32F-NEXT: add a0, a1, a0
5428 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
5429 ; RV64ZVE32F-NEXT: andi a0, a4, -128
5430 ; RV64ZVE32F-NEXT: beqz a0, .LBB49_11
5431 ; RV64ZVE32F-NEXT: .LBB49_16: # %cond.store13
5432 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
5433 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
5434 ; RV64ZVE32F-NEXT: slli a0, a0, 3
5435 ; RV64ZVE32F-NEXT: add a0, a1, a0
5436 ; RV64ZVE32F-NEXT: sd a2, 0(a0)
5437 ; RV64ZVE32F-NEXT: ret
; IR under test: sign-extend the i32 indices to i64, form one i64-element
; address per lane from %base, and scatter %val through them under mask %m
; (the i32 8 operand is the alignment argument of the intrinsic).
5438 %eidxs = sext <8 x i32> %idxs to <8 x i64>
5439 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
5440 call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
; Masked scatter of <8 x i64> %val to %base + zext(%idxs[i]) * 8: the i32
; indices are zero-extended to i64 and fed to a GEP over i64; the scatter uses
; alignment 8 and mask %m.
; Expected lowering per check prefix (prefixes are defined by the RUN lines at
; the top of the file):
;   RV32V:      indices shifted left by 3 at e32, then one masked vsoxei32.v.
;   RV64:       vzext.vf2 widens the indices to e64, vsll.vi by 3, then one
;               masked vsoxei64.v.
;   RV32ZVE32F: addresses are formed as a vector (vsll.vi 3 + vadd.vx base);
;               the mask is moved to a scalar and each lane is stored
;               conditionally as two sw instructions (offsets 0 and 4).
;   RV64ZVE32F: each lane's index is moved to a scalar, zero-extended and
;               scaled by 8 with slli 32 / srli 29, added to the base, and
;               stored with a conditional sd.
5444 define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m) {
5445 ; RV32V-LABEL: mscatter_baseidx_zext_v8i32_v8i64:
5447 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
5448 ; RV32V-NEXT: vsll.vi v12, v12, 3
5449 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
5450 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
5453 ; RV64-LABEL: mscatter_baseidx_zext_v8i32_v8i64:
5455 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
5456 ; RV64-NEXT: vzext.vf2 v16, v12
5457 ; RV64-NEXT: vsll.vi v12, v16, 3
5458 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
5461 ; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i32_v8i64:
5462 ; RV32ZVE32F: # %bb.0:
5463 ; RV32ZVE32F-NEXT: addi sp, sp, -16
5464 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
5465 ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
5466 ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
5467 ; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
5468 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
5469 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
5470 ; RV32ZVE32F-NEXT: .cfi_offset s2, -12
5471 ; RV32ZVE32F-NEXT: lw a2, 60(a0)
5472 ; RV32ZVE32F-NEXT: lw a3, 56(a0)
5473 ; RV32ZVE32F-NEXT: lw a4, 52(a0)
5474 ; RV32ZVE32F-NEXT: lw a5, 48(a0)
5475 ; RV32ZVE32F-NEXT: lw a6, 44(a0)
5476 ; RV32ZVE32F-NEXT: lw a7, 40(a0)
5477 ; RV32ZVE32F-NEXT: lw t0, 36(a0)
5478 ; RV32ZVE32F-NEXT: lw t1, 32(a0)
5479 ; RV32ZVE32F-NEXT: lw t2, 28(a0)
5480 ; RV32ZVE32F-NEXT: lw t3, 24(a0)
5481 ; RV32ZVE32F-NEXT: lw t4, 20(a0)
5482 ; RV32ZVE32F-NEXT: lw t5, 16(a0)
5483 ; RV32ZVE32F-NEXT: lw s0, 12(a0)
5484 ; RV32ZVE32F-NEXT: lw t6, 8(a0)
5485 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
5486 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
5487 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
5488 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
5489 ; RV32ZVE32F-NEXT: vmv.x.s a1, v0
5490 ; RV32ZVE32F-NEXT: andi s1, a1, 1
5491 ; RV32ZVE32F-NEXT: bnez s1, .LBB50_10
5492 ; RV32ZVE32F-NEXT: # %bb.1: # %else
5493 ; RV32ZVE32F-NEXT: andi a0, a1, 2
5494 ; RV32ZVE32F-NEXT: bnez a0, .LBB50_11
5495 ; RV32ZVE32F-NEXT: .LBB50_2: # %else2
5496 ; RV32ZVE32F-NEXT: andi a0, a1, 4
5497 ; RV32ZVE32F-NEXT: bnez a0, .LBB50_12
5498 ; RV32ZVE32F-NEXT: .LBB50_3: # %else4
5499 ; RV32ZVE32F-NEXT: andi a0, a1, 8
5500 ; RV32ZVE32F-NEXT: bnez a0, .LBB50_13
5501 ; RV32ZVE32F-NEXT: .LBB50_4: # %else6
5502 ; RV32ZVE32F-NEXT: andi a0, a1, 16
5503 ; RV32ZVE32F-NEXT: bnez a0, .LBB50_14
5504 ; RV32ZVE32F-NEXT: .LBB50_5: # %else8
5505 ; RV32ZVE32F-NEXT: andi a0, a1, 32
5506 ; RV32ZVE32F-NEXT: bnez a0, .LBB50_15
5507 ; RV32ZVE32F-NEXT: .LBB50_6: # %else10
5508 ; RV32ZVE32F-NEXT: andi a0, a1, 64
5509 ; RV32ZVE32F-NEXT: bnez a0, .LBB50_16
5510 ; RV32ZVE32F-NEXT: .LBB50_7: # %else12
5511 ; RV32ZVE32F-NEXT: andi a0, a1, -128
5512 ; RV32ZVE32F-NEXT: beqz a0, .LBB50_9
5513 ; RV32ZVE32F-NEXT: .LBB50_8: # %cond.store13
5514 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5515 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
5516 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
5517 ; RV32ZVE32F-NEXT: sw a3, 0(a0)
5518 ; RV32ZVE32F-NEXT: sw a2, 4(a0)
5519 ; RV32ZVE32F-NEXT: .LBB50_9: # %else14
5520 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
5521 ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
5522 ; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
5523 ; RV32ZVE32F-NEXT: addi sp, sp, 16
5524 ; RV32ZVE32F-NEXT: ret
5525 ; RV32ZVE32F-NEXT: .LBB50_10: # %cond.store
5526 ; RV32ZVE32F-NEXT: lw s1, 4(a0)
5527 ; RV32ZVE32F-NEXT: lw a0, 0(a0)
5528 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
5529 ; RV32ZVE32F-NEXT: vmv.x.s s2, v8
5530 ; RV32ZVE32F-NEXT: sw s1, 4(s2)
5531 ; RV32ZVE32F-NEXT: sw a0, 0(s2)
5532 ; RV32ZVE32F-NEXT: andi a0, a1, 2
5533 ; RV32ZVE32F-NEXT: beqz a0, .LBB50_2
5534 ; RV32ZVE32F-NEXT: .LBB50_11: # %cond.store1
5535 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5536 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
5537 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5538 ; RV32ZVE32F-NEXT: sw s0, 4(a0)
5539 ; RV32ZVE32F-NEXT: sw t6, 0(a0)
5540 ; RV32ZVE32F-NEXT: andi a0, a1, 4
5541 ; RV32ZVE32F-NEXT: beqz a0, .LBB50_3
5542 ; RV32ZVE32F-NEXT: .LBB50_12: # %cond.store3
5543 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5544 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
5545 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5546 ; RV32ZVE32F-NEXT: sw t5, 0(a0)
5547 ; RV32ZVE32F-NEXT: sw t4, 4(a0)
5548 ; RV32ZVE32F-NEXT: andi a0, a1, 8
5549 ; RV32ZVE32F-NEXT: beqz a0, .LBB50_4
5550 ; RV32ZVE32F-NEXT: .LBB50_13: # %cond.store5
5551 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5552 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
5553 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5554 ; RV32ZVE32F-NEXT: sw t3, 0(a0)
5555 ; RV32ZVE32F-NEXT: sw t2, 4(a0)
5556 ; RV32ZVE32F-NEXT: andi a0, a1, 16
5557 ; RV32ZVE32F-NEXT: beqz a0, .LBB50_5
5558 ; RV32ZVE32F-NEXT: .LBB50_14: # %cond.store7
5559 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5560 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
5561 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5562 ; RV32ZVE32F-NEXT: sw t1, 0(a0)
5563 ; RV32ZVE32F-NEXT: sw t0, 4(a0)
5564 ; RV32ZVE32F-NEXT: andi a0, a1, 32
5565 ; RV32ZVE32F-NEXT: beqz a0, .LBB50_6
5566 ; RV32ZVE32F-NEXT: .LBB50_15: # %cond.store9
5567 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5568 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
5569 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5570 ; RV32ZVE32F-NEXT: sw a7, 0(a0)
5571 ; RV32ZVE32F-NEXT: sw a6, 4(a0)
5572 ; RV32ZVE32F-NEXT: andi a0, a1, 64
5573 ; RV32ZVE32F-NEXT: beqz a0, .LBB50_7
5574 ; RV32ZVE32F-NEXT: .LBB50_16: # %cond.store11
5575 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5576 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
5577 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5578 ; RV32ZVE32F-NEXT: sw a5, 0(a0)
5579 ; RV32ZVE32F-NEXT: sw a4, 4(a0)
5580 ; RV32ZVE32F-NEXT: andi a0, a1, -128
5581 ; RV32ZVE32F-NEXT: bnez a0, .LBB50_8
5582 ; RV32ZVE32F-NEXT: j .LBB50_9
5584 ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i32_v8i64:
5585 ; RV64ZVE32F: # %bb.0:
5586 ; RV64ZVE32F-NEXT: ld a2, 56(a0)
5587 ; RV64ZVE32F-NEXT: ld a3, 48(a0)
5588 ; RV64ZVE32F-NEXT: ld a5, 40(a0)
5589 ; RV64ZVE32F-NEXT: ld a6, 32(a0)
5590 ; RV64ZVE32F-NEXT: ld a7, 24(a0)
5591 ; RV64ZVE32F-NEXT: ld t0, 16(a0)
5592 ; RV64ZVE32F-NEXT: ld t1, 8(a0)
5593 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
5594 ; RV64ZVE32F-NEXT: vmv.x.s a4, v0
5595 ; RV64ZVE32F-NEXT: andi t2, a4, 1
5596 ; RV64ZVE32F-NEXT: beqz t2, .LBB50_2
5597 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
5598 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
5599 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
5600 ; RV64ZVE32F-NEXT: vmv.x.s t2, v8
5601 ; RV64ZVE32F-NEXT: slli t2, t2, 32
5602 ; RV64ZVE32F-NEXT: srli t2, t2, 29
5603 ; RV64ZVE32F-NEXT: add t2, a1, t2
5604 ; RV64ZVE32F-NEXT: sd a0, 0(t2)
5605 ; RV64ZVE32F-NEXT: .LBB50_2: # %else
5606 ; RV64ZVE32F-NEXT: andi a0, a4, 2
5607 ; RV64ZVE32F-NEXT: beqz a0, .LBB50_4
5608 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
5609 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5610 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
5611 ; RV64ZVE32F-NEXT: vmv.x.s a0, v10
5612 ; RV64ZVE32F-NEXT: slli a0, a0, 32
5613 ; RV64ZVE32F-NEXT: srli a0, a0, 29
5614 ; RV64ZVE32F-NEXT: add a0, a1, a0
5615 ; RV64ZVE32F-NEXT: sd t1, 0(a0)
5616 ; RV64ZVE32F-NEXT: .LBB50_4: # %else2
5617 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
5618 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
5619 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
5620 ; RV64ZVE32F-NEXT: andi a0, a4, 4
5621 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
5622 ; RV64ZVE32F-NEXT: bnez a0, .LBB50_12
5623 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
5624 ; RV64ZVE32F-NEXT: andi a0, a4, 8
5625 ; RV64ZVE32F-NEXT: bnez a0, .LBB50_13
5626 ; RV64ZVE32F-NEXT: .LBB50_6: # %else6
5627 ; RV64ZVE32F-NEXT: andi a0, a4, 16
5628 ; RV64ZVE32F-NEXT: bnez a0, .LBB50_14
5629 ; RV64ZVE32F-NEXT: .LBB50_7: # %else8
5630 ; RV64ZVE32F-NEXT: andi a0, a4, 32
5631 ; RV64ZVE32F-NEXT: beqz a0, .LBB50_9
5632 ; RV64ZVE32F-NEXT: .LBB50_8: # %cond.store9
5633 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
5634 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
5635 ; RV64ZVE32F-NEXT: slli a0, a0, 32
5636 ; RV64ZVE32F-NEXT: srli a0, a0, 29
5637 ; RV64ZVE32F-NEXT: add a0, a1, a0
5638 ; RV64ZVE32F-NEXT: sd a5, 0(a0)
5639 ; RV64ZVE32F-NEXT: .LBB50_9: # %else10
5640 ; RV64ZVE32F-NEXT: andi a0, a4, 64
5641 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
5642 ; RV64ZVE32F-NEXT: bnez a0, .LBB50_15
5643 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
5644 ; RV64ZVE32F-NEXT: andi a0, a4, -128
5645 ; RV64ZVE32F-NEXT: bnez a0, .LBB50_16
5646 ; RV64ZVE32F-NEXT: .LBB50_11: # %else14
5647 ; RV64ZVE32F-NEXT: ret
5648 ; RV64ZVE32F-NEXT: .LBB50_12: # %cond.store3
5649 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
5650 ; RV64ZVE32F-NEXT: slli a0, a0, 32
5651 ; RV64ZVE32F-NEXT: srli a0, a0, 29
5652 ; RV64ZVE32F-NEXT: add a0, a1, a0
5653 ; RV64ZVE32F-NEXT: sd t0, 0(a0)
5654 ; RV64ZVE32F-NEXT: andi a0, a4, 8
5655 ; RV64ZVE32F-NEXT: beqz a0, .LBB50_6
5656 ; RV64ZVE32F-NEXT: .LBB50_13: # %cond.store5
5657 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
5658 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
5659 ; RV64ZVE32F-NEXT: slli a0, a0, 32
5660 ; RV64ZVE32F-NEXT: srli a0, a0, 29
5661 ; RV64ZVE32F-NEXT: add a0, a1, a0
5662 ; RV64ZVE32F-NEXT: sd a7, 0(a0)
5663 ; RV64ZVE32F-NEXT: andi a0, a4, 16
5664 ; RV64ZVE32F-NEXT: beqz a0, .LBB50_7
5665 ; RV64ZVE32F-NEXT: .LBB50_14: # %cond.store7
5666 ; RV64ZVE32F-NEXT: vmv.x.s a0, v10
5667 ; RV64ZVE32F-NEXT: slli a0, a0, 32
5668 ; RV64ZVE32F-NEXT: srli a0, a0, 29
5669 ; RV64ZVE32F-NEXT: add a0, a1, a0
5670 ; RV64ZVE32F-NEXT: sd a6, 0(a0)
5671 ; RV64ZVE32F-NEXT: andi a0, a4, 32
5672 ; RV64ZVE32F-NEXT: bnez a0, .LBB50_8
5673 ; RV64ZVE32F-NEXT: j .LBB50_9
5674 ; RV64ZVE32F-NEXT: .LBB50_15: # %cond.store11
5675 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
5676 ; RV64ZVE32F-NEXT: slli a0, a0, 32
5677 ; RV64ZVE32F-NEXT: srli a0, a0, 29
5678 ; RV64ZVE32F-NEXT: add a0, a1, a0
5679 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
5680 ; RV64ZVE32F-NEXT: andi a0, a4, -128
5681 ; RV64ZVE32F-NEXT: beqz a0, .LBB50_11
5682 ; RV64ZVE32F-NEXT: .LBB50_16: # %cond.store13
5683 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
5684 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
5685 ; RV64ZVE32F-NEXT: slli a0, a0, 32
5686 ; RV64ZVE32F-NEXT: srli a0, a0, 29
5687 ; RV64ZVE32F-NEXT: add a0, a1, a0
5688 ; RV64ZVE32F-NEXT: sd a2, 0(a0)
5689 ; RV64ZVE32F-NEXT: ret
; IR under test: zero-extend the indices, build the pointer vector, scatter.
5690 %eidxs = zext <8 x i32> %idxs to <8 x i64>
5691 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
5692 call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
; Masked scatter of <8 x i64> %val to %base + %idxs[i] * 8 where the indices
; are already i64 (GEP over i64, no extension); alignment 8, mask %m.
; Expected lowering per check prefix:
;   RV32V:      indices narrowed to e32 with vnsrl.wi, shifted by 3, then one
;               masked vsoxei32.v.
;   RV64:       indices shifted by 3 at e64, then one masked vsoxei64.v.
;   RV32ZVE32F: one 32-bit word per index is loaded through a2 (offsets
;               0, 8, ..., 56) and packed into v8 with vslide1down.vx; the
;               vector is turned into addresses (vsll.vi 3 + vadd.vx base) and
;               each lane is stored conditionally as two sw instructions.
;   RV64ZVE32F: indices are loaded through a2 and values through a0 as
;               scalars; each lane does slli 3 + add + a conditional sd. The
;               only vector instructions read the mask out of v0.
5696 define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs, <8 x i1> %m) {
5697 ; RV32V-LABEL: mscatter_baseidx_v8i64:
5699 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
5700 ; RV32V-NEXT: vnsrl.wi v16, v12, 0
5701 ; RV32V-NEXT: vsll.vi v12, v16, 3
5702 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
5703 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
5706 ; RV64-LABEL: mscatter_baseidx_v8i64:
5708 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
5709 ; RV64-NEXT: vsll.vi v12, v12, 3
5710 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
5713 ; RV32ZVE32F-LABEL: mscatter_baseidx_v8i64:
5714 ; RV32ZVE32F: # %bb.0:
5715 ; RV32ZVE32F-NEXT: addi sp, sp, -48
5716 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 48
5717 ; RV32ZVE32F-NEXT: sw s0, 44(sp) # 4-byte Folded Spill
5718 ; RV32ZVE32F-NEXT: sw s1, 40(sp) # 4-byte Folded Spill
5719 ; RV32ZVE32F-NEXT: sw s2, 36(sp) # 4-byte Folded Spill
5720 ; RV32ZVE32F-NEXT: sw s3, 32(sp) # 4-byte Folded Spill
5721 ; RV32ZVE32F-NEXT: sw s4, 28(sp) # 4-byte Folded Spill
5722 ; RV32ZVE32F-NEXT: sw s5, 24(sp) # 4-byte Folded Spill
5723 ; RV32ZVE32F-NEXT: sw s6, 20(sp) # 4-byte Folded Spill
5724 ; RV32ZVE32F-NEXT: sw s7, 16(sp) # 4-byte Folded Spill
5725 ; RV32ZVE32F-NEXT: sw s8, 12(sp) # 4-byte Folded Spill
5726 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
5727 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
5728 ; RV32ZVE32F-NEXT: .cfi_offset s2, -12
5729 ; RV32ZVE32F-NEXT: .cfi_offset s3, -16
5730 ; RV32ZVE32F-NEXT: .cfi_offset s4, -20
5731 ; RV32ZVE32F-NEXT: .cfi_offset s5, -24
5732 ; RV32ZVE32F-NEXT: .cfi_offset s6, -28
5733 ; RV32ZVE32F-NEXT: .cfi_offset s7, -32
5734 ; RV32ZVE32F-NEXT: .cfi_offset s8, -36
5735 ; RV32ZVE32F-NEXT: lw a3, 60(a0)
5736 ; RV32ZVE32F-NEXT: lw a4, 56(a0)
5737 ; RV32ZVE32F-NEXT: lw a5, 52(a0)
5738 ; RV32ZVE32F-NEXT: lw a6, 48(a0)
5739 ; RV32ZVE32F-NEXT: lw a7, 44(a0)
5740 ; RV32ZVE32F-NEXT: lw t0, 40(a0)
5741 ; RV32ZVE32F-NEXT: lw t1, 36(a0)
5742 ; RV32ZVE32F-NEXT: lw t2, 32(a0)
5743 ; RV32ZVE32F-NEXT: lw t3, 28(a0)
5744 ; RV32ZVE32F-NEXT: lw t4, 24(a0)
5745 ; RV32ZVE32F-NEXT: lw t5, 20(a0)
5746 ; RV32ZVE32F-NEXT: lw t6, 16(a0)
5747 ; RV32ZVE32F-NEXT: lw s1, 12(a0)
5748 ; RV32ZVE32F-NEXT: lw s0, 8(a0)
5749 ; RV32ZVE32F-NEXT: lw s2, 56(a2)
5750 ; RV32ZVE32F-NEXT: lw s3, 48(a2)
5751 ; RV32ZVE32F-NEXT: lw s4, 40(a2)
5752 ; RV32ZVE32F-NEXT: lw s5, 32(a2)
5753 ; RV32ZVE32F-NEXT: lw s6, 24(a2)
5754 ; RV32ZVE32F-NEXT: lw s7, 0(a2)
5755 ; RV32ZVE32F-NEXT: lw s8, 8(a2)
5756 ; RV32ZVE32F-NEXT: lw a2, 16(a2)
5757 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
5758 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s7
5759 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s8
5760 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a2
5761 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s6
5762 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s5
5763 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s4
5764 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s3
5765 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s2
5766 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
5767 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
5768 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
5769 ; RV32ZVE32F-NEXT: vmv.x.s a1, v0
5770 ; RV32ZVE32F-NEXT: andi a2, a1, 1
5771 ; RV32ZVE32F-NEXT: bnez a2, .LBB51_10
5772 ; RV32ZVE32F-NEXT: # %bb.1: # %else
5773 ; RV32ZVE32F-NEXT: andi a0, a1, 2
5774 ; RV32ZVE32F-NEXT: bnez a0, .LBB51_11
5775 ; RV32ZVE32F-NEXT: .LBB51_2: # %else2
5776 ; RV32ZVE32F-NEXT: andi a0, a1, 4
5777 ; RV32ZVE32F-NEXT: bnez a0, .LBB51_12
5778 ; RV32ZVE32F-NEXT: .LBB51_3: # %else4
5779 ; RV32ZVE32F-NEXT: andi a0, a1, 8
5780 ; RV32ZVE32F-NEXT: bnez a0, .LBB51_13
5781 ; RV32ZVE32F-NEXT: .LBB51_4: # %else6
5782 ; RV32ZVE32F-NEXT: andi a0, a1, 16
5783 ; RV32ZVE32F-NEXT: bnez a0, .LBB51_14
5784 ; RV32ZVE32F-NEXT: .LBB51_5: # %else8
5785 ; RV32ZVE32F-NEXT: andi a0, a1, 32
5786 ; RV32ZVE32F-NEXT: bnez a0, .LBB51_15
5787 ; RV32ZVE32F-NEXT: .LBB51_6: # %else10
5788 ; RV32ZVE32F-NEXT: andi a0, a1, 64
5789 ; RV32ZVE32F-NEXT: bnez a0, .LBB51_16
5790 ; RV32ZVE32F-NEXT: .LBB51_7: # %else12
5791 ; RV32ZVE32F-NEXT: andi a0, a1, -128
5792 ; RV32ZVE32F-NEXT: beqz a0, .LBB51_9
5793 ; RV32ZVE32F-NEXT: .LBB51_8: # %cond.store13
5794 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5795 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
5796 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
5797 ; RV32ZVE32F-NEXT: sw a4, 0(a0)
5798 ; RV32ZVE32F-NEXT: sw a3, 4(a0)
5799 ; RV32ZVE32F-NEXT: .LBB51_9: # %else14
5800 ; RV32ZVE32F-NEXT: lw s0, 44(sp) # 4-byte Folded Reload
5801 ; RV32ZVE32F-NEXT: lw s1, 40(sp) # 4-byte Folded Reload
5802 ; RV32ZVE32F-NEXT: lw s2, 36(sp) # 4-byte Folded Reload
5803 ; RV32ZVE32F-NEXT: lw s3, 32(sp) # 4-byte Folded Reload
5804 ; RV32ZVE32F-NEXT: lw s4, 28(sp) # 4-byte Folded Reload
5805 ; RV32ZVE32F-NEXT: lw s5, 24(sp) # 4-byte Folded Reload
5806 ; RV32ZVE32F-NEXT: lw s6, 20(sp) # 4-byte Folded Reload
5807 ; RV32ZVE32F-NEXT: lw s7, 16(sp) # 4-byte Folded Reload
5808 ; RV32ZVE32F-NEXT: lw s8, 12(sp) # 4-byte Folded Reload
5809 ; RV32ZVE32F-NEXT: addi sp, sp, 48
5810 ; RV32ZVE32F-NEXT: ret
5811 ; RV32ZVE32F-NEXT: .LBB51_10: # %cond.store
5812 ; RV32ZVE32F-NEXT: lw a2, 4(a0)
5813 ; RV32ZVE32F-NEXT: lw a0, 0(a0)
5814 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
5815 ; RV32ZVE32F-NEXT: vmv.x.s s2, v8
5816 ; RV32ZVE32F-NEXT: sw a2, 4(s2)
5817 ; RV32ZVE32F-NEXT: sw a0, 0(s2)
5818 ; RV32ZVE32F-NEXT: andi a0, a1, 2
5819 ; RV32ZVE32F-NEXT: beqz a0, .LBB51_2
5820 ; RV32ZVE32F-NEXT: .LBB51_11: # %cond.store1
5821 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5822 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
5823 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5824 ; RV32ZVE32F-NEXT: sw s1, 4(a0)
5825 ; RV32ZVE32F-NEXT: sw s0, 0(a0)
5826 ; RV32ZVE32F-NEXT: andi a0, a1, 4
5827 ; RV32ZVE32F-NEXT: beqz a0, .LBB51_3
5828 ; RV32ZVE32F-NEXT: .LBB51_12: # %cond.store3
5829 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5830 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
5831 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5832 ; RV32ZVE32F-NEXT: sw t6, 0(a0)
5833 ; RV32ZVE32F-NEXT: sw t5, 4(a0)
5834 ; RV32ZVE32F-NEXT: andi a0, a1, 8
5835 ; RV32ZVE32F-NEXT: beqz a0, .LBB51_4
5836 ; RV32ZVE32F-NEXT: .LBB51_13: # %cond.store5
5837 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5838 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
5839 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5840 ; RV32ZVE32F-NEXT: sw t4, 0(a0)
5841 ; RV32ZVE32F-NEXT: sw t3, 4(a0)
5842 ; RV32ZVE32F-NEXT: andi a0, a1, 16
5843 ; RV32ZVE32F-NEXT: beqz a0, .LBB51_5
5844 ; RV32ZVE32F-NEXT: .LBB51_14: # %cond.store7
5845 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5846 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
5847 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5848 ; RV32ZVE32F-NEXT: sw t2, 0(a0)
5849 ; RV32ZVE32F-NEXT: sw t1, 4(a0)
5850 ; RV32ZVE32F-NEXT: andi a0, a1, 32
5851 ; RV32ZVE32F-NEXT: beqz a0, .LBB51_6
5852 ; RV32ZVE32F-NEXT: .LBB51_15: # %cond.store9
5853 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5854 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
5855 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5856 ; RV32ZVE32F-NEXT: sw t0, 0(a0)
5857 ; RV32ZVE32F-NEXT: sw a7, 4(a0)
5858 ; RV32ZVE32F-NEXT: andi a0, a1, 64
5859 ; RV32ZVE32F-NEXT: beqz a0, .LBB51_7
5860 ; RV32ZVE32F-NEXT: .LBB51_16: # %cond.store11
5861 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5862 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
5863 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5864 ; RV32ZVE32F-NEXT: sw a6, 0(a0)
5865 ; RV32ZVE32F-NEXT: sw a5, 4(a0)
5866 ; RV32ZVE32F-NEXT: andi a0, a1, -128
5867 ; RV32ZVE32F-NEXT: bnez a0, .LBB51_8
5868 ; RV32ZVE32F-NEXT: j .LBB51_9
5870 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i64:
5871 ; RV64ZVE32F: # %bb.0:
5872 ; RV64ZVE32F-NEXT: addi sp, sp, -32
5873 ; RV64ZVE32F-NEXT: .cfi_def_cfa_offset 32
5874 ; RV64ZVE32F-NEXT: sd s0, 24(sp) # 8-byte Folded Spill
5875 ; RV64ZVE32F-NEXT: sd s1, 16(sp) # 8-byte Folded Spill
5876 ; RV64ZVE32F-NEXT: sd s2, 8(sp) # 8-byte Folded Spill
5877 ; RV64ZVE32F-NEXT: sd s3, 0(sp) # 8-byte Folded Spill
5878 ; RV64ZVE32F-NEXT: .cfi_offset s0, -8
5879 ; RV64ZVE32F-NEXT: .cfi_offset s1, -16
5880 ; RV64ZVE32F-NEXT: .cfi_offset s2, -24
5881 ; RV64ZVE32F-NEXT: .cfi_offset s3, -32
5882 ; RV64ZVE32F-NEXT: ld a3, 56(a0)
5883 ; RV64ZVE32F-NEXT: ld a4, 48(a0)
5884 ; RV64ZVE32F-NEXT: ld a6, 40(a0)
5885 ; RV64ZVE32F-NEXT: ld t1, 32(a0)
5886 ; RV64ZVE32F-NEXT: ld t3, 24(a0)
5887 ; RV64ZVE32F-NEXT: ld t6, 16(a0)
5888 ; RV64ZVE32F-NEXT: ld s1, 8(a0)
5889 ; RV64ZVE32F-NEXT: ld s2, 8(a2)
5890 ; RV64ZVE32F-NEXT: ld s0, 16(a2)
5891 ; RV64ZVE32F-NEXT: ld t5, 24(a2)
5892 ; RV64ZVE32F-NEXT: ld t4, 32(a2)
5893 ; RV64ZVE32F-NEXT: ld t2, 40(a2)
5894 ; RV64ZVE32F-NEXT: ld t0, 48(a2)
5895 ; RV64ZVE32F-NEXT: ld a5, 56(a2)
5896 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
5897 ; RV64ZVE32F-NEXT: vmv.x.s a7, v0
5898 ; RV64ZVE32F-NEXT: andi s3, a7, 1
5899 ; RV64ZVE32F-NEXT: bnez s3, .LBB51_10
5900 ; RV64ZVE32F-NEXT: # %bb.1: # %else
5901 ; RV64ZVE32F-NEXT: andi a0, a7, 2
5902 ; RV64ZVE32F-NEXT: bnez a0, .LBB51_11
5903 ; RV64ZVE32F-NEXT: .LBB51_2: # %else2
5904 ; RV64ZVE32F-NEXT: andi a0, a7, 4
5905 ; RV64ZVE32F-NEXT: bnez a0, .LBB51_12
5906 ; RV64ZVE32F-NEXT: .LBB51_3: # %else4
5907 ; RV64ZVE32F-NEXT: andi a0, a7, 8
5908 ; RV64ZVE32F-NEXT: bnez a0, .LBB51_13
5909 ; RV64ZVE32F-NEXT: .LBB51_4: # %else6
5910 ; RV64ZVE32F-NEXT: andi a0, a7, 16
5911 ; RV64ZVE32F-NEXT: bnez a0, .LBB51_14
5912 ; RV64ZVE32F-NEXT: .LBB51_5: # %else8
5913 ; RV64ZVE32F-NEXT: andi a0, a7, 32
5914 ; RV64ZVE32F-NEXT: bnez a0, .LBB51_15
5915 ; RV64ZVE32F-NEXT: .LBB51_6: # %else10
5916 ; RV64ZVE32F-NEXT: andi a0, a7, 64
5917 ; RV64ZVE32F-NEXT: bnez a0, .LBB51_16
5918 ; RV64ZVE32F-NEXT: .LBB51_7: # %else12
5919 ; RV64ZVE32F-NEXT: andi a0, a7, -128
5920 ; RV64ZVE32F-NEXT: beqz a0, .LBB51_9
5921 ; RV64ZVE32F-NEXT: .LBB51_8: # %cond.store13
5922 ; RV64ZVE32F-NEXT: slli a5, a5, 3
5923 ; RV64ZVE32F-NEXT: add a1, a1, a5
5924 ; RV64ZVE32F-NEXT: sd a3, 0(a1)
5925 ; RV64ZVE32F-NEXT: .LBB51_9: # %else14
5926 ; RV64ZVE32F-NEXT: ld s0, 24(sp) # 8-byte Folded Reload
5927 ; RV64ZVE32F-NEXT: ld s1, 16(sp) # 8-byte Folded Reload
5928 ; RV64ZVE32F-NEXT: ld s2, 8(sp) # 8-byte Folded Reload
5929 ; RV64ZVE32F-NEXT: ld s3, 0(sp) # 8-byte Folded Reload
5930 ; RV64ZVE32F-NEXT: addi sp, sp, 32
5931 ; RV64ZVE32F-NEXT: ret
5932 ; RV64ZVE32F-NEXT: .LBB51_10: # %cond.store
5933 ; RV64ZVE32F-NEXT: ld a2, 0(a2)
5934 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
5935 ; RV64ZVE32F-NEXT: slli a2, a2, 3
5936 ; RV64ZVE32F-NEXT: add a2, a1, a2
5937 ; RV64ZVE32F-NEXT: sd a0, 0(a2)
5938 ; RV64ZVE32F-NEXT: andi a0, a7, 2
5939 ; RV64ZVE32F-NEXT: beqz a0, .LBB51_2
5940 ; RV64ZVE32F-NEXT: .LBB51_11: # %cond.store1
5941 ; RV64ZVE32F-NEXT: slli s2, s2, 3
5942 ; RV64ZVE32F-NEXT: add s2, a1, s2
5943 ; RV64ZVE32F-NEXT: sd s1, 0(s2)
5944 ; RV64ZVE32F-NEXT: andi a0, a7, 4
5945 ; RV64ZVE32F-NEXT: beqz a0, .LBB51_3
5946 ; RV64ZVE32F-NEXT: .LBB51_12: # %cond.store3
5947 ; RV64ZVE32F-NEXT: slli s0, s0, 3
5948 ; RV64ZVE32F-NEXT: add s0, a1, s0
5949 ; RV64ZVE32F-NEXT: sd t6, 0(s0)
5950 ; RV64ZVE32F-NEXT: andi a0, a7, 8
5951 ; RV64ZVE32F-NEXT: beqz a0, .LBB51_4
5952 ; RV64ZVE32F-NEXT: .LBB51_13: # %cond.store5
5953 ; RV64ZVE32F-NEXT: slli t5, t5, 3
5954 ; RV64ZVE32F-NEXT: add t5, a1, t5
5955 ; RV64ZVE32F-NEXT: sd t3, 0(t5)
5956 ; RV64ZVE32F-NEXT: andi a0, a7, 16
5957 ; RV64ZVE32F-NEXT: beqz a0, .LBB51_5
5958 ; RV64ZVE32F-NEXT: .LBB51_14: # %cond.store7
5959 ; RV64ZVE32F-NEXT: slli t4, t4, 3
5960 ; RV64ZVE32F-NEXT: add t4, a1, t4
5961 ; RV64ZVE32F-NEXT: sd t1, 0(t4)
5962 ; RV64ZVE32F-NEXT: andi a0, a7, 32
5963 ; RV64ZVE32F-NEXT: beqz a0, .LBB51_6
5964 ; RV64ZVE32F-NEXT: .LBB51_15: # %cond.store9
5965 ; RV64ZVE32F-NEXT: slli t2, t2, 3
5966 ; RV64ZVE32F-NEXT: add t2, a1, t2
5967 ; RV64ZVE32F-NEXT: sd a6, 0(t2)
5968 ; RV64ZVE32F-NEXT: andi a0, a7, 64
5969 ; RV64ZVE32F-NEXT: beqz a0, .LBB51_7
5970 ; RV64ZVE32F-NEXT: .LBB51_16: # %cond.store11
5971 ; RV64ZVE32F-NEXT: slli t0, t0, 3
5972 ; RV64ZVE32F-NEXT: add t0, a1, t0
5973 ; RV64ZVE32F-NEXT: sd a4, 0(t0)
5974 ; RV64ZVE32F-NEXT: andi a0, a7, -128
5975 ; RV64ZVE32F-NEXT: bnez a0, .LBB51_8
5976 ; RV64ZVE32F-NEXT: j .LBB51_9
; IR under test: index the base pointer directly with the i64 indices, scatter.
5977 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %idxs
5978 call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
; Intrinsic signature: (values, pointers, i32 alignment, mask) for <1 x half>.
5982 declare void @llvm.masked.scatter.v1f16.v1p0(<1 x half>, <1 x ptr>, i32, <1 x i1>)
; Masked scatter of a single half value to a single pointer, alignment 2.
; Expected lowering per check prefix:
;   RV32V / RV64 / RV32ZVE32F: one masked indexed store (vsoxei32.v or
;     vsoxei64.v) with a zero base register and the pointer vector in v9.
;   RV64ZVE32F: scalarized. vfirst.m inspects the mask in v0 and the
;     unit-stride vse16.v to (a0) is skipped when its result is non-zero.
5984 define void @mscatter_v1f16(<1 x half> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
5985 ; RV32V-LABEL: mscatter_v1f16:
5987 ; RV32V-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
5988 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
5991 ; RV64-LABEL: mscatter_v1f16:
5993 ; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
5994 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
5997 ; RV32ZVE32F-LABEL: mscatter_v1f16:
5998 ; RV32ZVE32F: # %bb.0:
5999 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
6000 ; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
6001 ; RV32ZVE32F-NEXT: ret
6003 ; RV64ZVE32F-LABEL: mscatter_v1f16:
6004 ; RV64ZVE32F: # %bb.0:
6005 ; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
6006 ; RV64ZVE32F-NEXT: vfirst.m a1, v0
6007 ; RV64ZVE32F-NEXT: bnez a1, .LBB52_2
6008 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
6009 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
6010 ; RV64ZVE32F-NEXT: vse16.v v8, (a0)
6011 ; RV64ZVE32F-NEXT: .LBB52_2: # %else
6012 ; RV64ZVE32F-NEXT: ret
6013 call void @llvm.masked.scatter.v1f16.v1p0(<1 x half> %val, <1 x ptr> %ptrs, i32 2, <1 x i1> %m)
; Intrinsic signature: (values, pointers, i32 alignment, mask) for <2 x half>.
6017 declare void @llvm.masked.scatter.v2f16.v2p0(<2 x half>, <2 x ptr>, i32, <2 x i1>)
; Masked scatter of <2 x half> to two pointers, alignment 2.
; Expected lowering per check prefix:
;   RV32V / RV64 / RV32ZVE32F: one masked indexed store (vsoxei32.v or
;     vsoxei64.v) with a zero base register.
;   RV64ZVE32F: scalarized. The mask is moved to a2 and tested bit by bit;
;     lane 0 is stored straight from v8 to (a0), lane 1 is slid down into
;     element 0 and stored to (a1).
6019 define void @mscatter_v2f16(<2 x half> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
6020 ; RV32V-LABEL: mscatter_v2f16:
6022 ; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
6023 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
6026 ; RV64-LABEL: mscatter_v2f16:
6028 ; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
6029 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
6032 ; RV32ZVE32F-LABEL: mscatter_v2f16:
6033 ; RV32ZVE32F: # %bb.0:
6034 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
6035 ; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
6036 ; RV32ZVE32F-NEXT: ret
6038 ; RV64ZVE32F-LABEL: mscatter_v2f16:
6039 ; RV64ZVE32F: # %bb.0:
6040 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6041 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
6042 ; RV64ZVE32F-NEXT: andi a3, a2, 1
6043 ; RV64ZVE32F-NEXT: bnez a3, .LBB53_3
6044 ; RV64ZVE32F-NEXT: # %bb.1: # %else
6045 ; RV64ZVE32F-NEXT: andi a2, a2, 2
6046 ; RV64ZVE32F-NEXT: bnez a2, .LBB53_4
6047 ; RV64ZVE32F-NEXT: .LBB53_2: # %else2
6048 ; RV64ZVE32F-NEXT: ret
6049 ; RV64ZVE32F-NEXT: .LBB53_3: # %cond.store
6050 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
6051 ; RV64ZVE32F-NEXT: vse16.v v8, (a0)
6052 ; RV64ZVE32F-NEXT: andi a2, a2, 2
6053 ; RV64ZVE32F-NEXT: beqz a2, .LBB53_2
6054 ; RV64ZVE32F-NEXT: .LBB53_4: # %cond.store1
6055 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
6056 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
6057 ; RV64ZVE32F-NEXT: vse16.v v8, (a1)
6058 ; RV64ZVE32F-NEXT: ret
6059 call void @llvm.masked.scatter.v2f16.v2p0(<2 x half> %val, <2 x ptr> %ptrs, i32 2, <2 x i1> %m)
; Intrinsic signature: (values, pointers, i32 alignment, mask) for <4 x half>.
6063 declare void @llvm.masked.scatter.v4f16.v4p0(<4 x half>, <4 x ptr>, i32, <4 x i1>)
; Masked scatter of <4 x half> to four pointers, alignment 2.
; Expected lowering per check prefix:
;   RV32 (shared by both riscv32 RUN lines): one masked vsoxei32.v with the
;     pointer vector in v9.
;   RV64: one masked vsoxei64.v with the pointer vector in v10.
;   RV64ZVE32F: scalarized. The four pointers are loaded through a0
;     (offsets 0..24), the mask is moved to a3, and each set bit triggers a
;     vslidedown.vi (lanes 1-3) plus a single-element vse16.v.
6065 define void @mscatter_v4f16(<4 x half> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
6066 ; RV32-LABEL: mscatter_v4f16:
6068 ; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
6069 ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
6072 ; RV64-LABEL: mscatter_v4f16:
6074 ; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
6075 ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
6078 ; RV64ZVE32F-LABEL: mscatter_v4f16:
6079 ; RV64ZVE32F: # %bb.0:
6080 ; RV64ZVE32F-NEXT: ld a1, 24(a0)
6081 ; RV64ZVE32F-NEXT: ld a2, 16(a0)
6082 ; RV64ZVE32F-NEXT: ld a4, 8(a0)
6083 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6084 ; RV64ZVE32F-NEXT: vmv.x.s a3, v0
6085 ; RV64ZVE32F-NEXT: andi a5, a3, 1
6086 ; RV64ZVE32F-NEXT: bnez a5, .LBB54_5
6087 ; RV64ZVE32F-NEXT: # %bb.1: # %else
6088 ; RV64ZVE32F-NEXT: andi a0, a3, 2
6089 ; RV64ZVE32F-NEXT: bnez a0, .LBB54_6
6090 ; RV64ZVE32F-NEXT: .LBB54_2: # %else2
6091 ; RV64ZVE32F-NEXT: andi a0, a3, 4
6092 ; RV64ZVE32F-NEXT: bnez a0, .LBB54_7
6093 ; RV64ZVE32F-NEXT: .LBB54_3: # %else4
6094 ; RV64ZVE32F-NEXT: andi a3, a3, 8
6095 ; RV64ZVE32F-NEXT: bnez a3, .LBB54_8
6096 ; RV64ZVE32F-NEXT: .LBB54_4: # %else6
6097 ; RV64ZVE32F-NEXT: ret
6098 ; RV64ZVE32F-NEXT: .LBB54_5: # %cond.store
6099 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
6100 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
6101 ; RV64ZVE32F-NEXT: vse16.v v8, (a0)
6102 ; RV64ZVE32F-NEXT: andi a0, a3, 2
6103 ; RV64ZVE32F-NEXT: beqz a0, .LBB54_2
6104 ; RV64ZVE32F-NEXT: .LBB54_6: # %cond.store1
6105 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
6106 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
6107 ; RV64ZVE32F-NEXT: vse16.v v9, (a4)
6108 ; RV64ZVE32F-NEXT: andi a0, a3, 4
6109 ; RV64ZVE32F-NEXT: beqz a0, .LBB54_3
6110 ; RV64ZVE32F-NEXT: .LBB54_7: # %cond.store3
6111 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
6112 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
6113 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
6114 ; RV64ZVE32F-NEXT: andi a3, a3, 8
6115 ; RV64ZVE32F-NEXT: beqz a3, .LBB54_4
6116 ; RV64ZVE32F-NEXT: .LBB54_8: # %cond.store5
6117 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
6118 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
6119 ; RV64ZVE32F-NEXT: vse16.v v8, (a1)
6120 ; RV64ZVE32F-NEXT: ret
6121 call void @llvm.masked.scatter.v4f16.v4p0(<4 x half> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> %m)
; Scatter of <4 x half> with a constant all-true mask, built below as a splat
; of i1 1 (insertelement + shufflevector with a zero shuffle mask).
; Expected lowering per check prefix:
;   RV32 / RV64: the indexed store is emitted unmasked (no v0.t operand).
;   RV64ZVE32F: still scalarized with per-lane branches. The mask is
;     materialized with vmset.m and the first lane's branch is the
;     always-taken "beqz zero"; lanes 1-3 still test bits of a3.
6125 define void @mscatter_truemask_v4f16(<4 x half> %val, <4 x ptr> %ptrs) {
6126 ; RV32-LABEL: mscatter_truemask_v4f16:
6128 ; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
6129 ; RV32-NEXT: vsoxei32.v v8, (zero), v9
6132 ; RV64-LABEL: mscatter_truemask_v4f16:
6134 ; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
6135 ; RV64-NEXT: vsoxei64.v v8, (zero), v10
6138 ; RV64ZVE32F-LABEL: mscatter_truemask_v4f16:
6139 ; RV64ZVE32F: # %bb.0:
6140 ; RV64ZVE32F-NEXT: ld a1, 24(a0)
6141 ; RV64ZVE32F-NEXT: ld a2, 16(a0)
6142 ; RV64ZVE32F-NEXT: ld a4, 8(a0)
6143 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
6144 ; RV64ZVE32F-NEXT: vmset.m v9
6145 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9
6146 ; RV64ZVE32F-NEXT: beqz zero, .LBB55_5
6147 ; RV64ZVE32F-NEXT: # %bb.1: # %else
6148 ; RV64ZVE32F-NEXT: andi a0, a3, 2
6149 ; RV64ZVE32F-NEXT: bnez a0, .LBB55_6
6150 ; RV64ZVE32F-NEXT: .LBB55_2: # %else2
6151 ; RV64ZVE32F-NEXT: andi a0, a3, 4
6152 ; RV64ZVE32F-NEXT: bnez a0, .LBB55_7
6153 ; RV64ZVE32F-NEXT: .LBB55_3: # %else4
6154 ; RV64ZVE32F-NEXT: andi a3, a3, 8
6155 ; RV64ZVE32F-NEXT: bnez a3, .LBB55_8
6156 ; RV64ZVE32F-NEXT: .LBB55_4: # %else6
6157 ; RV64ZVE32F-NEXT: ret
6158 ; RV64ZVE32F-NEXT: .LBB55_5: # %cond.store
6159 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
6160 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
6161 ; RV64ZVE32F-NEXT: vse16.v v8, (a0)
6162 ; RV64ZVE32F-NEXT: andi a0, a3, 2
6163 ; RV64ZVE32F-NEXT: beqz a0, .LBB55_2
6164 ; RV64ZVE32F-NEXT: .LBB55_6: # %cond.store1
6165 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
6166 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
6167 ; RV64ZVE32F-NEXT: vse16.v v9, (a4)
6168 ; RV64ZVE32F-NEXT: andi a0, a3, 4
6169 ; RV64ZVE32F-NEXT: beqz a0, .LBB55_3
6170 ; RV64ZVE32F-NEXT: .LBB55_7: # %cond.store3
6171 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
6172 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
6173 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
6174 ; RV64ZVE32F-NEXT: andi a3, a3, 8
6175 ; RV64ZVE32F-NEXT: beqz a3, .LBB55_4
6176 ; RV64ZVE32F-NEXT: .LBB55_8: # %cond.store5
6177 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
6178 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
6179 ; RV64ZVE32F-NEXT: vse16.v v8, (a1)
6180 ; RV64ZVE32F-NEXT: ret
; Build the all-true <4 x i1> mask by splatting element 0.
6181 %mhead = insertelement <4 x i1> poison, i1 1, i32 0
6182 %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer
6183 call void @llvm.masked.scatter.v4f16.v4p0(<4 x half> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> %mtrue)
; Scatter of <4 x half> with a constant all-false mask (zeroinitializer).
; All four RUN configurations share the common CHECK prefix for this function.
; NOTE(review): presumably no store is emitted at all; the CHECK-NEXT lines
; that would confirm this are not part of the visible text - verify.
6187 define void @mscatter_falsemask_v4f16(<4 x half> %val, <4 x ptr> %ptrs) {
6188 ; CHECK-LABEL: mscatter_falsemask_v4f16:
6191 call void @llvm.masked.scatter.v4f16.v4p0(<4 x half> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> zeroinitializer)
; Intrinsic signature: (values, pointers, i32 alignment, mask) for <8 x half>.
6195 declare void @llvm.masked.scatter.v8f16.v8p0(<8 x half>, <8 x ptr>, i32, <8 x i1>)
; Masked scatter of <8 x half> to eight pointers, alignment 2.
; Expected lowering per check prefix:
;   RV32 (shared by both riscv32 RUN lines): one masked vsoxei32.v with the
;     pointer vector in v10.
;   RV64: one masked vsoxei64.v with the pointer vector in v12.
;   RV64ZVE32F: scalarized. The eight pointers are loaded through a0
;     (offsets 0..56), the mask is moved to a3, and each set bit triggers a
;     vslidedown.vi (lanes 1-7) plus a single-element vse16.v. The last
;     lane is tested with "andi ..., -128".
6197 define void @mscatter_v8f16(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
6198 ; RV32-LABEL: mscatter_v8f16:
6200 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
6201 ; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
6204 ; RV64-LABEL: mscatter_v8f16:
6206 ; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
6207 ; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
6210 ; RV64ZVE32F-LABEL: mscatter_v8f16:
6211 ; RV64ZVE32F: # %bb.0:
6212 ; RV64ZVE32F-NEXT: ld a1, 56(a0)
6213 ; RV64ZVE32F-NEXT: ld a2, 48(a0)
6214 ; RV64ZVE32F-NEXT: ld a4, 40(a0)
6215 ; RV64ZVE32F-NEXT: ld a5, 32(a0)
6216 ; RV64ZVE32F-NEXT: ld a6, 24(a0)
6217 ; RV64ZVE32F-NEXT: ld a7, 16(a0)
6218 ; RV64ZVE32F-NEXT: ld t0, 8(a0)
6219 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6220 ; RV64ZVE32F-NEXT: vmv.x.s a3, v0
6221 ; RV64ZVE32F-NEXT: andi t1, a3, 1
6222 ; RV64ZVE32F-NEXT: bnez t1, .LBB57_9
6223 ; RV64ZVE32F-NEXT: # %bb.1: # %else
6224 ; RV64ZVE32F-NEXT: andi a0, a3, 2
6225 ; RV64ZVE32F-NEXT: bnez a0, .LBB57_10
6226 ; RV64ZVE32F-NEXT: .LBB57_2: # %else2
6227 ; RV64ZVE32F-NEXT: andi a0, a3, 4
6228 ; RV64ZVE32F-NEXT: bnez a0, .LBB57_11
6229 ; RV64ZVE32F-NEXT: .LBB57_3: # %else4
6230 ; RV64ZVE32F-NEXT: andi a0, a3, 8
6231 ; RV64ZVE32F-NEXT: bnez a0, .LBB57_12
6232 ; RV64ZVE32F-NEXT: .LBB57_4: # %else6
6233 ; RV64ZVE32F-NEXT: andi a0, a3, 16
6234 ; RV64ZVE32F-NEXT: bnez a0, .LBB57_13
6235 ; RV64ZVE32F-NEXT: .LBB57_5: # %else8
6236 ; RV64ZVE32F-NEXT: andi a0, a3, 32
6237 ; RV64ZVE32F-NEXT: bnez a0, .LBB57_14
6238 ; RV64ZVE32F-NEXT: .LBB57_6: # %else10
6239 ; RV64ZVE32F-NEXT: andi a0, a3, 64
6240 ; RV64ZVE32F-NEXT: bnez a0, .LBB57_15
6241 ; RV64ZVE32F-NEXT: .LBB57_7: # %else12
6242 ; RV64ZVE32F-NEXT: andi a0, a3, -128
6243 ; RV64ZVE32F-NEXT: bnez a0, .LBB57_16
6244 ; RV64ZVE32F-NEXT: .LBB57_8: # %else14
6245 ; RV64ZVE32F-NEXT: ret
6246 ; RV64ZVE32F-NEXT: .LBB57_9: # %cond.store
6247 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
6248 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6249 ; RV64ZVE32F-NEXT: vse16.v v8, (a0)
6250 ; RV64ZVE32F-NEXT: andi a0, a3, 2
6251 ; RV64ZVE32F-NEXT: beqz a0, .LBB57_2
6252 ; RV64ZVE32F-NEXT: .LBB57_10: # %cond.store1
6253 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6254 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
6255 ; RV64ZVE32F-NEXT: vse16.v v9, (t0)
6256 ; RV64ZVE32F-NEXT: andi a0, a3, 4
6257 ; RV64ZVE32F-NEXT: beqz a0, .LBB57_3
6258 ; RV64ZVE32F-NEXT: .LBB57_11: # %cond.store3
6259 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6260 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
6261 ; RV64ZVE32F-NEXT: vse16.v v9, (a7)
6262 ; RV64ZVE32F-NEXT: andi a0, a3, 8
6263 ; RV64ZVE32F-NEXT: beqz a0, .LBB57_4
6264 ; RV64ZVE32F-NEXT: .LBB57_12: # %cond.store5
6265 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6266 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
6267 ; RV64ZVE32F-NEXT: vse16.v v9, (a6)
6268 ; RV64ZVE32F-NEXT: andi a0, a3, 16
6269 ; RV64ZVE32F-NEXT: beqz a0, .LBB57_5
6270 ; RV64ZVE32F-NEXT: .LBB57_13: # %cond.store7
6271 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6272 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
6273 ; RV64ZVE32F-NEXT: vse16.v v9, (a5)
6274 ; RV64ZVE32F-NEXT: andi a0, a3, 32
6275 ; RV64ZVE32F-NEXT: beqz a0, .LBB57_6
6276 ; RV64ZVE32F-NEXT: .LBB57_14: # %cond.store9
6277 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6278 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
6279 ; RV64ZVE32F-NEXT: vse16.v v9, (a4)
6280 ; RV64ZVE32F-NEXT: andi a0, a3, 64
6281 ; RV64ZVE32F-NEXT: beqz a0, .LBB57_7
6282 ; RV64ZVE32F-NEXT: .LBB57_15: # %cond.store11
6283 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6284 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 6
6285 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
6286 ; RV64ZVE32F-NEXT: andi a0, a3, -128
6287 ; RV64ZVE32F-NEXT: beqz a0, .LBB57_8
6288 ; RV64ZVE32F-NEXT: .LBB57_16: # %cond.store13
6289 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6290 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
6291 ; RV64ZVE32F-NEXT: vse16.v v8, (a1)
6292 ; RV64ZVE32F-NEXT: ret
6293 call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
; Masked scatter of <8 x half> through pointers built by a getelementptr on
; %base with <8 x i8> indices (no explicit extension in the IR).
; Expected code with the full V extension: sign-extend the indices
; (vsext.vf4 on riscv32, vsext.vf8 on riscv64), double them with vadd.vv to
; scale by the 2-byte element size, then issue one masked indexed store
; (vsoxei32.v / vsoxei64.v).
; Expected code for the RV64ZVE32F configuration: no indexed store; the mask
; is moved to a scalar (vmv.x.s from v0) and each bit is tested in turn, with
; one vse16.v per enabled lane at address base + (index << 1).
; The assertion lines are autogenerated (see the NOTE on the first line of
; this file); regenerate them instead of editing by hand.
6297 define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
6298 ; RV32-LABEL: mscatter_baseidx_v8i8_v8f16:
6300 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
6301 ; RV32-NEXT: vsext.vf4 v10, v9
6302 ; RV32-NEXT: vadd.vv v10, v10, v10
6303 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
6304 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
6307 ; RV64-LABEL: mscatter_baseidx_v8i8_v8f16:
6309 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
6310 ; RV64-NEXT: vsext.vf8 v12, v9
6311 ; RV64-NEXT: vadd.vv v12, v12, v12
6312 ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma
6313 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
6316 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8_v8f16:
6317 ; RV64ZVE32F: # %bb.0:
6318 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6319 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
6320 ; RV64ZVE32F-NEXT: andi a2, a1, 1
6321 ; RV64ZVE32F-NEXT: beqz a2, .LBB58_2
6322 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
6323 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6324 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6325 ; RV64ZVE32F-NEXT: add a2, a0, a2
6326 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6327 ; RV64ZVE32F-NEXT: vse16.v v8, (a2)
6328 ; RV64ZVE32F-NEXT: .LBB58_2: # %else
6329 ; RV64ZVE32F-NEXT: andi a2, a1, 2
6330 ; RV64ZVE32F-NEXT: beqz a2, .LBB58_4
6331 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
6332 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6333 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
6334 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
6335 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6336 ; RV64ZVE32F-NEXT: add a2, a0, a2
6337 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6338 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
6339 ; RV64ZVE32F-NEXT: vse16.v v10, (a2)
6340 ; RV64ZVE32F-NEXT: .LBB58_4: # %else2
6341 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
6342 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
6343 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
6344 ; RV64ZVE32F-NEXT: andi a2, a1, 4
6345 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
6346 ; RV64ZVE32F-NEXT: bnez a2, .LBB58_12
6347 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
6348 ; RV64ZVE32F-NEXT: andi a2, a1, 8
6349 ; RV64ZVE32F-NEXT: bnez a2, .LBB58_13
6350 ; RV64ZVE32F-NEXT: .LBB58_6: # %else6
6351 ; RV64ZVE32F-NEXT: andi a2, a1, 16
6352 ; RV64ZVE32F-NEXT: bnez a2, .LBB58_14
6353 ; RV64ZVE32F-NEXT: .LBB58_7: # %else8
6354 ; RV64ZVE32F-NEXT: andi a2, a1, 32
6355 ; RV64ZVE32F-NEXT: beqz a2, .LBB58_9
6356 ; RV64ZVE32F-NEXT: .LBB58_8: # %cond.store9
6357 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6358 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
6359 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6360 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6361 ; RV64ZVE32F-NEXT: add a2, a0, a2
6362 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6363 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
6364 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
6365 ; RV64ZVE32F-NEXT: .LBB58_9: # %else10
6366 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
6367 ; RV64ZVE32F-NEXT: andi a2, a1, 64
6368 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
6369 ; RV64ZVE32F-NEXT: bnez a2, .LBB58_15
6370 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
6371 ; RV64ZVE32F-NEXT: andi a1, a1, -128
6372 ; RV64ZVE32F-NEXT: bnez a1, .LBB58_16
6373 ; RV64ZVE32F-NEXT: .LBB58_11: # %else14
6374 ; RV64ZVE32F-NEXT: ret
6375 ; RV64ZVE32F-NEXT: .LBB58_12: # %cond.store3
6376 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6377 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6378 ; RV64ZVE32F-NEXT: add a2, a0, a2
6379 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6380 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
6381 ; RV64ZVE32F-NEXT: vse16.v v11, (a2)
6382 ; RV64ZVE32F-NEXT: andi a2, a1, 8
6383 ; RV64ZVE32F-NEXT: beqz a2, .LBB58_6
6384 ; RV64ZVE32F-NEXT: .LBB58_13: # %cond.store5
6385 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6386 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
6387 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6388 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6389 ; RV64ZVE32F-NEXT: add a2, a0, a2
6390 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6391 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
6392 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
6393 ; RV64ZVE32F-NEXT: andi a2, a1, 16
6394 ; RV64ZVE32F-NEXT: beqz a2, .LBB58_7
6395 ; RV64ZVE32F-NEXT: .LBB58_14: # %cond.store7
6396 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6397 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
6398 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6399 ; RV64ZVE32F-NEXT: add a2, a0, a2
6400 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6401 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
6402 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
6403 ; RV64ZVE32F-NEXT: andi a2, a1, 32
6404 ; RV64ZVE32F-NEXT: bnez a2, .LBB58_8
6405 ; RV64ZVE32F-NEXT: j .LBB58_9
6406 ; RV64ZVE32F-NEXT: .LBB58_15: # %cond.store11
6407 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6408 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6409 ; RV64ZVE32F-NEXT: add a2, a0, a2
6410 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6411 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
6412 ; RV64ZVE32F-NEXT: vse16.v v10, (a2)
6413 ; RV64ZVE32F-NEXT: andi a1, a1, -128
6414 ; RV64ZVE32F-NEXT: beqz a1, .LBB58_11
6415 ; RV64ZVE32F-NEXT: .LBB58_16: # %cond.store13
6416 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6417 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
6418 ; RV64ZVE32F-NEXT: vmv.x.s a1, v9
6419 ; RV64ZVE32F-NEXT: slli a1, a1, 1
6420 ; RV64ZVE32F-NEXT: add a0, a0, a1
6421 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6422 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
6423 ; RV64ZVE32F-NEXT: vse16.v v8, (a0)
6424 ; RV64ZVE32F-NEXT: ret
6425 %ptrs = getelementptr inbounds half, ptr %base, <8 x i8> %idxs
6426 call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
; Masked scatter of <8 x half> where the <8 x i8> indices are explicitly
; sign-extended to <8 x i16> in the IR before the getelementptr on %base.
; Expected code with the full V extension: the indices are sign-extended
; straight from 8 bits to the pointer width (vsext.vf4 on riscv32, vsext.vf8
; on riscv64), doubled with vadd.vv to scale by the 2-byte element size, and
; used by one masked indexed store (vsoxei32.v / vsoxei64.v).
; Expected code for the RV64ZVE32F configuration: the mask is moved to a
; scalar and each bit is tested in turn, with one vse16.v per enabled lane
; at address base + (index << 1).
; The assertion lines are autogenerated (see the NOTE on the first line of
; this file); regenerate them instead of editing by hand.
6430 define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
6431 ; RV32-LABEL: mscatter_baseidx_sext_v8i8_v8f16:
6433 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
6434 ; RV32-NEXT: vsext.vf4 v10, v9
6435 ; RV32-NEXT: vadd.vv v10, v10, v10
6436 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
6437 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
6440 ; RV64-LABEL: mscatter_baseidx_sext_v8i8_v8f16:
6442 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
6443 ; RV64-NEXT: vsext.vf8 v12, v9
6444 ; RV64-NEXT: vadd.vv v12, v12, v12
6445 ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma
6446 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
6449 ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8f16:
6450 ; RV64ZVE32F: # %bb.0:
6451 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6452 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
6453 ; RV64ZVE32F-NEXT: andi a2, a1, 1
6454 ; RV64ZVE32F-NEXT: beqz a2, .LBB59_2
6455 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
6456 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6457 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6458 ; RV64ZVE32F-NEXT: add a2, a0, a2
6459 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6460 ; RV64ZVE32F-NEXT: vse16.v v8, (a2)
6461 ; RV64ZVE32F-NEXT: .LBB59_2: # %else
6462 ; RV64ZVE32F-NEXT: andi a2, a1, 2
6463 ; RV64ZVE32F-NEXT: beqz a2, .LBB59_4
6464 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
6465 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6466 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
6467 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
6468 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6469 ; RV64ZVE32F-NEXT: add a2, a0, a2
6470 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6471 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
6472 ; RV64ZVE32F-NEXT: vse16.v v10, (a2)
6473 ; RV64ZVE32F-NEXT: .LBB59_4: # %else2
6474 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
6475 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
6476 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
6477 ; RV64ZVE32F-NEXT: andi a2, a1, 4
6478 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
6479 ; RV64ZVE32F-NEXT: bnez a2, .LBB59_12
6480 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
6481 ; RV64ZVE32F-NEXT: andi a2, a1, 8
6482 ; RV64ZVE32F-NEXT: bnez a2, .LBB59_13
6483 ; RV64ZVE32F-NEXT: .LBB59_6: # %else6
6484 ; RV64ZVE32F-NEXT: andi a2, a1, 16
6485 ; RV64ZVE32F-NEXT: bnez a2, .LBB59_14
6486 ; RV64ZVE32F-NEXT: .LBB59_7: # %else8
6487 ; RV64ZVE32F-NEXT: andi a2, a1, 32
6488 ; RV64ZVE32F-NEXT: beqz a2, .LBB59_9
6489 ; RV64ZVE32F-NEXT: .LBB59_8: # %cond.store9
6490 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6491 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
6492 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6493 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6494 ; RV64ZVE32F-NEXT: add a2, a0, a2
6495 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6496 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
6497 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
6498 ; RV64ZVE32F-NEXT: .LBB59_9: # %else10
6499 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
6500 ; RV64ZVE32F-NEXT: andi a2, a1, 64
6501 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
6502 ; RV64ZVE32F-NEXT: bnez a2, .LBB59_15
6503 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
6504 ; RV64ZVE32F-NEXT: andi a1, a1, -128
6505 ; RV64ZVE32F-NEXT: bnez a1, .LBB59_16
6506 ; RV64ZVE32F-NEXT: .LBB59_11: # %else14
6507 ; RV64ZVE32F-NEXT: ret
6508 ; RV64ZVE32F-NEXT: .LBB59_12: # %cond.store3
6509 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6510 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6511 ; RV64ZVE32F-NEXT: add a2, a0, a2
6512 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6513 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
6514 ; RV64ZVE32F-NEXT: vse16.v v11, (a2)
6515 ; RV64ZVE32F-NEXT: andi a2, a1, 8
6516 ; RV64ZVE32F-NEXT: beqz a2, .LBB59_6
6517 ; RV64ZVE32F-NEXT: .LBB59_13: # %cond.store5
6518 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6519 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
6520 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6521 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6522 ; RV64ZVE32F-NEXT: add a2, a0, a2
6523 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6524 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
6525 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
6526 ; RV64ZVE32F-NEXT: andi a2, a1, 16
6527 ; RV64ZVE32F-NEXT: beqz a2, .LBB59_7
6528 ; RV64ZVE32F-NEXT: .LBB59_14: # %cond.store7
6529 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6530 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
6531 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6532 ; RV64ZVE32F-NEXT: add a2, a0, a2
6533 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6534 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
6535 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
6536 ; RV64ZVE32F-NEXT: andi a2, a1, 32
6537 ; RV64ZVE32F-NEXT: bnez a2, .LBB59_8
6538 ; RV64ZVE32F-NEXT: j .LBB59_9
6539 ; RV64ZVE32F-NEXT: .LBB59_15: # %cond.store11
6540 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6541 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6542 ; RV64ZVE32F-NEXT: add a2, a0, a2
6543 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6544 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
6545 ; RV64ZVE32F-NEXT: vse16.v v10, (a2)
6546 ; RV64ZVE32F-NEXT: andi a1, a1, -128
6547 ; RV64ZVE32F-NEXT: beqz a1, .LBB59_11
6548 ; RV64ZVE32F-NEXT: .LBB59_16: # %cond.store13
6549 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6550 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
6551 ; RV64ZVE32F-NEXT: vmv.x.s a1, v9
6552 ; RV64ZVE32F-NEXT: slli a1, a1, 1
6553 ; RV64ZVE32F-NEXT: add a0, a0, a1
6554 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6555 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
6556 ; RV64ZVE32F-NEXT: vse16.v v8, (a0)
6557 ; RV64ZVE32F-NEXT: ret
6558 %eidxs = sext <8 x i8> %idxs to <8 x i16>
6559 %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs
6560 call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
; Masked scatter of <8 x half> where the <8 x i8> indices are explicitly
; zero-extended to <8 x i16> in the IR before the getelementptr on %base.
; Expected code with the full V extension (same on riscv32 and riscv64): a
; widening unsigned add of the index vector with itself (vwaddu.vv) yields
; the doubled 16-bit byte offsets, which feed a single masked vsoxei16.v.
; Expected code for the RV64ZVE32F configuration: the mask is moved to a
; scalar and each bit is tested in turn; every extracted index is masked
; with "andi ..., 255" before the shift-by-1 and add to the base, followed
; by one vse16.v per enabled lane.
; The assertion lines are autogenerated (see the NOTE on the first line of
; this file); regenerate them instead of editing by hand.
6564 define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
6565 ; RV32-LABEL: mscatter_baseidx_zext_v8i8_v8f16:
6567 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
6568 ; RV32-NEXT: vwaddu.vv v10, v9, v9
6569 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
6570 ; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t
6573 ; RV64-LABEL: mscatter_baseidx_zext_v8i8_v8f16:
6575 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
6576 ; RV64-NEXT: vwaddu.vv v10, v9, v9
6577 ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma
6578 ; RV64-NEXT: vsoxei16.v v8, (a0), v10, v0.t
6581 ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8f16:
6582 ; RV64ZVE32F: # %bb.0:
6583 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6584 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
6585 ; RV64ZVE32F-NEXT: andi a2, a1, 1
6586 ; RV64ZVE32F-NEXT: beqz a2, .LBB60_2
6587 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
6588 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6589 ; RV64ZVE32F-NEXT: andi a2, a2, 255
6590 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6591 ; RV64ZVE32F-NEXT: add a2, a0, a2
6592 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6593 ; RV64ZVE32F-NEXT: vse16.v v8, (a2)
6594 ; RV64ZVE32F-NEXT: .LBB60_2: # %else
6595 ; RV64ZVE32F-NEXT: andi a2, a1, 2
6596 ; RV64ZVE32F-NEXT: beqz a2, .LBB60_4
6597 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
6598 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6599 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
6600 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
6601 ; RV64ZVE32F-NEXT: andi a2, a2, 255
6602 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6603 ; RV64ZVE32F-NEXT: add a2, a0, a2
6604 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6605 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
6606 ; RV64ZVE32F-NEXT: vse16.v v10, (a2)
6607 ; RV64ZVE32F-NEXT: .LBB60_4: # %else2
6608 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
6609 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
6610 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
6611 ; RV64ZVE32F-NEXT: andi a2, a1, 4
6612 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
6613 ; RV64ZVE32F-NEXT: bnez a2, .LBB60_12
6614 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
6615 ; RV64ZVE32F-NEXT: andi a2, a1, 8
6616 ; RV64ZVE32F-NEXT: bnez a2, .LBB60_13
6617 ; RV64ZVE32F-NEXT: .LBB60_6: # %else6
6618 ; RV64ZVE32F-NEXT: andi a2, a1, 16
6619 ; RV64ZVE32F-NEXT: bnez a2, .LBB60_14
6620 ; RV64ZVE32F-NEXT: .LBB60_7: # %else8
6621 ; RV64ZVE32F-NEXT: andi a2, a1, 32
6622 ; RV64ZVE32F-NEXT: beqz a2, .LBB60_9
6623 ; RV64ZVE32F-NEXT: .LBB60_8: # %cond.store9
6624 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6625 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
6626 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6627 ; RV64ZVE32F-NEXT: andi a2, a2, 255
6628 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6629 ; RV64ZVE32F-NEXT: add a2, a0, a2
6630 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6631 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
6632 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
6633 ; RV64ZVE32F-NEXT: .LBB60_9: # %else10
6634 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
6635 ; RV64ZVE32F-NEXT: andi a2, a1, 64
6636 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
6637 ; RV64ZVE32F-NEXT: bnez a2, .LBB60_15
6638 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
6639 ; RV64ZVE32F-NEXT: andi a1, a1, -128
6640 ; RV64ZVE32F-NEXT: bnez a1, .LBB60_16
6641 ; RV64ZVE32F-NEXT: .LBB60_11: # %else14
6642 ; RV64ZVE32F-NEXT: ret
6643 ; RV64ZVE32F-NEXT: .LBB60_12: # %cond.store3
6644 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6645 ; RV64ZVE32F-NEXT: andi a2, a2, 255
6646 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6647 ; RV64ZVE32F-NEXT: add a2, a0, a2
6648 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6649 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
6650 ; RV64ZVE32F-NEXT: vse16.v v11, (a2)
6651 ; RV64ZVE32F-NEXT: andi a2, a1, 8
6652 ; RV64ZVE32F-NEXT: beqz a2, .LBB60_6
6653 ; RV64ZVE32F-NEXT: .LBB60_13: # %cond.store5
6654 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6655 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
6656 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6657 ; RV64ZVE32F-NEXT: andi a2, a2, 255
6658 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6659 ; RV64ZVE32F-NEXT: add a2, a0, a2
6660 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6661 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
6662 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
6663 ; RV64ZVE32F-NEXT: andi a2, a1, 16
6664 ; RV64ZVE32F-NEXT: beqz a2, .LBB60_7
6665 ; RV64ZVE32F-NEXT: .LBB60_14: # %cond.store7
6666 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6667 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
6668 ; RV64ZVE32F-NEXT: andi a2, a2, 255
6669 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6670 ; RV64ZVE32F-NEXT: add a2, a0, a2
6671 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6672 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
6673 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
6674 ; RV64ZVE32F-NEXT: andi a2, a1, 32
6675 ; RV64ZVE32F-NEXT: bnez a2, .LBB60_8
6676 ; RV64ZVE32F-NEXT: j .LBB60_9
6677 ; RV64ZVE32F-NEXT: .LBB60_15: # %cond.store11
6678 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6679 ; RV64ZVE32F-NEXT: andi a2, a2, 255
6680 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6681 ; RV64ZVE32F-NEXT: add a2, a0, a2
6682 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6683 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
6684 ; RV64ZVE32F-NEXT: vse16.v v10, (a2)
6685 ; RV64ZVE32F-NEXT: andi a1, a1, -128
6686 ; RV64ZVE32F-NEXT: beqz a1, .LBB60_11
6687 ; RV64ZVE32F-NEXT: .LBB60_16: # %cond.store13
6688 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6689 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
6690 ; RV64ZVE32F-NEXT: vmv.x.s a1, v9
6691 ; RV64ZVE32F-NEXT: andi a1, a1, 255
6692 ; RV64ZVE32F-NEXT: slli a1, a1, 1
6693 ; RV64ZVE32F-NEXT: add a0, a0, a1
6694 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6695 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
6696 ; RV64ZVE32F-NEXT: vse16.v v8, (a0)
6697 ; RV64ZVE32F-NEXT: ret
6698 %eidxs = zext <8 x i8> %idxs to <8 x i16>
6699 %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs
6700 call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
; Masked scatter of <8 x half> through pointers built by a getelementptr on
; %base with <8 x i16> indices (index width equals the element width).
; Expected code with the full V extension on riscv32: a widening signed add
; of the index vector with itself (vwadd.vv) produces the doubled 32-bit
; byte offsets for a masked vsoxei32.v.
; Expected code with the full V extension on riscv64: the indices are
; sign-extended to 64 bits (vsext.vf4), doubled with vadd.vv, and used by a
; masked vsoxei64.v.
; Expected code for the RV64ZVE32F configuration: the mask is moved to a
; scalar and each bit is tested in turn, with one vse16.v per enabled lane
; at address base + (index << 1).
; The assertion lines are autogenerated (see the NOTE on the first line of
; this file); regenerate them instead of editing by hand.
6704 define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
6705 ; RV32-LABEL: mscatter_baseidx_v8f16:
6707 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
6708 ; RV32-NEXT: vwadd.vv v10, v9, v9
6709 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
6712 ; RV64-LABEL: mscatter_baseidx_v8f16:
6714 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
6715 ; RV64-NEXT: vsext.vf4 v12, v9
6716 ; RV64-NEXT: vadd.vv v12, v12, v12
6717 ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma
6718 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
6721 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8f16:
6722 ; RV64ZVE32F: # %bb.0:
6723 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6724 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
6725 ; RV64ZVE32F-NEXT: andi a2, a1, 1
6726 ; RV64ZVE32F-NEXT: beqz a2, .LBB61_2
6727 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
6728 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6729 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6730 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6731 ; RV64ZVE32F-NEXT: add a2, a0, a2
6732 ; RV64ZVE32F-NEXT: vse16.v v8, (a2)
6733 ; RV64ZVE32F-NEXT: .LBB61_2: # %else
6734 ; RV64ZVE32F-NEXT: andi a2, a1, 2
6735 ; RV64ZVE32F-NEXT: beqz a2, .LBB61_4
6736 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
6737 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
6738 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
6739 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
6740 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6741 ; RV64ZVE32F-NEXT: add a2, a0, a2
6742 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6743 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
6744 ; RV64ZVE32F-NEXT: vse16.v v10, (a2)
6745 ; RV64ZVE32F-NEXT: .LBB61_4: # %else2
6746 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
6747 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
6748 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
6749 ; RV64ZVE32F-NEXT: andi a2, a1, 4
6750 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
6751 ; RV64ZVE32F-NEXT: bnez a2, .LBB61_12
6752 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
6753 ; RV64ZVE32F-NEXT: andi a2, a1, 8
6754 ; RV64ZVE32F-NEXT: bnez a2, .LBB61_13
6755 ; RV64ZVE32F-NEXT: .LBB61_6: # %else6
6756 ; RV64ZVE32F-NEXT: andi a2, a1, 16
6757 ; RV64ZVE32F-NEXT: bnez a2, .LBB61_14
6758 ; RV64ZVE32F-NEXT: .LBB61_7: # %else8
6759 ; RV64ZVE32F-NEXT: andi a2, a1, 32
6760 ; RV64ZVE32F-NEXT: beqz a2, .LBB61_9
6761 ; RV64ZVE32F-NEXT: .LBB61_8: # %cond.store9
6762 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
6763 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
6764 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6765 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6766 ; RV64ZVE32F-NEXT: add a2, a0, a2
6767 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6768 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
6769 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
6770 ; RV64ZVE32F-NEXT: .LBB61_9: # %else10
6771 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
6772 ; RV64ZVE32F-NEXT: andi a2, a1, 64
6773 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
6774 ; RV64ZVE32F-NEXT: bnez a2, .LBB61_15
6775 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
6776 ; RV64ZVE32F-NEXT: andi a1, a1, -128
6777 ; RV64ZVE32F-NEXT: bnez a1, .LBB61_16
6778 ; RV64ZVE32F-NEXT: .LBB61_11: # %else14
6779 ; RV64ZVE32F-NEXT: ret
6780 ; RV64ZVE32F-NEXT: .LBB61_12: # %cond.store3
6781 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6782 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6783 ; RV64ZVE32F-NEXT: add a2, a0, a2
6784 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6785 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
6786 ; RV64ZVE32F-NEXT: vse16.v v11, (a2)
6787 ; RV64ZVE32F-NEXT: andi a2, a1, 8
6788 ; RV64ZVE32F-NEXT: beqz a2, .LBB61_6
6789 ; RV64ZVE32F-NEXT: .LBB61_13: # %cond.store5
6790 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
6791 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
6792 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6793 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6794 ; RV64ZVE32F-NEXT: add a2, a0, a2
6795 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6796 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
6797 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
6798 ; RV64ZVE32F-NEXT: andi a2, a1, 16
6799 ; RV64ZVE32F-NEXT: beqz a2, .LBB61_7
6800 ; RV64ZVE32F-NEXT: .LBB61_14: # %cond.store7
6801 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6802 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
6803 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6804 ; RV64ZVE32F-NEXT: add a2, a0, a2
6805 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
6806 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
6807 ; RV64ZVE32F-NEXT: andi a2, a1, 32
6808 ; RV64ZVE32F-NEXT: bnez a2, .LBB61_8
6809 ; RV64ZVE32F-NEXT: j .LBB61_9
6810 ; RV64ZVE32F-NEXT: .LBB61_15: # %cond.store11
6811 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6812 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6813 ; RV64ZVE32F-NEXT: add a2, a0, a2
6814 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6815 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
6816 ; RV64ZVE32F-NEXT: vse16.v v10, (a2)
6817 ; RV64ZVE32F-NEXT: andi a1, a1, -128
6818 ; RV64ZVE32F-NEXT: beqz a1, .LBB61_11
6819 ; RV64ZVE32F-NEXT: .LBB61_16: # %cond.store13
6820 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
6821 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
6822 ; RV64ZVE32F-NEXT: vmv.x.s a1, v9
6823 ; RV64ZVE32F-NEXT: slli a1, a1, 1
6824 ; RV64ZVE32F-NEXT: add a0, a0, a1
6825 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6826 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
6827 ; RV64ZVE32F-NEXT: vse16.v v8, (a0)
6828 ; RV64ZVE32F-NEXT: ret
6829 %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %idxs
6830 call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
; Single-element float masked scatter (intrinsic declaration followed by the
; test function). The call passes i32 4 as the intrinsic's i32 operand.
; Expected code for the three configurations that use an indexed store
; (RV32V, RV64 and RV32ZVE32F checks): one masked vsoxei32.v / vsoxei64.v
; with base register "zero", i.e. the pointer vector in v9 supplies the full
; addresses.
; Expected code for the RV64ZVE32F configuration: vfirst.m is applied to the
; mask in v0 and the store is skipped when its result is nonzero; otherwise
; a one-element vse32.v writes v8 to the address held in scalar a0.
; The assertion lines are autogenerated (see the NOTE on the first line of
; this file); regenerate them instead of editing by hand.
6834 declare void @llvm.masked.scatter.v1f32.v1p0(<1 x float>, <1 x ptr>, i32, <1 x i1>)
6836 define void @mscatter_v1f32(<1 x float> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
6837 ; RV32V-LABEL: mscatter_v1f32:
6839 ; RV32V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
6840 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
6843 ; RV64-LABEL: mscatter_v1f32:
6845 ; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
6846 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
6849 ; RV32ZVE32F-LABEL: mscatter_v1f32:
6850 ; RV32ZVE32F: # %bb.0:
6851 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6852 ; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
6853 ; RV32ZVE32F-NEXT: ret
6855 ; RV64ZVE32F-LABEL: mscatter_v1f32:
6856 ; RV64ZVE32F: # %bb.0:
6857 ; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
6858 ; RV64ZVE32F-NEXT: vfirst.m a1, v0
6859 ; RV64ZVE32F-NEXT: bnez a1, .LBB62_2
6860 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
6861 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6862 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
6863 ; RV64ZVE32F-NEXT: .LBB62_2: # %else
6864 ; RV64ZVE32F-NEXT: ret
6865 call void @llvm.masked.scatter.v1f32.v1p0(<1 x float> %val, <1 x ptr> %ptrs, i32 4, <1 x i1> %m)
; Two-element float masked scatter (intrinsic declaration followed by the
; test function). The call passes i32 4 as the intrinsic's i32 operand.
; Expected code for the three configurations that use an indexed store
; (RV32V, RV64 and RV32ZVE32F checks): one masked vsoxei32.v / vsoxei64.v
; with base register "zero" and the pointer vector in v9 as the index.
; Expected code for the RV64ZVE32F configuration: the mask is moved to a
; scalar and bits 1 and 2 are tested separately; lane 0 is stored with
; vse32.v to the address in a0, lane 1 is slid down and stored to the
; address in a1.
; The assertion lines are autogenerated (see the NOTE on the first line of
; this file); regenerate them instead of editing by hand.
6869 declare void @llvm.masked.scatter.v2f32.v2p0(<2 x float>, <2 x ptr>, i32, <2 x i1>)
6871 define void @mscatter_v2f32(<2 x float> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
6872 ; RV32V-LABEL: mscatter_v2f32:
6874 ; RV32V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
6875 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
6878 ; RV64-LABEL: mscatter_v2f32:
6880 ; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
6881 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
6884 ; RV32ZVE32F-LABEL: mscatter_v2f32:
6885 ; RV32ZVE32F: # %bb.0:
6886 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
6887 ; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
6888 ; RV32ZVE32F-NEXT: ret
6890 ; RV64ZVE32F-LABEL: mscatter_v2f32:
6891 ; RV64ZVE32F: # %bb.0:
6892 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6893 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
6894 ; RV64ZVE32F-NEXT: andi a3, a2, 1
6895 ; RV64ZVE32F-NEXT: bnez a3, .LBB63_3
6896 ; RV64ZVE32F-NEXT: # %bb.1: # %else
6897 ; RV64ZVE32F-NEXT: andi a2, a2, 2
6898 ; RV64ZVE32F-NEXT: bnez a2, .LBB63_4
6899 ; RV64ZVE32F-NEXT: .LBB63_2: # %else2
6900 ; RV64ZVE32F-NEXT: ret
6901 ; RV64ZVE32F-NEXT: .LBB63_3: # %cond.store
6902 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6903 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
6904 ; RV64ZVE32F-NEXT: andi a2, a2, 2
6905 ; RV64ZVE32F-NEXT: beqz a2, .LBB63_2
6906 ; RV64ZVE32F-NEXT: .LBB63_4: # %cond.store1
6907 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6908 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
6909 ; RV64ZVE32F-NEXT: vse32.v v8, (a1)
6910 ; RV64ZVE32F-NEXT: ret
6911 call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> %val, <2 x ptr> %ptrs, i32 4, <2 x i1> %m)
; Four-element float masked scatter (intrinsic declaration followed by the
; test function). The call passes i32 4 as the intrinsic's i32 operand.
; Expected code with the full V extension: one masked indexed store with
; base register "zero" (vsoxei32.v indexed by v9 on riscv32, vsoxei64.v
; indexed by v10 on riscv64).
; Expected code for the RV64ZVE32F configuration: the four pointers are
; loaded with ld from offsets 0, 8, 16 and 24 of the address in a0, the mask
; is moved to a scalar, and bits 1, 2, 4 and 8 are tested in turn with one
; vse32.v per enabled lane (lanes 1..3 are first moved to element 0 with
; vslidedown.vi).
; The assertion lines are autogenerated (see the NOTE on the first line of
; this file); regenerate them instead of editing by hand.
6915 declare void @llvm.masked.scatter.v4f32.v4p0(<4 x float>, <4 x ptr>, i32, <4 x i1>)
6917 define void @mscatter_v4f32(<4 x float> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
6918 ; RV32-LABEL: mscatter_v4f32:
6920 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
6921 ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
6924 ; RV64-LABEL: mscatter_v4f32:
6926 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
6927 ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
6930 ; RV64ZVE32F-LABEL: mscatter_v4f32:
6931 ; RV64ZVE32F: # %bb.0:
6932 ; RV64ZVE32F-NEXT: ld a1, 24(a0)
6933 ; RV64ZVE32F-NEXT: ld a2, 16(a0)
6934 ; RV64ZVE32F-NEXT: ld a4, 8(a0)
6935 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6936 ; RV64ZVE32F-NEXT: vmv.x.s a3, v0
6937 ; RV64ZVE32F-NEXT: andi a5, a3, 1
6938 ; RV64ZVE32F-NEXT: bnez a5, .LBB64_5
6939 ; RV64ZVE32F-NEXT: # %bb.1: # %else
6940 ; RV64ZVE32F-NEXT: andi a0, a3, 2
6941 ; RV64ZVE32F-NEXT: bnez a0, .LBB64_6
6942 ; RV64ZVE32F-NEXT: .LBB64_2: # %else2
6943 ; RV64ZVE32F-NEXT: andi a0, a3, 4
6944 ; RV64ZVE32F-NEXT: bnez a0, .LBB64_7
6945 ; RV64ZVE32F-NEXT: .LBB64_3: # %else4
6946 ; RV64ZVE32F-NEXT: andi a3, a3, 8
6947 ; RV64ZVE32F-NEXT: bnez a3, .LBB64_8
6948 ; RV64ZVE32F-NEXT: .LBB64_4: # %else6
6949 ; RV64ZVE32F-NEXT: ret
6950 ; RV64ZVE32F-NEXT: .LBB64_5: # %cond.store
6951 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
6952 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6953 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
6954 ; RV64ZVE32F-NEXT: andi a0, a3, 2
6955 ; RV64ZVE32F-NEXT: beqz a0, .LBB64_2
6956 ; RV64ZVE32F-NEXT: .LBB64_6: # %cond.store1
6957 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6958 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
6959 ; RV64ZVE32F-NEXT: vse32.v v9, (a4)
6960 ; RV64ZVE32F-NEXT: andi a0, a3, 4
6961 ; RV64ZVE32F-NEXT: beqz a0, .LBB64_3
6962 ; RV64ZVE32F-NEXT: .LBB64_7: # %cond.store3
6963 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6964 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
6965 ; RV64ZVE32F-NEXT: vse32.v v9, (a2)
6966 ; RV64ZVE32F-NEXT: andi a3, a3, 8
6967 ; RV64ZVE32F-NEXT: beqz a3, .LBB64_4
6968 ; RV64ZVE32F-NEXT: .LBB64_8: # %cond.store5
6969 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6970 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
6971 ; RV64ZVE32F-NEXT: vse32.v v8, (a1)
6972 ; RV64ZVE32F-NEXT: ret
6973 call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %val, <4 x ptr> %ptrs, i32 4, <4 x i1> %m)
; Four-element float scatter whose mask is an all-true splat, built in the
; IR with insertelement + shufflevector (%mhead / %mtrue).
; Expected code with the full V extension: the mask operand disappears and
; an unmasked indexed store is emitted (vsoxei32.v / vsoxei64.v without the
; trailing v0.t).
; Expected code for the RV64ZVE32F configuration: the per-lane branch ladder
; is still present. The mask is materialized with vmset.m into v9 and moved
; to a scalar, the first branch is "beqz zero" (always taken), and bits 2, 4
; and 8 are still tested before the remaining stores.
; NOTE(review): this looks like the constant mask is not folded away in this
; configuration; the checks record the current output rather than an ideal
; one - confirm before treating it as intended behaviour.
; The assertion lines are autogenerated (see the NOTE on the first line of
; this file); regenerate them instead of editing by hand.
6977 define void @mscatter_truemask_v4f32(<4 x float> %val, <4 x ptr> %ptrs) {
6978 ; RV32-LABEL: mscatter_truemask_v4f32:
6980 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
6981 ; RV32-NEXT: vsoxei32.v v8, (zero), v9
6984 ; RV64-LABEL: mscatter_truemask_v4f32:
6986 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
6987 ; RV64-NEXT: vsoxei64.v v8, (zero), v10
6990 ; RV64ZVE32F-LABEL: mscatter_truemask_v4f32:
6991 ; RV64ZVE32F: # %bb.0:
6992 ; RV64ZVE32F-NEXT: ld a1, 24(a0)
6993 ; RV64ZVE32F-NEXT: ld a2, 16(a0)
6994 ; RV64ZVE32F-NEXT: ld a4, 8(a0)
6995 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
6996 ; RV64ZVE32F-NEXT: vmset.m v9
6997 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9
6998 ; RV64ZVE32F-NEXT: beqz zero, .LBB65_5
6999 ; RV64ZVE32F-NEXT: # %bb.1: # %else
7000 ; RV64ZVE32F-NEXT: andi a0, a3, 2
7001 ; RV64ZVE32F-NEXT: bnez a0, .LBB65_6
7002 ; RV64ZVE32F-NEXT: .LBB65_2: # %else2
7003 ; RV64ZVE32F-NEXT: andi a0, a3, 4
7004 ; RV64ZVE32F-NEXT: bnez a0, .LBB65_7
7005 ; RV64ZVE32F-NEXT: .LBB65_3: # %else4
7006 ; RV64ZVE32F-NEXT: andi a3, a3, 8
7007 ; RV64ZVE32F-NEXT: bnez a3, .LBB65_8
7008 ; RV64ZVE32F-NEXT: .LBB65_4: # %else6
7009 ; RV64ZVE32F-NEXT: ret
7010 ; RV64ZVE32F-NEXT: .LBB65_5: # %cond.store
7011 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
7012 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7013 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
7014 ; RV64ZVE32F-NEXT: andi a0, a3, 2
7015 ; RV64ZVE32F-NEXT: beqz a0, .LBB65_2
7016 ; RV64ZVE32F-NEXT: .LBB65_6: # %cond.store1
7017 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7018 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
7019 ; RV64ZVE32F-NEXT: vse32.v v9, (a4)
7020 ; RV64ZVE32F-NEXT: andi a0, a3, 4
7021 ; RV64ZVE32F-NEXT: beqz a0, .LBB65_3
7022 ; RV64ZVE32F-NEXT: .LBB65_7: # %cond.store3
7023 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7024 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
7025 ; RV64ZVE32F-NEXT: vse32.v v9, (a2)
7026 ; RV64ZVE32F-NEXT: andi a3, a3, 8
7027 ; RV64ZVE32F-NEXT: beqz a3, .LBB65_4
7028 ; RV64ZVE32F-NEXT: .LBB65_8: # %cond.store5
7029 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7030 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
7031 ; RV64ZVE32F-NEXT: vse32.v v8, (a1)
7032 ; RV64ZVE32F-NEXT: ret
7033 %mhead = insertelement <4 x i1> poison, i1 1, i32 0
7034 %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer
7035 call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %val, <4 x ptr> %ptrs, i32 4, <4 x i1> %mtrue)
; Scatter of <4 x float> with a constant all-false (zeroinitializer) mask.
; The expectation is written under the common CHECK prefix, so it is shared by
; every RUN configuration of this test.
7039 define void @mscatter_falsemask_v4f32(<4 x float> %val, <4 x ptr> %ptrs) {
7040 ; CHECK-LABEL: mscatter_falsemask_v4f32:
7043 call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %val, <4 x ptr> %ptrs, i32 4, <4 x i1> zeroinitializer)
; Masked scatter intrinsic for <8 x float> values stored through <8 x ptr>.
; Operands in order are the value vector, the pointer vector, an i32 (the
; alignment per the LLVM LangRef; callers below pass 4) and the <8 x i1> mask.
7047 declare void @llvm.masked.scatter.v8f32.v8p0(<8 x float>, <8 x ptr>, i32, <8 x i1>)
; Masked scatter of <8 x float> through eight arbitrary pointers with a
; run-time mask %m.
; The RV32 and RV64 expectations are a single masked indexed store (v0.t) at e32/m2.
; The RV64ZVE32F expectations are scalarized. The eight pointers are loaded from
; memory at a0, the mask bits are moved to a3 with vmv.x.s, and each lane is
; tested with andi (1, 2, 4, ... 64, then -128 for the last lane) before being
; stored with vse32.v. Lanes 4 to 7 are slid down at m2 and stored at m1.
7049 define void @mscatter_v8f32(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
7050 ; RV32-LABEL: mscatter_v8f32:
7052 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
7053 ; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
7056 ; RV64-LABEL: mscatter_v8f32:
7058 ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
7059 ; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
7062 ; RV64ZVE32F-LABEL: mscatter_v8f32:
7063 ; RV64ZVE32F: # %bb.0:
7064 ; RV64ZVE32F-NEXT: ld a1, 56(a0)
7065 ; RV64ZVE32F-NEXT: ld a2, 48(a0)
7066 ; RV64ZVE32F-NEXT: ld a4, 40(a0)
7067 ; RV64ZVE32F-NEXT: ld a5, 32(a0)
7068 ; RV64ZVE32F-NEXT: ld a6, 24(a0)
7069 ; RV64ZVE32F-NEXT: ld a7, 16(a0)
7070 ; RV64ZVE32F-NEXT: ld t0, 8(a0)
7071 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7072 ; RV64ZVE32F-NEXT: vmv.x.s a3, v0
7073 ; RV64ZVE32F-NEXT: andi t1, a3, 1
7074 ; RV64ZVE32F-NEXT: bnez t1, .LBB67_9
7075 ; RV64ZVE32F-NEXT: # %bb.1: # %else
7076 ; RV64ZVE32F-NEXT: andi a0, a3, 2
7077 ; RV64ZVE32F-NEXT: bnez a0, .LBB67_10
7078 ; RV64ZVE32F-NEXT: .LBB67_2: # %else2
7079 ; RV64ZVE32F-NEXT: andi a0, a3, 4
7080 ; RV64ZVE32F-NEXT: bnez a0, .LBB67_11
7081 ; RV64ZVE32F-NEXT: .LBB67_3: # %else4
7082 ; RV64ZVE32F-NEXT: andi a0, a3, 8
7083 ; RV64ZVE32F-NEXT: bnez a0, .LBB67_12
7084 ; RV64ZVE32F-NEXT: .LBB67_4: # %else6
7085 ; RV64ZVE32F-NEXT: andi a0, a3, 16
7086 ; RV64ZVE32F-NEXT: bnez a0, .LBB67_13
7087 ; RV64ZVE32F-NEXT: .LBB67_5: # %else8
7088 ; RV64ZVE32F-NEXT: andi a0, a3, 32
7089 ; RV64ZVE32F-NEXT: bnez a0, .LBB67_14
7090 ; RV64ZVE32F-NEXT: .LBB67_6: # %else10
7091 ; RV64ZVE32F-NEXT: andi a0, a3, 64
7092 ; RV64ZVE32F-NEXT: bnez a0, .LBB67_15
7093 ; RV64ZVE32F-NEXT: .LBB67_7: # %else12
7094 ; RV64ZVE32F-NEXT: andi a0, a3, -128
7095 ; RV64ZVE32F-NEXT: bnez a0, .LBB67_16
7096 ; RV64ZVE32F-NEXT: .LBB67_8: # %else14
7097 ; RV64ZVE32F-NEXT: ret
7098 ; RV64ZVE32F-NEXT: .LBB67_9: # %cond.store
7099 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
7100 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7101 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
7102 ; RV64ZVE32F-NEXT: andi a0, a3, 2
7103 ; RV64ZVE32F-NEXT: beqz a0, .LBB67_2
7104 ; RV64ZVE32F-NEXT: .LBB67_10: # %cond.store1
7105 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7106 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
7107 ; RV64ZVE32F-NEXT: vse32.v v10, (t0)
7108 ; RV64ZVE32F-NEXT: andi a0, a3, 4
7109 ; RV64ZVE32F-NEXT: beqz a0, .LBB67_3
7110 ; RV64ZVE32F-NEXT: .LBB67_11: # %cond.store3
7111 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7112 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2
7113 ; RV64ZVE32F-NEXT: vse32.v v10, (a7)
7114 ; RV64ZVE32F-NEXT: andi a0, a3, 8
7115 ; RV64ZVE32F-NEXT: beqz a0, .LBB67_4
7116 ; RV64ZVE32F-NEXT: .LBB67_12: # %cond.store5
7117 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7118 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
7119 ; RV64ZVE32F-NEXT: vse32.v v10, (a6)
7120 ; RV64ZVE32F-NEXT: andi a0, a3, 16
7121 ; RV64ZVE32F-NEXT: beqz a0, .LBB67_5
7122 ; RV64ZVE32F-NEXT: .LBB67_13: # %cond.store7
7123 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7124 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
7125 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7126 ; RV64ZVE32F-NEXT: vse32.v v10, (a5)
7127 ; RV64ZVE32F-NEXT: andi a0, a3, 32
7128 ; RV64ZVE32F-NEXT: beqz a0, .LBB67_6
7129 ; RV64ZVE32F-NEXT: .LBB67_14: # %cond.store9
7130 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7131 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5
7132 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7133 ; RV64ZVE32F-NEXT: vse32.v v10, (a4)
7134 ; RV64ZVE32F-NEXT: andi a0, a3, 64
7135 ; RV64ZVE32F-NEXT: beqz a0, .LBB67_7
7136 ; RV64ZVE32F-NEXT: .LBB67_15: # %cond.store11
7137 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7138 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
7139 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7140 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
7141 ; RV64ZVE32F-NEXT: andi a0, a3, -128
7142 ; RV64ZVE32F-NEXT: beqz a0, .LBB67_8
7143 ; RV64ZVE32F-NEXT: .LBB67_16: # %cond.store13
7144 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7145 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
7146 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7147 ; RV64ZVE32F-NEXT: vse32.v v8, (a1)
7148 ; RV64ZVE32F-NEXT: ret
; The scatter under test, with alignment 4 and the caller-supplied mask.
7149 call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
; Masked scatter of <8 x float> to %base indexed by a <8 x i8> vector that is
; fed directly to the getelementptr (no explicit extension in the IR).
; The RV32 expectations sign-extend the indices with vsext.vf4, scale them by 4
; (vsll.vi 2) and use vsoxei32.v with the base in a0.
; The RV64 expectations do the same at e64 (vsext.vf8) and use vsoxei64.v.
; The RV64ZVE32F expectations are scalarized. For every set mask bit the index
; is extracted with vmv.x.s, shifted left by 2, added to a0, and the matching
; value lane is stored with vse32.v.
7153 define void @mscatter_baseidx_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
7154 ; RV32-LABEL: mscatter_baseidx_v8i8_v8f32:
7156 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
7157 ; RV32-NEXT: vsext.vf4 v12, v10
7158 ; RV32-NEXT: vsll.vi v10, v12, 2
7159 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
7162 ; RV64-LABEL: mscatter_baseidx_v8i8_v8f32:
7164 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
7165 ; RV64-NEXT: vsext.vf8 v12, v10
7166 ; RV64-NEXT: vsll.vi v12, v12, 2
7167 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
7168 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
7171 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8_v8f32:
7172 ; RV64ZVE32F: # %bb.0:
7173 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7174 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
7175 ; RV64ZVE32F-NEXT: andi a2, a1, 1
7176 ; RV64ZVE32F-NEXT: beqz a2, .LBB68_2
7177 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
7178 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7179 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7180 ; RV64ZVE32F-NEXT: add a2, a0, a2
7181 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7182 ; RV64ZVE32F-NEXT: vse32.v v8, (a2)
7183 ; RV64ZVE32F-NEXT: .LBB68_2: # %else
7184 ; RV64ZVE32F-NEXT: andi a2, a1, 2
7185 ; RV64ZVE32F-NEXT: beqz a2, .LBB68_4
7186 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
7187 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7188 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
7189 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
7190 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7191 ; RV64ZVE32F-NEXT: add a2, a0, a2
7192 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
7193 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
7194 ; RV64ZVE32F-NEXT: vse32.v v11, (a2)
7195 ; RV64ZVE32F-NEXT: .LBB68_4: # %else2
7196 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
7197 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
7198 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
7199 ; RV64ZVE32F-NEXT: andi a2, a1, 4
7200 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
7201 ; RV64ZVE32F-NEXT: bnez a2, .LBB68_12
7202 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
7203 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7204 ; RV64ZVE32F-NEXT: bnez a2, .LBB68_13
7205 ; RV64ZVE32F-NEXT: .LBB68_6: # %else6
7206 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7207 ; RV64ZVE32F-NEXT: bnez a2, .LBB68_14
7208 ; RV64ZVE32F-NEXT: .LBB68_7: # %else8
7209 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7210 ; RV64ZVE32F-NEXT: beqz a2, .LBB68_9
7211 ; RV64ZVE32F-NEXT: .LBB68_8: # %cond.store9
7212 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7213 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
7214 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7215 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7216 ; RV64ZVE32F-NEXT: add a2, a0, a2
7217 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7218 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
7219 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7220 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7221 ; RV64ZVE32F-NEXT: .LBB68_9: # %else10
7222 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
7223 ; RV64ZVE32F-NEXT: andi a2, a1, 64
7224 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
7225 ; RV64ZVE32F-NEXT: bnez a2, .LBB68_15
7226 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
7227 ; RV64ZVE32F-NEXT: andi a1, a1, -128
7228 ; RV64ZVE32F-NEXT: bnez a1, .LBB68_16
7229 ; RV64ZVE32F-NEXT: .LBB68_11: # %else14
7230 ; RV64ZVE32F-NEXT: ret
7231 ; RV64ZVE32F-NEXT: .LBB68_12: # %cond.store3
7232 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7233 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7234 ; RV64ZVE32F-NEXT: add a2, a0, a2
7235 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7236 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
7237 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7238 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7239 ; RV64ZVE32F-NEXT: beqz a2, .LBB68_6
7240 ; RV64ZVE32F-NEXT: .LBB68_13: # %cond.store5
7241 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7242 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
7243 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7244 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7245 ; RV64ZVE32F-NEXT: add a2, a0, a2
7246 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
7247 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
7248 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
7249 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7250 ; RV64ZVE32F-NEXT: beqz a2, .LBB68_7
7251 ; RV64ZVE32F-NEXT: .LBB68_14: # %cond.store7
7252 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7253 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
7254 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7255 ; RV64ZVE32F-NEXT: add a2, a0, a2
7256 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7257 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
7258 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7259 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7260 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7261 ; RV64ZVE32F-NEXT: bnez a2, .LBB68_8
7262 ; RV64ZVE32F-NEXT: j .LBB68_9
7263 ; RV64ZVE32F-NEXT: .LBB68_15: # %cond.store11
7264 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7265 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7266 ; RV64ZVE32F-NEXT: add a2, a0, a2
7267 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7268 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
7269 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7270 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7271 ; RV64ZVE32F-NEXT: andi a1, a1, -128
7272 ; RV64ZVE32F-NEXT: beqz a1, .LBB68_11
7273 ; RV64ZVE32F-NEXT: .LBB68_16: # %cond.store13
7274 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7275 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
7276 ; RV64ZVE32F-NEXT: vmv.x.s a1, v10
7277 ; RV64ZVE32F-NEXT: slli a1, a1, 2
7278 ; RV64ZVE32F-NEXT: add a0, a0, a1
7279 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7280 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
7281 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7282 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
7283 ; RV64ZVE32F-NEXT: ret
; Per-lane addresses are float elements of %base selected by the i8 indices.
7284 %ptrs = getelementptr inbounds float, ptr %base, <8 x i8> %idxs
7285 call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
; Masked scatter of <8 x float> to %base indexed by <8 x i8> values that the IR
; explicitly sign-extends to <8 x i32> before the getelementptr.
; The RV32 expectations use vsext.vf4, vsll.vi 2 and vsoxei32.v.
; The RV64 expectations use vsext.vf8 at e64, vsll.vi 2 and vsoxei64.v.
; The RV64ZVE32F expectations are scalarized. Each index is read with vmv.x.s
; at e8, shifted left by 2 and added to the base in a0 before a one-element
; vse32.v store.
7289 define void @mscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
7290 ; RV32-LABEL: mscatter_baseidx_sext_v8i8_v8f32:
7292 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
7293 ; RV32-NEXT: vsext.vf4 v12, v10
7294 ; RV32-NEXT: vsll.vi v10, v12, 2
7295 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
7298 ; RV64-LABEL: mscatter_baseidx_sext_v8i8_v8f32:
7300 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
7301 ; RV64-NEXT: vsext.vf8 v12, v10
7302 ; RV64-NEXT: vsll.vi v12, v12, 2
7303 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
7304 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
7307 ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8f32:
7308 ; RV64ZVE32F: # %bb.0:
7309 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7310 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
7311 ; RV64ZVE32F-NEXT: andi a2, a1, 1
7312 ; RV64ZVE32F-NEXT: beqz a2, .LBB69_2
7313 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
7314 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7315 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7316 ; RV64ZVE32F-NEXT: add a2, a0, a2
7317 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7318 ; RV64ZVE32F-NEXT: vse32.v v8, (a2)
7319 ; RV64ZVE32F-NEXT: .LBB69_2: # %else
7320 ; RV64ZVE32F-NEXT: andi a2, a1, 2
7321 ; RV64ZVE32F-NEXT: beqz a2, .LBB69_4
7322 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
7323 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7324 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
7325 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
7326 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7327 ; RV64ZVE32F-NEXT: add a2, a0, a2
7328 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
7329 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
7330 ; RV64ZVE32F-NEXT: vse32.v v11, (a2)
7331 ; RV64ZVE32F-NEXT: .LBB69_4: # %else2
7332 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
7333 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
7334 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
7335 ; RV64ZVE32F-NEXT: andi a2, a1, 4
7336 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
7337 ; RV64ZVE32F-NEXT: bnez a2, .LBB69_12
7338 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
7339 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7340 ; RV64ZVE32F-NEXT: bnez a2, .LBB69_13
7341 ; RV64ZVE32F-NEXT: .LBB69_6: # %else6
7342 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7343 ; RV64ZVE32F-NEXT: bnez a2, .LBB69_14
7344 ; RV64ZVE32F-NEXT: .LBB69_7: # %else8
7345 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7346 ; RV64ZVE32F-NEXT: beqz a2, .LBB69_9
7347 ; RV64ZVE32F-NEXT: .LBB69_8: # %cond.store9
7348 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7349 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
7350 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7351 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7352 ; RV64ZVE32F-NEXT: add a2, a0, a2
7353 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7354 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
7355 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7356 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7357 ; RV64ZVE32F-NEXT: .LBB69_9: # %else10
7358 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
7359 ; RV64ZVE32F-NEXT: andi a2, a1, 64
7360 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
7361 ; RV64ZVE32F-NEXT: bnez a2, .LBB69_15
7362 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
7363 ; RV64ZVE32F-NEXT: andi a1, a1, -128
7364 ; RV64ZVE32F-NEXT: bnez a1, .LBB69_16
7365 ; RV64ZVE32F-NEXT: .LBB69_11: # %else14
7366 ; RV64ZVE32F-NEXT: ret
7367 ; RV64ZVE32F-NEXT: .LBB69_12: # %cond.store3
7368 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7369 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7370 ; RV64ZVE32F-NEXT: add a2, a0, a2
7371 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7372 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
7373 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7374 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7375 ; RV64ZVE32F-NEXT: beqz a2, .LBB69_6
7376 ; RV64ZVE32F-NEXT: .LBB69_13: # %cond.store5
7377 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7378 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
7379 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7380 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7381 ; RV64ZVE32F-NEXT: add a2, a0, a2
7382 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
7383 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
7384 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
7385 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7386 ; RV64ZVE32F-NEXT: beqz a2, .LBB69_7
7387 ; RV64ZVE32F-NEXT: .LBB69_14: # %cond.store7
7388 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7389 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
7390 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7391 ; RV64ZVE32F-NEXT: add a2, a0, a2
7392 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7393 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
7394 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7395 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7396 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7397 ; RV64ZVE32F-NEXT: bnez a2, .LBB69_8
7398 ; RV64ZVE32F-NEXT: j .LBB69_9
7399 ; RV64ZVE32F-NEXT: .LBB69_15: # %cond.store11
7400 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7401 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7402 ; RV64ZVE32F-NEXT: add a2, a0, a2
7403 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7404 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
7405 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7406 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7407 ; RV64ZVE32F-NEXT: andi a1, a1, -128
7408 ; RV64ZVE32F-NEXT: beqz a1, .LBB69_11
7409 ; RV64ZVE32F-NEXT: .LBB69_16: # %cond.store13
7410 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7411 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
7412 ; RV64ZVE32F-NEXT: vmv.x.s a1, v10
7413 ; RV64ZVE32F-NEXT: slli a1, a1, 2
7414 ; RV64ZVE32F-NEXT: add a0, a0, a1
7415 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7416 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
7417 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7418 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
7419 ; RV64ZVE32F-NEXT: ret
; Sign-extend the i8 indices to i32, form float element addresses, then scatter.
7420 %eidxs = sext <8 x i8> %idxs to <8 x i32>
7421 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
7422 call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
; Masked scatter of <8 x float> to %base indexed by <8 x i8> values that the IR
; explicitly zero-extends to <8 x i32> before the getelementptr.
; The RV32 and RV64 expectations are identical here. The indices are widened
; only to 16 bits (vzext.vf2 at e16), scaled with vsll.vi 2, and the store uses
; the 16-bit indexed form vsoxei16.v.
; The RV64ZVE32F expectations are scalarized. Each extracted index is masked
; with "andi ..., 255" before it is shifted left by 2 and added to the base.
7426 define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
7427 ; RV32-LABEL: mscatter_baseidx_zext_v8i8_v8f32:
7429 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
7430 ; RV32-NEXT: vzext.vf2 v11, v10
7431 ; RV32-NEXT: vsll.vi v10, v11, 2
7432 ; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
7433 ; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t
7436 ; RV64-LABEL: mscatter_baseidx_zext_v8i8_v8f32:
7438 ; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
7439 ; RV64-NEXT: vzext.vf2 v11, v10
7440 ; RV64-NEXT: vsll.vi v10, v11, 2
7441 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
7442 ; RV64-NEXT: vsoxei16.v v8, (a0), v10, v0.t
7445 ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8f32:
7446 ; RV64ZVE32F: # %bb.0:
7447 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7448 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
7449 ; RV64ZVE32F-NEXT: andi a2, a1, 1
7450 ; RV64ZVE32F-NEXT: beqz a2, .LBB70_2
7451 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
7452 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7453 ; RV64ZVE32F-NEXT: andi a2, a2, 255
7454 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7455 ; RV64ZVE32F-NEXT: add a2, a0, a2
7456 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7457 ; RV64ZVE32F-NEXT: vse32.v v8, (a2)
7458 ; RV64ZVE32F-NEXT: .LBB70_2: # %else
7459 ; RV64ZVE32F-NEXT: andi a2, a1, 2
7460 ; RV64ZVE32F-NEXT: beqz a2, .LBB70_4
7461 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
7462 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7463 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
7464 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
7465 ; RV64ZVE32F-NEXT: andi a2, a2, 255
7466 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7467 ; RV64ZVE32F-NEXT: add a2, a0, a2
7468 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
7469 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
7470 ; RV64ZVE32F-NEXT: vse32.v v11, (a2)
7471 ; RV64ZVE32F-NEXT: .LBB70_4: # %else2
7472 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
7473 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
7474 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
7475 ; RV64ZVE32F-NEXT: andi a2, a1, 4
7476 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
7477 ; RV64ZVE32F-NEXT: bnez a2, .LBB70_12
7478 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
7479 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7480 ; RV64ZVE32F-NEXT: bnez a2, .LBB70_13
7481 ; RV64ZVE32F-NEXT: .LBB70_6: # %else6
7482 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7483 ; RV64ZVE32F-NEXT: bnez a2, .LBB70_14
7484 ; RV64ZVE32F-NEXT: .LBB70_7: # %else8
7485 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7486 ; RV64ZVE32F-NEXT: beqz a2, .LBB70_9
7487 ; RV64ZVE32F-NEXT: .LBB70_8: # %cond.store9
7488 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7489 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
7490 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7491 ; RV64ZVE32F-NEXT: andi a2, a2, 255
7492 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7493 ; RV64ZVE32F-NEXT: add a2, a0, a2
7494 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7495 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
7496 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7497 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7498 ; RV64ZVE32F-NEXT: .LBB70_9: # %else10
7499 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
7500 ; RV64ZVE32F-NEXT: andi a2, a1, 64
7501 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
7502 ; RV64ZVE32F-NEXT: bnez a2, .LBB70_15
7503 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
7504 ; RV64ZVE32F-NEXT: andi a1, a1, -128
7505 ; RV64ZVE32F-NEXT: bnez a1, .LBB70_16
7506 ; RV64ZVE32F-NEXT: .LBB70_11: # %else14
7507 ; RV64ZVE32F-NEXT: ret
7508 ; RV64ZVE32F-NEXT: .LBB70_12: # %cond.store3
7509 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7510 ; RV64ZVE32F-NEXT: andi a2, a2, 255
7511 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7512 ; RV64ZVE32F-NEXT: add a2, a0, a2
7513 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7514 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
7515 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7516 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7517 ; RV64ZVE32F-NEXT: beqz a2, .LBB70_6
7518 ; RV64ZVE32F-NEXT: .LBB70_13: # %cond.store5
7519 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7520 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
7521 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7522 ; RV64ZVE32F-NEXT: andi a2, a2, 255
7523 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7524 ; RV64ZVE32F-NEXT: add a2, a0, a2
7525 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
7526 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
7527 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
7528 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7529 ; RV64ZVE32F-NEXT: beqz a2, .LBB70_7
7530 ; RV64ZVE32F-NEXT: .LBB70_14: # %cond.store7
7531 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7532 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
7533 ; RV64ZVE32F-NEXT: andi a2, a2, 255
7534 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7535 ; RV64ZVE32F-NEXT: add a2, a0, a2
7536 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7537 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
7538 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7539 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7540 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7541 ; RV64ZVE32F-NEXT: bnez a2, .LBB70_8
7542 ; RV64ZVE32F-NEXT: j .LBB70_9
7543 ; RV64ZVE32F-NEXT: .LBB70_15: # %cond.store11
7544 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7545 ; RV64ZVE32F-NEXT: andi a2, a2, 255
7546 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7547 ; RV64ZVE32F-NEXT: add a2, a0, a2
7548 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7549 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
7550 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7551 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7552 ; RV64ZVE32F-NEXT: andi a1, a1, -128
7553 ; RV64ZVE32F-NEXT: beqz a1, .LBB70_11
7554 ; RV64ZVE32F-NEXT: .LBB70_16: # %cond.store13
7555 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7556 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
7557 ; RV64ZVE32F-NEXT: vmv.x.s a1, v10
7558 ; RV64ZVE32F-NEXT: andi a1, a1, 255
7559 ; RV64ZVE32F-NEXT: slli a1, a1, 2
7560 ; RV64ZVE32F-NEXT: add a0, a0, a1
7561 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7562 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
7563 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7564 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
7565 ; RV64ZVE32F-NEXT: ret
; Zero-extend the i8 indices to i32, form float element addresses, then scatter.
7566 %eidxs = zext <8 x i8> %idxs to <8 x i32>
7567 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
7568 call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
; Masked scatter of <8 x float> to %base indexed by a <8 x i16> vector that is
; fed directly to the getelementptr (no explicit extension in the IR).
; The RV32 expectations use vsext.vf2, vsll.vi 2 and vsoxei32.v.
; The RV64 expectations use vsext.vf4 at e64, vsll.vi 2 and vsoxei64.v.
; The RV64ZVE32F expectations are scalarized. The mask is read at e8, while
; each index is extracted at e16/mf2 with vmv.x.s, shifted left by 2 and added
; to the base in a0 before a one-element vse32.v store.
7572 define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
7573 ; RV32-LABEL: mscatter_baseidx_v8i16_v8f32:
7575 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
7576 ; RV32-NEXT: vsext.vf2 v12, v10
7577 ; RV32-NEXT: vsll.vi v10, v12, 2
7578 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
7581 ; RV64-LABEL: mscatter_baseidx_v8i16_v8f32:
7583 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
7584 ; RV64-NEXT: vsext.vf4 v12, v10
7585 ; RV64-NEXT: vsll.vi v12, v12, 2
7586 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
7587 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
7590 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i16_v8f32:
7591 ; RV64ZVE32F: # %bb.0:
7592 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7593 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
7594 ; RV64ZVE32F-NEXT: andi a2, a1, 1
7595 ; RV64ZVE32F-NEXT: beqz a2, .LBB71_2
7596 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
7597 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
7598 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7599 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7600 ; RV64ZVE32F-NEXT: add a2, a0, a2
7601 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7602 ; RV64ZVE32F-NEXT: vse32.v v8, (a2)
7603 ; RV64ZVE32F-NEXT: .LBB71_2: # %else
7604 ; RV64ZVE32F-NEXT: andi a2, a1, 2
7605 ; RV64ZVE32F-NEXT: beqz a2, .LBB71_4
7606 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
7607 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
7608 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
7609 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
7610 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7611 ; RV64ZVE32F-NEXT: add a2, a0, a2
7612 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
7613 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
7614 ; RV64ZVE32F-NEXT: vse32.v v11, (a2)
7615 ; RV64ZVE32F-NEXT: .LBB71_4: # %else2
7616 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
7617 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
7618 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
7619 ; RV64ZVE32F-NEXT: andi a2, a1, 4
7620 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
7621 ; RV64ZVE32F-NEXT: bnez a2, .LBB71_12
7622 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
7623 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7624 ; RV64ZVE32F-NEXT: bnez a2, .LBB71_13
7625 ; RV64ZVE32F-NEXT: .LBB71_6: # %else6
7626 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7627 ; RV64ZVE32F-NEXT: bnez a2, .LBB71_14
7628 ; RV64ZVE32F-NEXT: .LBB71_7: # %else8
7629 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7630 ; RV64ZVE32F-NEXT: beqz a2, .LBB71_9
7631 ; RV64ZVE32F-NEXT: .LBB71_8: # %cond.store9
7632 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
7633 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
7634 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7635 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7636 ; RV64ZVE32F-NEXT: add a2, a0, a2
7637 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7638 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
7639 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7640 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7641 ; RV64ZVE32F-NEXT: .LBB71_9: # %else10
7642 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
7643 ; RV64ZVE32F-NEXT: andi a2, a1, 64
7644 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
7645 ; RV64ZVE32F-NEXT: bnez a2, .LBB71_15
7646 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
7647 ; RV64ZVE32F-NEXT: andi a1, a1, -128
7648 ; RV64ZVE32F-NEXT: bnez a1, .LBB71_16
7649 ; RV64ZVE32F-NEXT: .LBB71_11: # %else14
7650 ; RV64ZVE32F-NEXT: ret
7651 ; RV64ZVE32F-NEXT: .LBB71_12: # %cond.store3
7652 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7653 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7654 ; RV64ZVE32F-NEXT: add a2, a0, a2
7655 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7656 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
7657 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7658 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7659 ; RV64ZVE32F-NEXT: beqz a2, .LBB71_6
7660 ; RV64ZVE32F-NEXT: .LBB71_13: # %cond.store5
7661 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
7662 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
7663 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7664 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7665 ; RV64ZVE32F-NEXT: add a2, a0, a2
7666 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
7667 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
7668 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
7669 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7670 ; RV64ZVE32F-NEXT: beqz a2, .LBB71_7
7671 ; RV64ZVE32F-NEXT: .LBB71_14: # %cond.store7
7672 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
7673 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
7674 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7675 ; RV64ZVE32F-NEXT: add a2, a0, a2
7676 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7677 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
7678 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7679 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7680 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7681 ; RV64ZVE32F-NEXT: bnez a2, .LBB71_8
7682 ; RV64ZVE32F-NEXT: j .LBB71_9
7683 ; RV64ZVE32F-NEXT: .LBB71_15: # %cond.store11
7684 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7685 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7686 ; RV64ZVE32F-NEXT: add a2, a0, a2
7687 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7688 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
7689 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7690 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7691 ; RV64ZVE32F-NEXT: andi a1, a1, -128
7692 ; RV64ZVE32F-NEXT: beqz a1, .LBB71_11
7693 ; RV64ZVE32F-NEXT: .LBB71_16: # %cond.store13
7694 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
7695 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
7696 ; RV64ZVE32F-NEXT: vmv.x.s a1, v10
7697 ; RV64ZVE32F-NEXT: slli a1, a1, 2
7698 ; RV64ZVE32F-NEXT: add a0, a0, a1
7699 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7700 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
7701 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7702 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
7703 ; RV64ZVE32F-NEXT: ret
; Per-lane addresses are float elements of %base selected by the i16 indices.
7704 %ptrs = getelementptr inbounds float, ptr %base, <8 x i16> %idxs
7705 call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
7709 define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
7710 ; RV32-LABEL: mscatter_baseidx_sext_v8i16_v8f32:
7712 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
7713 ; RV32-NEXT: vsext.vf2 v12, v10
7714 ; RV32-NEXT: vsll.vi v10, v12, 2
7715 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
7718 ; RV64-LABEL: mscatter_baseidx_sext_v8i16_v8f32:
7720 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
7721 ; RV64-NEXT: vsext.vf4 v12, v10
7722 ; RV64-NEXT: vsll.vi v12, v12, 2
7723 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
7724 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
7727 ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i16_v8f32:
7728 ; RV64ZVE32F: # %bb.0:
7729 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7730 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
7731 ; RV64ZVE32F-NEXT: andi a2, a1, 1
7732 ; RV64ZVE32F-NEXT: beqz a2, .LBB72_2
7733 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
7734 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
7735 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7736 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7737 ; RV64ZVE32F-NEXT: add a2, a0, a2
7738 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7739 ; RV64ZVE32F-NEXT: vse32.v v8, (a2)
7740 ; RV64ZVE32F-NEXT: .LBB72_2: # %else
7741 ; RV64ZVE32F-NEXT: andi a2, a1, 2
7742 ; RV64ZVE32F-NEXT: beqz a2, .LBB72_4
7743 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
7744 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
7745 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
7746 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
7747 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7748 ; RV64ZVE32F-NEXT: add a2, a0, a2
7749 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
7750 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
7751 ; RV64ZVE32F-NEXT: vse32.v v11, (a2)
7752 ; RV64ZVE32F-NEXT: .LBB72_4: # %else2
7753 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
7754 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
7755 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
7756 ; RV64ZVE32F-NEXT: andi a2, a1, 4
7757 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
7758 ; RV64ZVE32F-NEXT: bnez a2, .LBB72_12
7759 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
7760 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7761 ; RV64ZVE32F-NEXT: bnez a2, .LBB72_13
7762 ; RV64ZVE32F-NEXT: .LBB72_6: # %else6
7763 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7764 ; RV64ZVE32F-NEXT: bnez a2, .LBB72_14
7765 ; RV64ZVE32F-NEXT: .LBB72_7: # %else8
7766 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7767 ; RV64ZVE32F-NEXT: beqz a2, .LBB72_9
7768 ; RV64ZVE32F-NEXT: .LBB72_8: # %cond.store9
7769 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
7770 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
7771 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7772 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7773 ; RV64ZVE32F-NEXT: add a2, a0, a2
7774 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7775 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
7776 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7777 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7778 ; RV64ZVE32F-NEXT: .LBB72_9: # %else10
7779 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
7780 ; RV64ZVE32F-NEXT: andi a2, a1, 64
7781 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
7782 ; RV64ZVE32F-NEXT: bnez a2, .LBB72_15
7783 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
7784 ; RV64ZVE32F-NEXT: andi a1, a1, -128
7785 ; RV64ZVE32F-NEXT: bnez a1, .LBB72_16
7786 ; RV64ZVE32F-NEXT: .LBB72_11: # %else14
7787 ; RV64ZVE32F-NEXT: ret
7788 ; RV64ZVE32F-NEXT: .LBB72_12: # %cond.store3
7789 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7790 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7791 ; RV64ZVE32F-NEXT: add a2, a0, a2
7792 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7793 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
7794 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7795 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7796 ; RV64ZVE32F-NEXT: beqz a2, .LBB72_6
7797 ; RV64ZVE32F-NEXT: .LBB72_13: # %cond.store5
7798 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
7799 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
7800 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7801 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7802 ; RV64ZVE32F-NEXT: add a2, a0, a2
7803 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
7804 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
7805 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
7806 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7807 ; RV64ZVE32F-NEXT: beqz a2, .LBB72_7
7808 ; RV64ZVE32F-NEXT: .LBB72_14: # %cond.store7
7809 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
7810 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
7811 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7812 ; RV64ZVE32F-NEXT: add a2, a0, a2
7813 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7814 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
7815 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7816 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7817 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7818 ; RV64ZVE32F-NEXT: bnez a2, .LBB72_8
7819 ; RV64ZVE32F-NEXT: j .LBB72_9
7820 ; RV64ZVE32F-NEXT: .LBB72_15: # %cond.store11
7821 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7822 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7823 ; RV64ZVE32F-NEXT: add a2, a0, a2
7824 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7825 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
7826 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7827 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7828 ; RV64ZVE32F-NEXT: andi a1, a1, -128
7829 ; RV64ZVE32F-NEXT: beqz a1, .LBB72_11
7830 ; RV64ZVE32F-NEXT: .LBB72_16: # %cond.store13
7831 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
7832 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
7833 ; RV64ZVE32F-NEXT: vmv.x.s a1, v10
7834 ; RV64ZVE32F-NEXT: slli a1, a1, 2
7835 ; RV64ZVE32F-NEXT: add a0, a0, a1
7836 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7837 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
7838 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7839 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
7840 ; RV64ZVE32F-NEXT: ret
7841 %eidxs = sext <8 x i16> %idxs to <8 x i32>
7842 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
7843 call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
; Masked scatter of <8 x float> to base + zext(i16 index): the IR below
; zero-extends %idxs to <8 x i32> and feeds it to an inbounds float GEP.
; Lowering pinned by the assertions that follow:
;  * RV32 and RV64 runs - widen the indices with vzext.vf2 at e32, scale them
;    with vsll.vi by 2, then issue a single masked vsoxei32.v.
;  * RV64ZVE32F run - no indexed store is used. The mask is copied to a scalar
;    with vmv.x.s and each of the 8 lanes gets its own bit test, branch and
;    vse32.v. The constant 0xffff (lui 16 then addiw -1) is kept in a1 and
;    ANDed with every extracted index to perform the zero-extension.
7847 define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
7848 ; RV32-LABEL: mscatter_baseidx_zext_v8i16_v8f32:
7850 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
7851 ; RV32-NEXT: vzext.vf2 v12, v10
7852 ; RV32-NEXT: vsll.vi v10, v12, 2
7853 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
7856 ; RV64-LABEL: mscatter_baseidx_zext_v8i16_v8f32:
7858 ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
7859 ; RV64-NEXT: vzext.vf2 v12, v10
7860 ; RV64-NEXT: vsll.vi v10, v12, 2
7861 ; RV64-NEXT: vsoxei32.v v8, (a0), v10, v0.t
7864 ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8f32:
7865 ; RV64ZVE32F: # %bb.0:
7866 ; RV64ZVE32F-NEXT: lui a1, 16
7867 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7868 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
7869 ; RV64ZVE32F-NEXT: andi a3, a2, 1
7870 ; RV64ZVE32F-NEXT: addiw a1, a1, -1
7871 ; RV64ZVE32F-NEXT: beqz a3, .LBB73_2
7872 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
7873 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
7874 ; RV64ZVE32F-NEXT: vmv.x.s a3, v10
7875 ; RV64ZVE32F-NEXT: and a3, a3, a1
7876 ; RV64ZVE32F-NEXT: slli a3, a3, 2
7877 ; RV64ZVE32F-NEXT: add a3, a0, a3
7878 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7879 ; RV64ZVE32F-NEXT: vse32.v v8, (a3)
7880 ; RV64ZVE32F-NEXT: .LBB73_2: # %else
7881 ; RV64ZVE32F-NEXT: andi a3, a2, 2
7882 ; RV64ZVE32F-NEXT: beqz a3, .LBB73_4
7883 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
7884 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
7885 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
7886 ; RV64ZVE32F-NEXT: vmv.x.s a3, v11
7887 ; RV64ZVE32F-NEXT: and a3, a3, a1
7888 ; RV64ZVE32F-NEXT: slli a3, a3, 2
7889 ; RV64ZVE32F-NEXT: add a3, a0, a3
7890 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
7891 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
7892 ; RV64ZVE32F-NEXT: vse32.v v11, (a3)
7893 ; RV64ZVE32F-NEXT: .LBB73_4: # %else2
7894 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
7895 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
7896 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
7897 ; RV64ZVE32F-NEXT: andi a3, a2, 4
7898 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
7899 ; RV64ZVE32F-NEXT: bnez a3, .LBB73_12
7900 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
7901 ; RV64ZVE32F-NEXT: andi a3, a2, 8
7902 ; RV64ZVE32F-NEXT: bnez a3, .LBB73_13
7903 ; RV64ZVE32F-NEXT: .LBB73_6: # %else6
7904 ; RV64ZVE32F-NEXT: andi a3, a2, 16
7905 ; RV64ZVE32F-NEXT: bnez a3, .LBB73_14
7906 ; RV64ZVE32F-NEXT: .LBB73_7: # %else8
7907 ; RV64ZVE32F-NEXT: andi a3, a2, 32
7908 ; RV64ZVE32F-NEXT: beqz a3, .LBB73_9
7909 ; RV64ZVE32F-NEXT: .LBB73_8: # %cond.store9
7910 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
7911 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
7912 ; RV64ZVE32F-NEXT: vmv.x.s a3, v10
7913 ; RV64ZVE32F-NEXT: and a3, a3, a1
7914 ; RV64ZVE32F-NEXT: slli a3, a3, 2
7915 ; RV64ZVE32F-NEXT: add a3, a0, a3
7916 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7917 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
7918 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7919 ; RV64ZVE32F-NEXT: vse32.v v12, (a3)
7920 ; RV64ZVE32F-NEXT: .LBB73_9: # %else10
7921 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
7922 ; RV64ZVE32F-NEXT: andi a3, a2, 64
7923 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
7924 ; RV64ZVE32F-NEXT: bnez a3, .LBB73_15
7925 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
7926 ; RV64ZVE32F-NEXT: andi a2, a2, -128
7927 ; RV64ZVE32F-NEXT: bnez a2, .LBB73_16
7928 ; RV64ZVE32F-NEXT: .LBB73_11: # %else14
7929 ; RV64ZVE32F-NEXT: ret
7930 ; RV64ZVE32F-NEXT: .LBB73_12: # %cond.store3
7931 ; RV64ZVE32F-NEXT: vmv.x.s a3, v10
7932 ; RV64ZVE32F-NEXT: and a3, a3, a1
7933 ; RV64ZVE32F-NEXT: slli a3, a3, 2
7934 ; RV64ZVE32F-NEXT: add a3, a0, a3
7935 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7936 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
7937 ; RV64ZVE32F-NEXT: vse32.v v12, (a3)
7938 ; RV64ZVE32F-NEXT: andi a3, a2, 8
7939 ; RV64ZVE32F-NEXT: beqz a3, .LBB73_6
7940 ; RV64ZVE32F-NEXT: .LBB73_13: # %cond.store5
7941 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
7942 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
7943 ; RV64ZVE32F-NEXT: vmv.x.s a3, v10
7944 ; RV64ZVE32F-NEXT: and a3, a3, a1
7945 ; RV64ZVE32F-NEXT: slli a3, a3, 2
7946 ; RV64ZVE32F-NEXT: add a3, a0, a3
7947 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
7948 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
7949 ; RV64ZVE32F-NEXT: vse32.v v10, (a3)
7950 ; RV64ZVE32F-NEXT: andi a3, a2, 16
7951 ; RV64ZVE32F-NEXT: beqz a3, .LBB73_7
7952 ; RV64ZVE32F-NEXT: .LBB73_14: # %cond.store7
7953 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
7954 ; RV64ZVE32F-NEXT: vmv.x.s a3, v11
7955 ; RV64ZVE32F-NEXT: and a3, a3, a1
7956 ; RV64ZVE32F-NEXT: slli a3, a3, 2
7957 ; RV64ZVE32F-NEXT: add a3, a0, a3
7958 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7959 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
7960 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7961 ; RV64ZVE32F-NEXT: vse32.v v12, (a3)
7962 ; RV64ZVE32F-NEXT: andi a3, a2, 32
7963 ; RV64ZVE32F-NEXT: bnez a3, .LBB73_8
7964 ; RV64ZVE32F-NEXT: j .LBB73_9
7965 ; RV64ZVE32F-NEXT: .LBB73_15: # %cond.store11
7966 ; RV64ZVE32F-NEXT: vmv.x.s a3, v10
7967 ; RV64ZVE32F-NEXT: and a3, a3, a1
7968 ; RV64ZVE32F-NEXT: slli a3, a3, 2
7969 ; RV64ZVE32F-NEXT: add a3, a0, a3
7970 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7971 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
7972 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7973 ; RV64ZVE32F-NEXT: vse32.v v12, (a3)
7974 ; RV64ZVE32F-NEXT: andi a2, a2, -128
7975 ; RV64ZVE32F-NEXT: beqz a2, .LBB73_11
7976 ; RV64ZVE32F-NEXT: .LBB73_16: # %cond.store13
7977 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
7978 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
7979 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7980 ; RV64ZVE32F-NEXT: and a1, a2, a1
7981 ; RV64ZVE32F-NEXT: slli a1, a1, 2
7982 ; RV64ZVE32F-NEXT: add a0, a0, a1
7983 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7984 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
7985 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7986 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
7987 ; RV64ZVE32F-NEXT: ret
7988 %eidxs = zext <8 x i16> %idxs to <8 x i32>
7989 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
7990 call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
; Masked scatter of <8 x float> through an inbounds float GEP indexed directly
; by <8 x i32> %idxs (the IR performs no explicit index extension).
; Lowering pinned by the assertions that follow:
;  * RV32 run - scale the indices with vsll.vi by 2 and issue one masked
;    vsoxei32.v.
;  * RV64 run - sign-extend the indices to e64 with vsext.vf2, scale with
;    vsll.vi by 2, then issue one masked vsoxei64.v.
;  * RV64ZVE32F run - no indexed store is used. The mask is copied to a scalar
;    with vmv.x.s and each of the 8 lanes gets its own bit test, branch,
;    scalar address computation (slli by 2, add to a0) and vse32.v.
7994 define void @mscatter_baseidx_v8f32(<8 x float> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m) {
7995 ; RV32-LABEL: mscatter_baseidx_v8f32:
7997 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
7998 ; RV32-NEXT: vsll.vi v10, v10, 2
7999 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
8002 ; RV64-LABEL: mscatter_baseidx_v8f32:
8004 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
8005 ; RV64-NEXT: vsext.vf2 v12, v10
8006 ; RV64-NEXT: vsll.vi v12, v12, 2
8007 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
8008 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
8011 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8f32:
8012 ; RV64ZVE32F: # %bb.0:
8013 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8014 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
8015 ; RV64ZVE32F-NEXT: andi a2, a1, 1
8016 ; RV64ZVE32F-NEXT: beqz a2, .LBB74_2
8017 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
8018 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8019 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
8020 ; RV64ZVE32F-NEXT: slli a2, a2, 2
8021 ; RV64ZVE32F-NEXT: add a2, a0, a2
8022 ; RV64ZVE32F-NEXT: vse32.v v8, (a2)
8023 ; RV64ZVE32F-NEXT: .LBB74_2: # %else
8024 ; RV64ZVE32F-NEXT: andi a2, a1, 2
8025 ; RV64ZVE32F-NEXT: beqz a2, .LBB74_4
8026 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
8027 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8028 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 1
8029 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
8030 ; RV64ZVE32F-NEXT: slli a2, a2, 2
8031 ; RV64ZVE32F-NEXT: add a2, a0, a2
8032 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1
8033 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
8034 ; RV64ZVE32F-NEXT: .LBB74_4: # %else2
8035 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
8036 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 4
8037 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
8038 ; RV64ZVE32F-NEXT: andi a2, a1, 4
8039 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
8040 ; RV64ZVE32F-NEXT: bnez a2, .LBB74_12
8041 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
8042 ; RV64ZVE32F-NEXT: andi a2, a1, 8
8043 ; RV64ZVE32F-NEXT: bnez a2, .LBB74_13
8044 ; RV64ZVE32F-NEXT: .LBB74_6: # %else6
8045 ; RV64ZVE32F-NEXT: andi a2, a1, 16
8046 ; RV64ZVE32F-NEXT: bnez a2, .LBB74_14
8047 ; RV64ZVE32F-NEXT: .LBB74_7: # %else8
8048 ; RV64ZVE32F-NEXT: andi a2, a1, 32
8049 ; RV64ZVE32F-NEXT: beqz a2, .LBB74_9
8050 ; RV64ZVE32F-NEXT: .LBB74_8: # %cond.store9
8051 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8052 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v12, 1
8053 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
8054 ; RV64ZVE32F-NEXT: slli a2, a2, 2
8055 ; RV64ZVE32F-NEXT: add a2, a0, a2
8056 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
8057 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5
8058 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8059 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
8060 ; RV64ZVE32F-NEXT: .LBB74_9: # %else10
8061 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
8062 ; RV64ZVE32F-NEXT: andi a2, a1, 64
8063 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v12, 2
8064 ; RV64ZVE32F-NEXT: bnez a2, .LBB74_15
8065 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
8066 ; RV64ZVE32F-NEXT: andi a1, a1, -128
8067 ; RV64ZVE32F-NEXT: bnez a1, .LBB74_16
8068 ; RV64ZVE32F-NEXT: .LBB74_11: # %else14
8069 ; RV64ZVE32F-NEXT: ret
8070 ; RV64ZVE32F-NEXT: .LBB74_12: # %cond.store3
8071 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
8072 ; RV64ZVE32F-NEXT: slli a2, a2, 2
8073 ; RV64ZVE32F-NEXT: add a2, a0, a2
8074 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
8075 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8076 ; RV64ZVE32F-NEXT: vse32.v v11, (a2)
8077 ; RV64ZVE32F-NEXT: andi a2, a1, 8
8078 ; RV64ZVE32F-NEXT: beqz a2, .LBB74_6
8079 ; RV64ZVE32F-NEXT: .LBB74_13: # %cond.store5
8080 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8081 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
8082 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
8083 ; RV64ZVE32F-NEXT: slli a2, a2, 2
8084 ; RV64ZVE32F-NEXT: add a2, a0, a2
8085 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
8086 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
8087 ; RV64ZVE32F-NEXT: andi a2, a1, 16
8088 ; RV64ZVE32F-NEXT: beqz a2, .LBB74_7
8089 ; RV64ZVE32F-NEXT: .LBB74_14: # %cond.store7
8090 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
8091 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
8092 ; RV64ZVE32F-NEXT: slli a2, a2, 2
8093 ; RV64ZVE32F-NEXT: add a2, a0, a2
8094 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
8095 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8096 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
8097 ; RV64ZVE32F-NEXT: andi a2, a1, 32
8098 ; RV64ZVE32F-NEXT: bnez a2, .LBB74_8
8099 ; RV64ZVE32F-NEXT: j .LBB74_9
8100 ; RV64ZVE32F-NEXT: .LBB74_15: # %cond.store11
8101 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
8102 ; RV64ZVE32F-NEXT: slli a2, a2, 2
8103 ; RV64ZVE32F-NEXT: add a2, a0, a2
8104 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
8105 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
8106 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8107 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
8108 ; RV64ZVE32F-NEXT: andi a1, a1, -128
8109 ; RV64ZVE32F-NEXT: beqz a1, .LBB74_11
8110 ; RV64ZVE32F-NEXT: .LBB74_16: # %cond.store13
8111 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8112 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
8113 ; RV64ZVE32F-NEXT: vmv.x.s a1, v10
8114 ; RV64ZVE32F-NEXT: slli a1, a1, 2
8115 ; RV64ZVE32F-NEXT: add a0, a0, a1
8116 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
8117 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
8118 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8119 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
8120 ; RV64ZVE32F-NEXT: ret
8121 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %idxs
8122 call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
; Masked scatter intrinsic for <1 x double>. Operands, in order, are the value
; vector, the vector of destination pointers, an i32 alignment, and the i1
; lane mask (the calls in this file pass i32 8 as the alignment).
8126 declare void @llvm.masked.scatter.v1f64.v1p0(<1 x double>, <1 x ptr>, i32, <1 x i1>)
; Masked scatter of a single double.
; Lowering pinned by the assertions that follow:
;  * RV32V and RV64 runs - one masked indexed store at e64 (vsoxei32.v and
;    vsoxei64.v respectively) with a zero base register.
;  * RV32ZVE32F and RV64ZVE32F runs - the mask is tested with vfirst.m and the
;    store is a scalar fsd of fa0. On RV32ZVE32F the address is read out of v8
;    with vmv.x.s; on RV64ZVE32F the address is taken from a0.
8128 define void @mscatter_v1f64(<1 x double> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
8129 ; RV32V-LABEL: mscatter_v1f64:
8131 ; RV32V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
8132 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
8135 ; RV64-LABEL: mscatter_v1f64:
8137 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
8138 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
8141 ; RV32ZVE32F-LABEL: mscatter_v1f64:
8142 ; RV32ZVE32F: # %bb.0:
8143 ; RV32ZVE32F-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
8144 ; RV32ZVE32F-NEXT: vfirst.m a0, v0
8145 ; RV32ZVE32F-NEXT: bnez a0, .LBB75_2
8146 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.store
8147 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
8148 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
8149 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
8150 ; RV32ZVE32F-NEXT: .LBB75_2: # %else
8151 ; RV32ZVE32F-NEXT: ret
8153 ; RV64ZVE32F-LABEL: mscatter_v1f64:
8154 ; RV64ZVE32F: # %bb.0:
8155 ; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
8156 ; RV64ZVE32F-NEXT: vfirst.m a1, v0
8157 ; RV64ZVE32F-NEXT: bnez a1, .LBB75_2
8158 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
8159 ; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
8160 ; RV64ZVE32F-NEXT: .LBB75_2: # %else
8161 ; RV64ZVE32F-NEXT: ret
8162 call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> %val, <1 x ptr> %ptrs, i32 8, <1 x i1> %m)
; Masked scatter intrinsic for <2 x double>. Operands, in order, are the value
; vector, the vector of destination pointers, an i32 alignment, and the i1
; lane mask (the calls in this file pass i32 8 as the alignment).
8166 declare void @llvm.masked.scatter.v2f64.v2p0(<2 x double>, <2 x ptr>, i32, <2 x i1>)
; Masked scatter of two doubles.
; Lowering pinned by the assertions that follow:
;  * RV32V and RV64 runs - one masked indexed store at e64 (vsoxei32.v and
;    vsoxei64.v respectively) with a zero base register.
;  * RV32ZVE32F and RV64ZVE32F runs - the mask is copied to a scalar with
;    vmv.x.s and bits 1 and 2 are tested in turn; each enabled lane is stored
;    with a scalar fsd (fa0, then fa1). On RV32ZVE32F the addresses are
;    extracted from v8 with vmv.x.s / vslidedown.vi; on RV64ZVE32F they are
;    taken from a0 and a1.
8168 define void @mscatter_v2f64(<2 x double> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
8169 ; RV32V-LABEL: mscatter_v2f64:
8171 ; RV32V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
8172 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
8175 ; RV64-LABEL: mscatter_v2f64:
8177 ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
8178 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
8181 ; RV32ZVE32F-LABEL: mscatter_v2f64:
8182 ; RV32ZVE32F: # %bb.0:
8183 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8184 ; RV32ZVE32F-NEXT: vmv.x.s a0, v0
8185 ; RV32ZVE32F-NEXT: andi a1, a0, 1
8186 ; RV32ZVE32F-NEXT: bnez a1, .LBB76_3
8187 ; RV32ZVE32F-NEXT: # %bb.1: # %else
8188 ; RV32ZVE32F-NEXT: andi a0, a0, 2
8189 ; RV32ZVE32F-NEXT: bnez a0, .LBB76_4
8190 ; RV32ZVE32F-NEXT: .LBB76_2: # %else2
8191 ; RV32ZVE32F-NEXT: ret
8192 ; RV32ZVE32F-NEXT: .LBB76_3: # %cond.store
8193 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
8194 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
8195 ; RV32ZVE32F-NEXT: fsd fa0, 0(a1)
8196 ; RV32ZVE32F-NEXT: andi a0, a0, 2
8197 ; RV32ZVE32F-NEXT: beqz a0, .LBB76_2
8198 ; RV32ZVE32F-NEXT: .LBB76_4: # %cond.store1
8199 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8200 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 1
8201 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
8202 ; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
8203 ; RV32ZVE32F-NEXT: ret
8205 ; RV64ZVE32F-LABEL: mscatter_v2f64:
8206 ; RV64ZVE32F: # %bb.0:
8207 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8208 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
8209 ; RV64ZVE32F-NEXT: andi a3, a2, 1
8210 ; RV64ZVE32F-NEXT: bnez a3, .LBB76_3
8211 ; RV64ZVE32F-NEXT: # %bb.1: # %else
8212 ; RV64ZVE32F-NEXT: andi a2, a2, 2
8213 ; RV64ZVE32F-NEXT: bnez a2, .LBB76_4
8214 ; RV64ZVE32F-NEXT: .LBB76_2: # %else2
8215 ; RV64ZVE32F-NEXT: ret
8216 ; RV64ZVE32F-NEXT: .LBB76_3: # %cond.store
8217 ; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
8218 ; RV64ZVE32F-NEXT: andi a2, a2, 2
8219 ; RV64ZVE32F-NEXT: beqz a2, .LBB76_2
8220 ; RV64ZVE32F-NEXT: .LBB76_4: # %cond.store1
8221 ; RV64ZVE32F-NEXT: fsd fa1, 0(a1)
8222 ; RV64ZVE32F-NEXT: ret
8223 call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> %val, <2 x ptr> %ptrs, i32 8, <2 x i1> %m)
; Masked scatter intrinsic for <4 x double>. Operands, in order, are the value
; vector, the vector of destination pointers, an i32 alignment, and the i1
; lane mask (the calls in this file pass i32 8 as the alignment).
8227 declare void @llvm.masked.scatter.v4f64.v4p0(<4 x double>, <4 x ptr>, i32, <4 x i1>)
; Masked scatter of four doubles with a variable mask.
; Lowering pinned by the assertions that follow:
;  * RV32V and RV64 runs - one masked indexed store at e64/m2 (vsoxei32.v and
;    vsoxei64.v respectively) with a zero base register.
;  * RV32ZVE32F run - the mask is copied to a scalar and bits 1, 2, 4 and 8
;    are tested in turn; each enabled lane extracts its address from v8
;    (vslidedown.vi + vmv.x.s) and stores fa0..fa3 with fsd.
;  * RV64ZVE32F run - same branch chain, but the four destination pointers are
;    loaded with ld from the memory addressed by a0 (offsets 8, 16 and 24 up
;    front, offset 0 only inside the first conditional store).
8229 define void @mscatter_v4f64(<4 x double> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
8230 ; RV32V-LABEL: mscatter_v4f64:
8232 ; RV32V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
8233 ; RV32V-NEXT: vsoxei32.v v8, (zero), v10, v0.t
8236 ; RV64-LABEL: mscatter_v4f64:
8238 ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
8239 ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
8242 ; RV32ZVE32F-LABEL: mscatter_v4f64:
8243 ; RV32ZVE32F: # %bb.0:
8244 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8245 ; RV32ZVE32F-NEXT: vmv.x.s a0, v0
8246 ; RV32ZVE32F-NEXT: andi a1, a0, 1
8247 ; RV32ZVE32F-NEXT: bnez a1, .LBB77_5
8248 ; RV32ZVE32F-NEXT: # %bb.1: # %else
8249 ; RV32ZVE32F-NEXT: andi a1, a0, 2
8250 ; RV32ZVE32F-NEXT: bnez a1, .LBB77_6
8251 ; RV32ZVE32F-NEXT: .LBB77_2: # %else2
8252 ; RV32ZVE32F-NEXT: andi a1, a0, 4
8253 ; RV32ZVE32F-NEXT: bnez a1, .LBB77_7
8254 ; RV32ZVE32F-NEXT: .LBB77_3: # %else4
8255 ; RV32ZVE32F-NEXT: andi a0, a0, 8
8256 ; RV32ZVE32F-NEXT: bnez a0, .LBB77_8
8257 ; RV32ZVE32F-NEXT: .LBB77_4: # %else6
8258 ; RV32ZVE32F-NEXT: ret
8259 ; RV32ZVE32F-NEXT: .LBB77_5: # %cond.store
8260 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
8261 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
8262 ; RV32ZVE32F-NEXT: fsd fa0, 0(a1)
8263 ; RV32ZVE32F-NEXT: andi a1, a0, 2
8264 ; RV32ZVE32F-NEXT: beqz a1, .LBB77_2
8265 ; RV32ZVE32F-NEXT: .LBB77_6: # %cond.store1
8266 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8267 ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1
8268 ; RV32ZVE32F-NEXT: vmv.x.s a1, v9
8269 ; RV32ZVE32F-NEXT: fsd fa1, 0(a1)
8270 ; RV32ZVE32F-NEXT: andi a1, a0, 4
8271 ; RV32ZVE32F-NEXT: beqz a1, .LBB77_3
8272 ; RV32ZVE32F-NEXT: .LBB77_7: # %cond.store3
8273 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8274 ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2
8275 ; RV32ZVE32F-NEXT: vmv.x.s a1, v9
8276 ; RV32ZVE32F-NEXT: fsd fa2, 0(a1)
8277 ; RV32ZVE32F-NEXT: andi a0, a0, 8
8278 ; RV32ZVE32F-NEXT: beqz a0, .LBB77_4
8279 ; RV32ZVE32F-NEXT: .LBB77_8: # %cond.store5
8280 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8281 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3
8282 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
8283 ; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
8284 ; RV32ZVE32F-NEXT: ret
8286 ; RV64ZVE32F-LABEL: mscatter_v4f64:
8287 ; RV64ZVE32F: # %bb.0:
8288 ; RV64ZVE32F-NEXT: ld a1, 24(a0)
8289 ; RV64ZVE32F-NEXT: ld a2, 16(a0)
8290 ; RV64ZVE32F-NEXT: ld a4, 8(a0)
8291 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8292 ; RV64ZVE32F-NEXT: vmv.x.s a3, v0
8293 ; RV64ZVE32F-NEXT: andi a5, a3, 1
8294 ; RV64ZVE32F-NEXT: bnez a5, .LBB77_5
8295 ; RV64ZVE32F-NEXT: # %bb.1: # %else
8296 ; RV64ZVE32F-NEXT: andi a0, a3, 2
8297 ; RV64ZVE32F-NEXT: bnez a0, .LBB77_6
8298 ; RV64ZVE32F-NEXT: .LBB77_2: # %else2
8299 ; RV64ZVE32F-NEXT: andi a0, a3, 4
8300 ; RV64ZVE32F-NEXT: bnez a0, .LBB77_7
8301 ; RV64ZVE32F-NEXT: .LBB77_3: # %else4
8302 ; RV64ZVE32F-NEXT: andi a3, a3, 8
8303 ; RV64ZVE32F-NEXT: bnez a3, .LBB77_8
8304 ; RV64ZVE32F-NEXT: .LBB77_4: # %else6
8305 ; RV64ZVE32F-NEXT: ret
8306 ; RV64ZVE32F-NEXT: .LBB77_5: # %cond.store
8307 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
8308 ; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
8309 ; RV64ZVE32F-NEXT: andi a0, a3, 2
8310 ; RV64ZVE32F-NEXT: beqz a0, .LBB77_2
8311 ; RV64ZVE32F-NEXT: .LBB77_6: # %cond.store1
8312 ; RV64ZVE32F-NEXT: fsd fa1, 0(a4)
8313 ; RV64ZVE32F-NEXT: andi a0, a3, 4
8314 ; RV64ZVE32F-NEXT: beqz a0, .LBB77_3
8315 ; RV64ZVE32F-NEXT: .LBB77_7: # %cond.store3
8316 ; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
8317 ; RV64ZVE32F-NEXT: andi a3, a3, 8
8318 ; RV64ZVE32F-NEXT: beqz a3, .LBB77_4
8319 ; RV64ZVE32F-NEXT: .LBB77_8: # %cond.store5
8320 ; RV64ZVE32F-NEXT: fsd fa3, 0(a1)
8321 ; RV64ZVE32F-NEXT: ret
8322 call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> %val, <4 x ptr> %ptrs, i32 8, <4 x i1> %m)
; Scatter of four doubles with an all-true mask; the mask is built in the IR
; as a splat of i1 1 (insertelement followed by a zeroinitializer shuffle).
; Lowering pinned by the assertions that follow:
;  * RV32V and RV64 runs - the indexed store is emitted without a v0.t mask
;    operand (vsoxei32.v and vsoxei64.v respectively).
;  * RV32ZVE32F and RV64ZVE32F runs - the constant mask is not folded away.
;    It is materialized with vmset.m, copied to a scalar with vmv.x.s, and the
;    per-lane branch chain is still emitted. The first branch is
;    "beqz zero", which is always taken, while lanes 1..3 still test the mask
;    bits with andi.
8326 define void @mscatter_truemask_v4f64(<4 x double> %val, <4 x ptr> %ptrs) {
8327 ; RV32V-LABEL: mscatter_truemask_v4f64:
8329 ; RV32V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
8330 ; RV32V-NEXT: vsoxei32.v v8, (zero), v10
8333 ; RV64-LABEL: mscatter_truemask_v4f64:
8335 ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
8336 ; RV64-NEXT: vsoxei64.v v8, (zero), v10
8339 ; RV32ZVE32F-LABEL: mscatter_truemask_v4f64:
8340 ; RV32ZVE32F: # %bb.0:
8341 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
8342 ; RV32ZVE32F-NEXT: vmset.m v9
8343 ; RV32ZVE32F-NEXT: vmv.x.s a0, v9
8344 ; RV32ZVE32F-NEXT: beqz zero, .LBB78_5
8345 ; RV32ZVE32F-NEXT: # %bb.1: # %else
8346 ; RV32ZVE32F-NEXT: andi a1, a0, 2
8347 ; RV32ZVE32F-NEXT: bnez a1, .LBB78_6
8348 ; RV32ZVE32F-NEXT: .LBB78_2: # %else2
8349 ; RV32ZVE32F-NEXT: andi a1, a0, 4
8350 ; RV32ZVE32F-NEXT: bnez a1, .LBB78_7
8351 ; RV32ZVE32F-NEXT: .LBB78_3: # %else4
8352 ; RV32ZVE32F-NEXT: andi a0, a0, 8
8353 ; RV32ZVE32F-NEXT: bnez a0, .LBB78_8
8354 ; RV32ZVE32F-NEXT: .LBB78_4: # %else6
8355 ; RV32ZVE32F-NEXT: ret
8356 ; RV32ZVE32F-NEXT: .LBB78_5: # %cond.store
8357 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8358 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
8359 ; RV32ZVE32F-NEXT: fsd fa0, 0(a1)
8360 ; RV32ZVE32F-NEXT: andi a1, a0, 2
8361 ; RV32ZVE32F-NEXT: beqz a1, .LBB78_2
8362 ; RV32ZVE32F-NEXT: .LBB78_6: # %cond.store1
8363 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8364 ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1
8365 ; RV32ZVE32F-NEXT: vmv.x.s a1, v9
8366 ; RV32ZVE32F-NEXT: fsd fa1, 0(a1)
8367 ; RV32ZVE32F-NEXT: andi a1, a0, 4
8368 ; RV32ZVE32F-NEXT: beqz a1, .LBB78_3
8369 ; RV32ZVE32F-NEXT: .LBB78_7: # %cond.store3
8370 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8371 ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2
8372 ; RV32ZVE32F-NEXT: vmv.x.s a1, v9
8373 ; RV32ZVE32F-NEXT: fsd fa2, 0(a1)
8374 ; RV32ZVE32F-NEXT: andi a0, a0, 8
8375 ; RV32ZVE32F-NEXT: beqz a0, .LBB78_4
8376 ; RV32ZVE32F-NEXT: .LBB78_8: # %cond.store5
8377 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8378 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3
8379 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
8380 ; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
8381 ; RV32ZVE32F-NEXT: ret
8383 ; RV64ZVE32F-LABEL: mscatter_truemask_v4f64:
8384 ; RV64ZVE32F: # %bb.0:
8385 ; RV64ZVE32F-NEXT: ld a1, 24(a0)
8386 ; RV64ZVE32F-NEXT: ld a2, 16(a0)
8387 ; RV64ZVE32F-NEXT: ld a4, 8(a0)
8388 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
8389 ; RV64ZVE32F-NEXT: vmset.m v8
8390 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
8391 ; RV64ZVE32F-NEXT: beqz zero, .LBB78_5
8392 ; RV64ZVE32F-NEXT: # %bb.1: # %else
8393 ; RV64ZVE32F-NEXT: andi a0, a3, 2
8394 ; RV64ZVE32F-NEXT: bnez a0, .LBB78_6
8395 ; RV64ZVE32F-NEXT: .LBB78_2: # %else2
8396 ; RV64ZVE32F-NEXT: andi a0, a3, 4
8397 ; RV64ZVE32F-NEXT: bnez a0, .LBB78_7
8398 ; RV64ZVE32F-NEXT: .LBB78_3: # %else4
8399 ; RV64ZVE32F-NEXT: andi a3, a3, 8
8400 ; RV64ZVE32F-NEXT: bnez a3, .LBB78_8
8401 ; RV64ZVE32F-NEXT: .LBB78_4: # %else6
8402 ; RV64ZVE32F-NEXT: ret
8403 ; RV64ZVE32F-NEXT: .LBB78_5: # %cond.store
8404 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
8405 ; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
8406 ; RV64ZVE32F-NEXT: andi a0, a3, 2
8407 ; RV64ZVE32F-NEXT: beqz a0, .LBB78_2
8408 ; RV64ZVE32F-NEXT: .LBB78_6: # %cond.store1
8409 ; RV64ZVE32F-NEXT: fsd fa1, 0(a4)
8410 ; RV64ZVE32F-NEXT: andi a0, a3, 4
8411 ; RV64ZVE32F-NEXT: beqz a0, .LBB78_3
8412 ; RV64ZVE32F-NEXT: .LBB78_7: # %cond.store3
8413 ; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
8414 ; RV64ZVE32F-NEXT: andi a3, a3, 8
8415 ; RV64ZVE32F-NEXT: beqz a3, .LBB78_4
8416 ; RV64ZVE32F-NEXT: .LBB78_8: # %cond.store5
8417 ; RV64ZVE32F-NEXT: fsd fa3, 0(a1)
8418 ; RV64ZVE32F-NEXT: ret
8419 %mhead = insertelement <4 x i1> poison, i1 1, i32 0
8420 %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer
8421 call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> %val, <4 x ptr> %ptrs, i32 8, <4 x i1> %mtrue)
; Scatter of four doubles with a constant all-false mask (zeroinitializer).
; The label assertion uses the prefix shared by every RUN line, so all four
; configurations are expected to produce the same output for this function.
8425 define void @mscatter_falsemask_v4f64(<4 x double> %val, <4 x ptr> %ptrs) {
8426 ; CHECK-LABEL: mscatter_falsemask_v4f64:
8429 call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> %val, <4 x ptr> %ptrs, i32 8, <4 x i1> zeroinitializer)
; Masked scatter intrinsic for <8 x double>. Operands, in order, are the value
; vector, the vector of destination pointers, an i32 alignment, and the i1
; lane mask.
8433 declare void @llvm.masked.scatter.v8f64.v8p0(<8 x double>, <8 x ptr>, i32, <8 x i1>)
8435 define void @mscatter_v8f64(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
8436 ; RV32V-LABEL: mscatter_v8f64:
8438 ; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
8439 ; RV32V-NEXT: vsoxei32.v v8, (zero), v12, v0.t
8442 ; RV64-LABEL: mscatter_v8f64:
8444 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
8445 ; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
8448 ; RV32ZVE32F-LABEL: mscatter_v8f64:
8449 ; RV32ZVE32F: # %bb.0:
8450 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8451 ; RV32ZVE32F-NEXT: vmv.x.s a0, v0
8452 ; RV32ZVE32F-NEXT: andi a1, a0, 1
8453 ; RV32ZVE32F-NEXT: bnez a1, .LBB80_9
8454 ; RV32ZVE32F-NEXT: # %bb.1: # %else
8455 ; RV32ZVE32F-NEXT: andi a1, a0, 2
8456 ; RV32ZVE32F-NEXT: bnez a1, .LBB80_10
8457 ; RV32ZVE32F-NEXT: .LBB80_2: # %else2
8458 ; RV32ZVE32F-NEXT: andi a1, a0, 4
8459 ; RV32ZVE32F-NEXT: bnez a1, .LBB80_11
8460 ; RV32ZVE32F-NEXT: .LBB80_3: # %else4
8461 ; RV32ZVE32F-NEXT: andi a1, a0, 8
8462 ; RV32ZVE32F-NEXT: bnez a1, .LBB80_12
8463 ; RV32ZVE32F-NEXT: .LBB80_4: # %else6
8464 ; RV32ZVE32F-NEXT: andi a1, a0, 16
8465 ; RV32ZVE32F-NEXT: bnez a1, .LBB80_13
8466 ; RV32ZVE32F-NEXT: .LBB80_5: # %else8
8467 ; RV32ZVE32F-NEXT: andi a1, a0, 32
8468 ; RV32ZVE32F-NEXT: bnez a1, .LBB80_14
8469 ; RV32ZVE32F-NEXT: .LBB80_6: # %else10
8470 ; RV32ZVE32F-NEXT: andi a1, a0, 64
8471 ; RV32ZVE32F-NEXT: bnez a1, .LBB80_15
8472 ; RV32ZVE32F-NEXT: .LBB80_7: # %else12
8473 ; RV32ZVE32F-NEXT: andi a0, a0, -128
8474 ; RV32ZVE32F-NEXT: bnez a0, .LBB80_16
8475 ; RV32ZVE32F-NEXT: .LBB80_8: # %else14
8476 ; RV32ZVE32F-NEXT: ret
8477 ; RV32ZVE32F-NEXT: .LBB80_9: # %cond.store
8478 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
8479 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
8480 ; RV32ZVE32F-NEXT: fsd fa0, 0(a1)
8481 ; RV32ZVE32F-NEXT: andi a1, a0, 2
8482 ; RV32ZVE32F-NEXT: beqz a1, .LBB80_2
8483 ; RV32ZVE32F-NEXT: .LBB80_10: # %cond.store1
8484 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8485 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
8486 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
8487 ; RV32ZVE32F-NEXT: fsd fa1, 0(a1)
8488 ; RV32ZVE32F-NEXT: andi a1, a0, 4
8489 ; RV32ZVE32F-NEXT: beqz a1, .LBB80_3
8490 ; RV32ZVE32F-NEXT: .LBB80_11: # %cond.store3
8491 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8492 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
8493 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
8494 ; RV32ZVE32F-NEXT: fsd fa2, 0(a1)
8495 ; RV32ZVE32F-NEXT: andi a1, a0, 8
8496 ; RV32ZVE32F-NEXT: beqz a1, .LBB80_4
8497 ; RV32ZVE32F-NEXT: .LBB80_12: # %cond.store5
8498 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8499 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
8500 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
8501 ; RV32ZVE32F-NEXT: fsd fa3, 0(a1)
8502 ; RV32ZVE32F-NEXT: andi a1, a0, 16
8503 ; RV32ZVE32F-NEXT: beqz a1, .LBB80_5
8504 ; RV32ZVE32F-NEXT: .LBB80_13: # %cond.store7
8505 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
8506 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
8507 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
8508 ; RV32ZVE32F-NEXT: fsd fa4, 0(a1)
8509 ; RV32ZVE32F-NEXT: andi a1, a0, 32
8510 ; RV32ZVE32F-NEXT: beqz a1, .LBB80_6
8511 ; RV32ZVE32F-NEXT: .LBB80_14: # %cond.store9
8512 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
8513 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
8514 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
8515 ; RV32ZVE32F-NEXT: fsd fa5, 0(a1)
8516 ; RV32ZVE32F-NEXT: andi a1, a0, 64
8517 ; RV32ZVE32F-NEXT: beqz a1, .LBB80_7
8518 ; RV32ZVE32F-NEXT: .LBB80_15: # %cond.store11
8519 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
8520 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
8521 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
8522 ; RV32ZVE32F-NEXT: fsd fa6, 0(a1)
8523 ; RV32ZVE32F-NEXT: andi a0, a0, -128
8524 ; RV32ZVE32F-NEXT: beqz a0, .LBB80_8
8525 ; RV32ZVE32F-NEXT: .LBB80_16: # %cond.store13
8526 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
8527 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
8528 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
8529 ; RV32ZVE32F-NEXT: fsd fa7, 0(a0)
8530 ; RV32ZVE32F-NEXT: ret
8532 ; RV64ZVE32F-LABEL: mscatter_v8f64:
8533 ; RV64ZVE32F: # %bb.0:
8534 ; RV64ZVE32F-NEXT: ld a1, 56(a0)
8535 ; RV64ZVE32F-NEXT: ld a2, 48(a0)
8536 ; RV64ZVE32F-NEXT: ld a4, 40(a0)
8537 ; RV64ZVE32F-NEXT: ld a5, 32(a0)
8538 ; RV64ZVE32F-NEXT: ld a6, 24(a0)
8539 ; RV64ZVE32F-NEXT: ld a7, 16(a0)
8540 ; RV64ZVE32F-NEXT: ld t0, 8(a0)
8541 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8542 ; RV64ZVE32F-NEXT: vmv.x.s a3, v0
8543 ; RV64ZVE32F-NEXT: andi t1, a3, 1
8544 ; RV64ZVE32F-NEXT: bnez t1, .LBB80_9
8545 ; RV64ZVE32F-NEXT: # %bb.1: # %else
8546 ; RV64ZVE32F-NEXT: andi a0, a3, 2
8547 ; RV64ZVE32F-NEXT: bnez a0, .LBB80_10
8548 ; RV64ZVE32F-NEXT: .LBB80_2: # %else2
8549 ; RV64ZVE32F-NEXT: andi a0, a3, 4
8550 ; RV64ZVE32F-NEXT: bnez a0, .LBB80_11
8551 ; RV64ZVE32F-NEXT: .LBB80_3: # %else4
8552 ; RV64ZVE32F-NEXT: andi a0, a3, 8
8553 ; RV64ZVE32F-NEXT: bnez a0, .LBB80_12
8554 ; RV64ZVE32F-NEXT: .LBB80_4: # %else6
8555 ; RV64ZVE32F-NEXT: andi a0, a3, 16
8556 ; RV64ZVE32F-NEXT: bnez a0, .LBB80_13
8557 ; RV64ZVE32F-NEXT: .LBB80_5: # %else8
8558 ; RV64ZVE32F-NEXT: andi a0, a3, 32
8559 ; RV64ZVE32F-NEXT: bnez a0, .LBB80_14
8560 ; RV64ZVE32F-NEXT: .LBB80_6: # %else10
8561 ; RV64ZVE32F-NEXT: andi a0, a3, 64
8562 ; RV64ZVE32F-NEXT: bnez a0, .LBB80_15
8563 ; RV64ZVE32F-NEXT: .LBB80_7: # %else12
8564 ; RV64ZVE32F-NEXT: andi a0, a3, -128
8565 ; RV64ZVE32F-NEXT: bnez a0, .LBB80_16
8566 ; RV64ZVE32F-NEXT: .LBB80_8: # %else14
8567 ; RV64ZVE32F-NEXT: ret
8568 ; RV64ZVE32F-NEXT: .LBB80_9: # %cond.store
8569 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
8570 ; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
8571 ; RV64ZVE32F-NEXT: andi a0, a3, 2
8572 ; RV64ZVE32F-NEXT: beqz a0, .LBB80_2
8573 ; RV64ZVE32F-NEXT: .LBB80_10: # %cond.store1
8574 ; RV64ZVE32F-NEXT: fsd fa1, 0(t0)
8575 ; RV64ZVE32F-NEXT: andi a0, a3, 4
8576 ; RV64ZVE32F-NEXT: beqz a0, .LBB80_3
8577 ; RV64ZVE32F-NEXT: .LBB80_11: # %cond.store3
8578 ; RV64ZVE32F-NEXT: fsd fa2, 0(a7)
8579 ; RV64ZVE32F-NEXT: andi a0, a3, 8
8580 ; RV64ZVE32F-NEXT: beqz a0, .LBB80_4
8581 ; RV64ZVE32F-NEXT: .LBB80_12: # %cond.store5
8582 ; RV64ZVE32F-NEXT: fsd fa3, 0(a6)
8583 ; RV64ZVE32F-NEXT: andi a0, a3, 16
8584 ; RV64ZVE32F-NEXT: beqz a0, .LBB80_5
8585 ; RV64ZVE32F-NEXT: .LBB80_13: # %cond.store7
8586 ; RV64ZVE32F-NEXT: fsd fa4, 0(a5)
8587 ; RV64ZVE32F-NEXT: andi a0, a3, 32
8588 ; RV64ZVE32F-NEXT: beqz a0, .LBB80_6
8589 ; RV64ZVE32F-NEXT: .LBB80_14: # %cond.store9
8590 ; RV64ZVE32F-NEXT: fsd fa5, 0(a4)
8591 ; RV64ZVE32F-NEXT: andi a0, a3, 64
8592 ; RV64ZVE32F-NEXT: beqz a0, .LBB80_7
8593 ; RV64ZVE32F-NEXT: .LBB80_15: # %cond.store11
8594 ; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
8595 ; RV64ZVE32F-NEXT: andi a0, a3, -128
8596 ; RV64ZVE32F-NEXT: beqz a0, .LBB80_8
8597 ; RV64ZVE32F-NEXT: .LBB80_16: # %cond.store13
8598 ; RV64ZVE32F-NEXT: fsd fa7, 0(a1)
8599 ; RV64ZVE32F-NEXT: ret
8600 call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
; Masked scatter of <8 x double> where each destination is formed by a
; getelementptr of double from %base using the <8 x i8> indices directly
; (no explicit extension instruction in the IR).
;
; What the autogenerated assertions below pin down, per check prefix
;   - RV32V      sign-extends the indices to e32 (vsext.vf4), shifts them left
;                by 3 and issues a single masked vsoxei32.v.
;   - RV64       sign-extends the indices to e64 (vsext.vf8), shifts them left
;                by 3 and issues a single masked vsoxei64.v.
;   - RV32ZVE32F builds the addresses in a vector (vsext.vf4, vsll.vi by 3,
;                vadd.vx with the base), then tests the mask bits one by one
;                and stores each selected element with a scalar fsd.
;   - RV64ZVE32F tests the mask bits one by one; for each selected element it
;                moves the index to a scalar register (vmv.x.s), forms the
;                address with slli by 3 plus add, and stores with a scalar fsd.
;
; The assertions come from utils/update_llc_test_checks.py; regenerate them
; with that script instead of editing them by hand.
8604 define void @mscatter_baseidx_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
8605 ; RV32V-LABEL: mscatter_baseidx_v8i8_v8f64:
8607 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
8608 ; RV32V-NEXT: vsext.vf4 v14, v12
8609 ; RV32V-NEXT: vsll.vi v12, v14, 3
8610 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
8611 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
8614 ; RV64-LABEL: mscatter_baseidx_v8i8_v8f64:
8616 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
8617 ; RV64-NEXT: vsext.vf8 v16, v12
8618 ; RV64-NEXT: vsll.vi v12, v16, 3
8619 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
8622 ; RV32ZVE32F-LABEL: mscatter_baseidx_v8i8_v8f64:
8623 ; RV32ZVE32F: # %bb.0:
8624 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
8625 ; RV32ZVE32F-NEXT: vsext.vf4 v10, v8
8626 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
8627 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
8628 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8629 ; RV32ZVE32F-NEXT: vmv.x.s a0, v0
8630 ; RV32ZVE32F-NEXT: andi a1, a0, 1
8631 ; RV32ZVE32F-NEXT: bnez a1, .LBB81_9
8632 ; RV32ZVE32F-NEXT: # %bb.1: # %else
8633 ; RV32ZVE32F-NEXT: andi a1, a0, 2
8634 ; RV32ZVE32F-NEXT: bnez a1, .LBB81_10
8635 ; RV32ZVE32F-NEXT: .LBB81_2: # %else2
8636 ; RV32ZVE32F-NEXT: andi a1, a0, 4
8637 ; RV32ZVE32F-NEXT: bnez a1, .LBB81_11
8638 ; RV32ZVE32F-NEXT: .LBB81_3: # %else4
8639 ; RV32ZVE32F-NEXT: andi a1, a0, 8
8640 ; RV32ZVE32F-NEXT: bnez a1, .LBB81_12
8641 ; RV32ZVE32F-NEXT: .LBB81_4: # %else6
8642 ; RV32ZVE32F-NEXT: andi a1, a0, 16
8643 ; RV32ZVE32F-NEXT: bnez a1, .LBB81_13
8644 ; RV32ZVE32F-NEXT: .LBB81_5: # %else8
8645 ; RV32ZVE32F-NEXT: andi a1, a0, 32
8646 ; RV32ZVE32F-NEXT: bnez a1, .LBB81_14
8647 ; RV32ZVE32F-NEXT: .LBB81_6: # %else10
8648 ; RV32ZVE32F-NEXT: andi a1, a0, 64
8649 ; RV32ZVE32F-NEXT: bnez a1, .LBB81_15
8650 ; RV32ZVE32F-NEXT: .LBB81_7: # %else12
8651 ; RV32ZVE32F-NEXT: andi a0, a0, -128
8652 ; RV32ZVE32F-NEXT: bnez a0, .LBB81_16
8653 ; RV32ZVE32F-NEXT: .LBB81_8: # %else14
8654 ; RV32ZVE32F-NEXT: ret
8655 ; RV32ZVE32F-NEXT: .LBB81_9: # %cond.store
8656 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
8657 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
8658 ; RV32ZVE32F-NEXT: fsd fa0, 0(a1)
8659 ; RV32ZVE32F-NEXT: andi a1, a0, 2
8660 ; RV32ZVE32F-NEXT: beqz a1, .LBB81_2
8661 ; RV32ZVE32F-NEXT: .LBB81_10: # %cond.store1
8662 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8663 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
8664 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
8665 ; RV32ZVE32F-NEXT: fsd fa1, 0(a1)
8666 ; RV32ZVE32F-NEXT: andi a1, a0, 4
8667 ; RV32ZVE32F-NEXT: beqz a1, .LBB81_3
8668 ; RV32ZVE32F-NEXT: .LBB81_11: # %cond.store3
8669 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8670 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
8671 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
8672 ; RV32ZVE32F-NEXT: fsd fa2, 0(a1)
8673 ; RV32ZVE32F-NEXT: andi a1, a0, 8
8674 ; RV32ZVE32F-NEXT: beqz a1, .LBB81_4
8675 ; RV32ZVE32F-NEXT: .LBB81_12: # %cond.store5
8676 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8677 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
8678 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
8679 ; RV32ZVE32F-NEXT: fsd fa3, 0(a1)
8680 ; RV32ZVE32F-NEXT: andi a1, a0, 16
8681 ; RV32ZVE32F-NEXT: beqz a1, .LBB81_5
8682 ; RV32ZVE32F-NEXT: .LBB81_13: # %cond.store7
8683 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
8684 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
8685 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
8686 ; RV32ZVE32F-NEXT: fsd fa4, 0(a1)
8687 ; RV32ZVE32F-NEXT: andi a1, a0, 32
8688 ; RV32ZVE32F-NEXT: beqz a1, .LBB81_6
8689 ; RV32ZVE32F-NEXT: .LBB81_14: # %cond.store9
8690 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
8691 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
8692 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
8693 ; RV32ZVE32F-NEXT: fsd fa5, 0(a1)
8694 ; RV32ZVE32F-NEXT: andi a1, a0, 64
8695 ; RV32ZVE32F-NEXT: beqz a1, .LBB81_7
8696 ; RV32ZVE32F-NEXT: .LBB81_15: # %cond.store11
8697 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
8698 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
8699 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
8700 ; RV32ZVE32F-NEXT: fsd fa6, 0(a1)
8701 ; RV32ZVE32F-NEXT: andi a0, a0, -128
8702 ; RV32ZVE32F-NEXT: beqz a0, .LBB81_8
8703 ; RV32ZVE32F-NEXT: .LBB81_16: # %cond.store13
8704 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
8705 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
8706 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
8707 ; RV32ZVE32F-NEXT: fsd fa7, 0(a0)
8708 ; RV32ZVE32F-NEXT: ret
8710 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8_v8f64:
8711 ; RV64ZVE32F: # %bb.0:
8712 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8713 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
8714 ; RV64ZVE32F-NEXT: andi a2, a1, 1
8715 ; RV64ZVE32F-NEXT: beqz a2, .LBB81_2
8716 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
8717 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8718 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8719 ; RV64ZVE32F-NEXT: add a2, a0, a2
8720 ; RV64ZVE32F-NEXT: fsd fa0, 0(a2)
8721 ; RV64ZVE32F-NEXT: .LBB81_2: # %else
8722 ; RV64ZVE32F-NEXT: andi a2, a1, 2
8723 ; RV64ZVE32F-NEXT: beqz a2, .LBB81_4
8724 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
8725 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8726 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
8727 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
8728 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8729 ; RV64ZVE32F-NEXT: add a2, a0, a2
8730 ; RV64ZVE32F-NEXT: fsd fa1, 0(a2)
8731 ; RV64ZVE32F-NEXT: .LBB81_4: # %else2
8732 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
8733 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
8734 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
8735 ; RV64ZVE32F-NEXT: andi a2, a1, 4
8736 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
8737 ; RV64ZVE32F-NEXT: bnez a2, .LBB81_12
8738 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
8739 ; RV64ZVE32F-NEXT: andi a2, a1, 8
8740 ; RV64ZVE32F-NEXT: bnez a2, .LBB81_13
8741 ; RV64ZVE32F-NEXT: .LBB81_6: # %else6
8742 ; RV64ZVE32F-NEXT: andi a2, a1, 16
8743 ; RV64ZVE32F-NEXT: bnez a2, .LBB81_14
8744 ; RV64ZVE32F-NEXT: .LBB81_7: # %else8
8745 ; RV64ZVE32F-NEXT: andi a2, a1, 32
8746 ; RV64ZVE32F-NEXT: beqz a2, .LBB81_9
8747 ; RV64ZVE32F-NEXT: .LBB81_8: # %cond.store9
8748 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
8749 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8750 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8751 ; RV64ZVE32F-NEXT: add a2, a0, a2
8752 ; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
8753 ; RV64ZVE32F-NEXT: .LBB81_9: # %else10
8754 ; RV64ZVE32F-NEXT: andi a2, a1, 64
8755 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
8756 ; RV64ZVE32F-NEXT: bnez a2, .LBB81_15
8757 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
8758 ; RV64ZVE32F-NEXT: andi a1, a1, -128
8759 ; RV64ZVE32F-NEXT: bnez a1, .LBB81_16
8760 ; RV64ZVE32F-NEXT: .LBB81_11: # %else14
8761 ; RV64ZVE32F-NEXT: ret
8762 ; RV64ZVE32F-NEXT: .LBB81_12: # %cond.store3
8763 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8764 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8765 ; RV64ZVE32F-NEXT: add a2, a0, a2
8766 ; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
8767 ; RV64ZVE32F-NEXT: andi a2, a1, 8
8768 ; RV64ZVE32F-NEXT: beqz a2, .LBB81_6
8769 ; RV64ZVE32F-NEXT: .LBB81_13: # %cond.store5
8770 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
8771 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8772 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8773 ; RV64ZVE32F-NEXT: add a2, a0, a2
8774 ; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
8775 ; RV64ZVE32F-NEXT: andi a2, a1, 16
8776 ; RV64ZVE32F-NEXT: beqz a2, .LBB81_7
8777 ; RV64ZVE32F-NEXT: .LBB81_14: # %cond.store7
8778 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
8779 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8780 ; RV64ZVE32F-NEXT: add a2, a0, a2
8781 ; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
8782 ; RV64ZVE32F-NEXT: andi a2, a1, 32
8783 ; RV64ZVE32F-NEXT: bnez a2, .LBB81_8
8784 ; RV64ZVE32F-NEXT: j .LBB81_9
8785 ; RV64ZVE32F-NEXT: .LBB81_15: # %cond.store11
8786 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8787 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8788 ; RV64ZVE32F-NEXT: add a2, a0, a2
8789 ; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
8790 ; RV64ZVE32F-NEXT: andi a1, a1, -128
8791 ; RV64ZVE32F-NEXT: beqz a1, .LBB81_11
8792 ; RV64ZVE32F-NEXT: .LBB81_16: # %cond.store13
8793 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
8794 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
8795 ; RV64ZVE32F-NEXT: slli a1, a1, 3
8796 ; RV64ZVE32F-NEXT: add a0, a0, a1
8797 ; RV64ZVE32F-NEXT: fsd fa7, 0(a0)
8798 ; RV64ZVE32F-NEXT: ret
; IR under test. The i8 indices go straight into the GEP; the scatter is
; called with an i32 8 third operand and the <8 x i1> mask %m.
8799 %ptrs = getelementptr inbounds double, ptr %base, <8 x i8> %idxs
8800 call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
; Masked scatter of <8 x double> where the <8 x i8> indices are explicitly
; sign-extended to <8 x i64> in the IR before being used in a getelementptr
; of double from %base.
;
; What the autogenerated assertions below pin down, per check prefix
;   - RV32V      widens the indices only to e32 (vsext.vf4), shifts them left
;                by 3 and issues a single masked vsoxei32.v.
;   - RV64       widens the indices to e64 (vsext.vf8), shifts them left by 3
;                and issues a single masked vsoxei64.v.
;   - RV32ZVE32F builds the addresses in a vector (vsext.vf4, vsll.vi by 3,
;                vadd.vx with the base), then tests the mask bits one by one
;                and stores each selected element with a scalar fsd.
;   - RV64ZVE32F tests the mask bits one by one; for each selected element it
;                moves the index to a scalar register (vmv.x.s), forms the
;                address with slli by 3 plus add, and stores with a scalar fsd.
;
; The assertions come from utils/update_llc_test_checks.py; regenerate them
; with that script instead of editing them by hand.
8804 define void @mscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
8805 ; RV32V-LABEL: mscatter_baseidx_sext_v8i8_v8f64:
8807 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
8808 ; RV32V-NEXT: vsext.vf4 v14, v12
8809 ; RV32V-NEXT: vsll.vi v12, v14, 3
8810 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
8811 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
8814 ; RV64-LABEL: mscatter_baseidx_sext_v8i8_v8f64:
8816 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
8817 ; RV64-NEXT: vsext.vf8 v16, v12
8818 ; RV64-NEXT: vsll.vi v12, v16, 3
8819 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
8822 ; RV32ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8f64:
8823 ; RV32ZVE32F: # %bb.0:
8824 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
8825 ; RV32ZVE32F-NEXT: vsext.vf4 v10, v8
8826 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
8827 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
8828 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8829 ; RV32ZVE32F-NEXT: vmv.x.s a0, v0
8830 ; RV32ZVE32F-NEXT: andi a1, a0, 1
8831 ; RV32ZVE32F-NEXT: bnez a1, .LBB82_9
8832 ; RV32ZVE32F-NEXT: # %bb.1: # %else
8833 ; RV32ZVE32F-NEXT: andi a1, a0, 2
8834 ; RV32ZVE32F-NEXT: bnez a1, .LBB82_10
8835 ; RV32ZVE32F-NEXT: .LBB82_2: # %else2
8836 ; RV32ZVE32F-NEXT: andi a1, a0, 4
8837 ; RV32ZVE32F-NEXT: bnez a1, .LBB82_11
8838 ; RV32ZVE32F-NEXT: .LBB82_3: # %else4
8839 ; RV32ZVE32F-NEXT: andi a1, a0, 8
8840 ; RV32ZVE32F-NEXT: bnez a1, .LBB82_12
8841 ; RV32ZVE32F-NEXT: .LBB82_4: # %else6
8842 ; RV32ZVE32F-NEXT: andi a1, a0, 16
8843 ; RV32ZVE32F-NEXT: bnez a1, .LBB82_13
8844 ; RV32ZVE32F-NEXT: .LBB82_5: # %else8
8845 ; RV32ZVE32F-NEXT: andi a1, a0, 32
8846 ; RV32ZVE32F-NEXT: bnez a1, .LBB82_14
8847 ; RV32ZVE32F-NEXT: .LBB82_6: # %else10
8848 ; RV32ZVE32F-NEXT: andi a1, a0, 64
8849 ; RV32ZVE32F-NEXT: bnez a1, .LBB82_15
8850 ; RV32ZVE32F-NEXT: .LBB82_7: # %else12
8851 ; RV32ZVE32F-NEXT: andi a0, a0, -128
8852 ; RV32ZVE32F-NEXT: bnez a0, .LBB82_16
8853 ; RV32ZVE32F-NEXT: .LBB82_8: # %else14
8854 ; RV32ZVE32F-NEXT: ret
8855 ; RV32ZVE32F-NEXT: .LBB82_9: # %cond.store
8856 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
8857 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
8858 ; RV32ZVE32F-NEXT: fsd fa0, 0(a1)
8859 ; RV32ZVE32F-NEXT: andi a1, a0, 2
8860 ; RV32ZVE32F-NEXT: beqz a1, .LBB82_2
8861 ; RV32ZVE32F-NEXT: .LBB82_10: # %cond.store1
8862 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8863 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
8864 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
8865 ; RV32ZVE32F-NEXT: fsd fa1, 0(a1)
8866 ; RV32ZVE32F-NEXT: andi a1, a0, 4
8867 ; RV32ZVE32F-NEXT: beqz a1, .LBB82_3
8868 ; RV32ZVE32F-NEXT: .LBB82_11: # %cond.store3
8869 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8870 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
8871 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
8872 ; RV32ZVE32F-NEXT: fsd fa2, 0(a1)
8873 ; RV32ZVE32F-NEXT: andi a1, a0, 8
8874 ; RV32ZVE32F-NEXT: beqz a1, .LBB82_4
8875 ; RV32ZVE32F-NEXT: .LBB82_12: # %cond.store5
8876 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8877 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
8878 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
8879 ; RV32ZVE32F-NEXT: fsd fa3, 0(a1)
8880 ; RV32ZVE32F-NEXT: andi a1, a0, 16
8881 ; RV32ZVE32F-NEXT: beqz a1, .LBB82_5
8882 ; RV32ZVE32F-NEXT: .LBB82_13: # %cond.store7
8883 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
8884 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
8885 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
8886 ; RV32ZVE32F-NEXT: fsd fa4, 0(a1)
8887 ; RV32ZVE32F-NEXT: andi a1, a0, 32
8888 ; RV32ZVE32F-NEXT: beqz a1, .LBB82_6
8889 ; RV32ZVE32F-NEXT: .LBB82_14: # %cond.store9
8890 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
8891 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
8892 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
8893 ; RV32ZVE32F-NEXT: fsd fa5, 0(a1)
8894 ; RV32ZVE32F-NEXT: andi a1, a0, 64
8895 ; RV32ZVE32F-NEXT: beqz a1, .LBB82_7
8896 ; RV32ZVE32F-NEXT: .LBB82_15: # %cond.store11
8897 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
8898 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
8899 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
8900 ; RV32ZVE32F-NEXT: fsd fa6, 0(a1)
8901 ; RV32ZVE32F-NEXT: andi a0, a0, -128
8902 ; RV32ZVE32F-NEXT: beqz a0, .LBB82_8
8903 ; RV32ZVE32F-NEXT: .LBB82_16: # %cond.store13
8904 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
8905 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
8906 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
8907 ; RV32ZVE32F-NEXT: fsd fa7, 0(a0)
8908 ; RV32ZVE32F-NEXT: ret
8910 ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8f64:
8911 ; RV64ZVE32F: # %bb.0:
8912 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8913 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
8914 ; RV64ZVE32F-NEXT: andi a2, a1, 1
8915 ; RV64ZVE32F-NEXT: beqz a2, .LBB82_2
8916 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
8917 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8918 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8919 ; RV64ZVE32F-NEXT: add a2, a0, a2
8920 ; RV64ZVE32F-NEXT: fsd fa0, 0(a2)
8921 ; RV64ZVE32F-NEXT: .LBB82_2: # %else
8922 ; RV64ZVE32F-NEXT: andi a2, a1, 2
8923 ; RV64ZVE32F-NEXT: beqz a2, .LBB82_4
8924 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
8925 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8926 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
8927 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
8928 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8929 ; RV64ZVE32F-NEXT: add a2, a0, a2
8930 ; RV64ZVE32F-NEXT: fsd fa1, 0(a2)
8931 ; RV64ZVE32F-NEXT: .LBB82_4: # %else2
8932 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
8933 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
8934 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
8935 ; RV64ZVE32F-NEXT: andi a2, a1, 4
8936 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
8937 ; RV64ZVE32F-NEXT: bnez a2, .LBB82_12
8938 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
8939 ; RV64ZVE32F-NEXT: andi a2, a1, 8
8940 ; RV64ZVE32F-NEXT: bnez a2, .LBB82_13
8941 ; RV64ZVE32F-NEXT: .LBB82_6: # %else6
8942 ; RV64ZVE32F-NEXT: andi a2, a1, 16
8943 ; RV64ZVE32F-NEXT: bnez a2, .LBB82_14
8944 ; RV64ZVE32F-NEXT: .LBB82_7: # %else8
8945 ; RV64ZVE32F-NEXT: andi a2, a1, 32
8946 ; RV64ZVE32F-NEXT: beqz a2, .LBB82_9
8947 ; RV64ZVE32F-NEXT: .LBB82_8: # %cond.store9
8948 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
8949 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8950 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8951 ; RV64ZVE32F-NEXT: add a2, a0, a2
8952 ; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
8953 ; RV64ZVE32F-NEXT: .LBB82_9: # %else10
8954 ; RV64ZVE32F-NEXT: andi a2, a1, 64
8955 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
8956 ; RV64ZVE32F-NEXT: bnez a2, .LBB82_15
8957 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
8958 ; RV64ZVE32F-NEXT: andi a1, a1, -128
8959 ; RV64ZVE32F-NEXT: bnez a1, .LBB82_16
8960 ; RV64ZVE32F-NEXT: .LBB82_11: # %else14
8961 ; RV64ZVE32F-NEXT: ret
8962 ; RV64ZVE32F-NEXT: .LBB82_12: # %cond.store3
8963 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8964 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8965 ; RV64ZVE32F-NEXT: add a2, a0, a2
8966 ; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
8967 ; RV64ZVE32F-NEXT: andi a2, a1, 8
8968 ; RV64ZVE32F-NEXT: beqz a2, .LBB82_6
8969 ; RV64ZVE32F-NEXT: .LBB82_13: # %cond.store5
8970 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
8971 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8972 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8973 ; RV64ZVE32F-NEXT: add a2, a0, a2
8974 ; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
8975 ; RV64ZVE32F-NEXT: andi a2, a1, 16
8976 ; RV64ZVE32F-NEXT: beqz a2, .LBB82_7
8977 ; RV64ZVE32F-NEXT: .LBB82_14: # %cond.store7
8978 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
8979 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8980 ; RV64ZVE32F-NEXT: add a2, a0, a2
8981 ; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
8982 ; RV64ZVE32F-NEXT: andi a2, a1, 32
8983 ; RV64ZVE32F-NEXT: bnez a2, .LBB82_8
8984 ; RV64ZVE32F-NEXT: j .LBB82_9
8985 ; RV64ZVE32F-NEXT: .LBB82_15: # %cond.store11
8986 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8987 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8988 ; RV64ZVE32F-NEXT: add a2, a0, a2
8989 ; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
8990 ; RV64ZVE32F-NEXT: andi a1, a1, -128
8991 ; RV64ZVE32F-NEXT: beqz a1, .LBB82_11
8992 ; RV64ZVE32F-NEXT: .LBB82_16: # %cond.store13
8993 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
8994 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
8995 ; RV64ZVE32F-NEXT: slli a1, a1, 3
8996 ; RV64ZVE32F-NEXT: add a0, a0, a1
8997 ; RV64ZVE32F-NEXT: fsd fa7, 0(a0)
8998 ; RV64ZVE32F-NEXT: ret
; IR under test. The sext to <8 x i64> is explicit here, and the widened
; indices feed the GEP; the scatter is called with an i32 8 third operand and
; the <8 x i1> mask %m.
8999 %eidxs = sext <8 x i8> %idxs to <8 x i64>
9000 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
9001 call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
; Masked scatter of <8 x double> where the <8 x i8> indices are explicitly
; zero-extended to <8 x i64> in the IR before being used in a getelementptr
; of double from %base.
;
; What the autogenerated assertions below pin down, per check prefix
;   - RV32V and RV64 both widen the indices only to e16 (vzext.vf2), shift
;                them left by 3 and issue a single masked vsoxei16.v.
;   - RV32ZVE32F builds the addresses in a vector (vzext.vf4, vsll.vi by 3,
;                vadd.vx with the base), then tests the mask bits one by one
;                and stores each selected element with a scalar fsd.
;   - RV64ZVE32F tests the mask bits one by one; for each selected element it
;                moves the index to a scalar register (vmv.x.s), masks it with
;                andi 255, forms the address with slli by 3 plus add, and
;                stores with a scalar fsd.
;
; The assertions come from utils/update_llc_test_checks.py; regenerate them
; with that script instead of editing them by hand.
9005 define void @mscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
9006 ; RV32V-LABEL: mscatter_baseidx_zext_v8i8_v8f64:
9008 ; RV32V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
9009 ; RV32V-NEXT: vzext.vf2 v13, v12
9010 ; RV32V-NEXT: vsll.vi v12, v13, 3
9011 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
9012 ; RV32V-NEXT: vsoxei16.v v8, (a0), v12, v0.t
9015 ; RV64-LABEL: mscatter_baseidx_zext_v8i8_v8f64:
9017 ; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
9018 ; RV64-NEXT: vzext.vf2 v13, v12
9019 ; RV64-NEXT: vsll.vi v12, v13, 3
9020 ; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, ma
9021 ; RV64-NEXT: vsoxei16.v v8, (a0), v12, v0.t
9024 ; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8f64:
9025 ; RV32ZVE32F: # %bb.0:
9026 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
9027 ; RV32ZVE32F-NEXT: vzext.vf4 v10, v8
9028 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
9029 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
9030 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
9031 ; RV32ZVE32F-NEXT: vmv.x.s a0, v0
9032 ; RV32ZVE32F-NEXT: andi a1, a0, 1
9033 ; RV32ZVE32F-NEXT: bnez a1, .LBB83_9
9034 ; RV32ZVE32F-NEXT: # %bb.1: # %else
9035 ; RV32ZVE32F-NEXT: andi a1, a0, 2
9036 ; RV32ZVE32F-NEXT: bnez a1, .LBB83_10
9037 ; RV32ZVE32F-NEXT: .LBB83_2: # %else2
9038 ; RV32ZVE32F-NEXT: andi a1, a0, 4
9039 ; RV32ZVE32F-NEXT: bnez a1, .LBB83_11
9040 ; RV32ZVE32F-NEXT: .LBB83_3: # %else4
9041 ; RV32ZVE32F-NEXT: andi a1, a0, 8
9042 ; RV32ZVE32F-NEXT: bnez a1, .LBB83_12
9043 ; RV32ZVE32F-NEXT: .LBB83_4: # %else6
9044 ; RV32ZVE32F-NEXT: andi a1, a0, 16
9045 ; RV32ZVE32F-NEXT: bnez a1, .LBB83_13
9046 ; RV32ZVE32F-NEXT: .LBB83_5: # %else8
9047 ; RV32ZVE32F-NEXT: andi a1, a0, 32
9048 ; RV32ZVE32F-NEXT: bnez a1, .LBB83_14
9049 ; RV32ZVE32F-NEXT: .LBB83_6: # %else10
9050 ; RV32ZVE32F-NEXT: andi a1, a0, 64
9051 ; RV32ZVE32F-NEXT: bnez a1, .LBB83_15
9052 ; RV32ZVE32F-NEXT: .LBB83_7: # %else12
9053 ; RV32ZVE32F-NEXT: andi a0, a0, -128
9054 ; RV32ZVE32F-NEXT: bnez a0, .LBB83_16
9055 ; RV32ZVE32F-NEXT: .LBB83_8: # %else14
9056 ; RV32ZVE32F-NEXT: ret
9057 ; RV32ZVE32F-NEXT: .LBB83_9: # %cond.store
9058 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
9059 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
9060 ; RV32ZVE32F-NEXT: fsd fa0, 0(a1)
9061 ; RV32ZVE32F-NEXT: andi a1, a0, 2
9062 ; RV32ZVE32F-NEXT: beqz a1, .LBB83_2
9063 ; RV32ZVE32F-NEXT: .LBB83_10: # %cond.store1
9064 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9065 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
9066 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
9067 ; RV32ZVE32F-NEXT: fsd fa1, 0(a1)
9068 ; RV32ZVE32F-NEXT: andi a1, a0, 4
9069 ; RV32ZVE32F-NEXT: beqz a1, .LBB83_3
9070 ; RV32ZVE32F-NEXT: .LBB83_11: # %cond.store3
9071 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9072 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
9073 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
9074 ; RV32ZVE32F-NEXT: fsd fa2, 0(a1)
9075 ; RV32ZVE32F-NEXT: andi a1, a0, 8
9076 ; RV32ZVE32F-NEXT: beqz a1, .LBB83_4
9077 ; RV32ZVE32F-NEXT: .LBB83_12: # %cond.store5
9078 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9079 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
9080 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
9081 ; RV32ZVE32F-NEXT: fsd fa3, 0(a1)
9082 ; RV32ZVE32F-NEXT: andi a1, a0, 16
9083 ; RV32ZVE32F-NEXT: beqz a1, .LBB83_5
9084 ; RV32ZVE32F-NEXT: .LBB83_13: # %cond.store7
9085 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9086 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
9087 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
9088 ; RV32ZVE32F-NEXT: fsd fa4, 0(a1)
9089 ; RV32ZVE32F-NEXT: andi a1, a0, 32
9090 ; RV32ZVE32F-NEXT: beqz a1, .LBB83_6
9091 ; RV32ZVE32F-NEXT: .LBB83_14: # %cond.store9
9092 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9093 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
9094 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
9095 ; RV32ZVE32F-NEXT: fsd fa5, 0(a1)
9096 ; RV32ZVE32F-NEXT: andi a1, a0, 64
9097 ; RV32ZVE32F-NEXT: beqz a1, .LBB83_7
9098 ; RV32ZVE32F-NEXT: .LBB83_15: # %cond.store11
9099 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9100 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
9101 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
9102 ; RV32ZVE32F-NEXT: fsd fa6, 0(a1)
9103 ; RV32ZVE32F-NEXT: andi a0, a0, -128
9104 ; RV32ZVE32F-NEXT: beqz a0, .LBB83_8
9105 ; RV32ZVE32F-NEXT: .LBB83_16: # %cond.store13
9106 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9107 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
9108 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
9109 ; RV32ZVE32F-NEXT: fsd fa7, 0(a0)
9110 ; RV32ZVE32F-NEXT: ret
9112 ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8f64:
9113 ; RV64ZVE32F: # %bb.0:
9114 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
9115 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
9116 ; RV64ZVE32F-NEXT: andi a2, a1, 1
9117 ; RV64ZVE32F-NEXT: beqz a2, .LBB83_2
9118 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
9119 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9120 ; RV64ZVE32F-NEXT: andi a2, a2, 255
9121 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9122 ; RV64ZVE32F-NEXT: add a2, a0, a2
9123 ; RV64ZVE32F-NEXT: fsd fa0, 0(a2)
9124 ; RV64ZVE32F-NEXT: .LBB83_2: # %else
9125 ; RV64ZVE32F-NEXT: andi a2, a1, 2
9126 ; RV64ZVE32F-NEXT: beqz a2, .LBB83_4
9127 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
9128 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
9129 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
9130 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
9131 ; RV64ZVE32F-NEXT: andi a2, a2, 255
9132 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9133 ; RV64ZVE32F-NEXT: add a2, a0, a2
9134 ; RV64ZVE32F-NEXT: fsd fa1, 0(a2)
9135 ; RV64ZVE32F-NEXT: .LBB83_4: # %else2
9136 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
9137 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
9138 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
9139 ; RV64ZVE32F-NEXT: andi a2, a1, 4
9140 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
9141 ; RV64ZVE32F-NEXT: bnez a2, .LBB83_12
9142 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
9143 ; RV64ZVE32F-NEXT: andi a2, a1, 8
9144 ; RV64ZVE32F-NEXT: bnez a2, .LBB83_13
9145 ; RV64ZVE32F-NEXT: .LBB83_6: # %else6
9146 ; RV64ZVE32F-NEXT: andi a2, a1, 16
9147 ; RV64ZVE32F-NEXT: bnez a2, .LBB83_14
9148 ; RV64ZVE32F-NEXT: .LBB83_7: # %else8
9149 ; RV64ZVE32F-NEXT: andi a2, a1, 32
9150 ; RV64ZVE32F-NEXT: beqz a2, .LBB83_9
9151 ; RV64ZVE32F-NEXT: .LBB83_8: # %cond.store9
9152 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
9153 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9154 ; RV64ZVE32F-NEXT: andi a2, a2, 255
9155 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9156 ; RV64ZVE32F-NEXT: add a2, a0, a2
9157 ; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
9158 ; RV64ZVE32F-NEXT: .LBB83_9: # %else10
9159 ; RV64ZVE32F-NEXT: andi a2, a1, 64
9160 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
9161 ; RV64ZVE32F-NEXT: bnez a2, .LBB83_15
9162 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
9163 ; RV64ZVE32F-NEXT: andi a1, a1, -128
9164 ; RV64ZVE32F-NEXT: bnez a1, .LBB83_16
9165 ; RV64ZVE32F-NEXT: .LBB83_11: # %else14
9166 ; RV64ZVE32F-NEXT: ret
9167 ; RV64ZVE32F-NEXT: .LBB83_12: # %cond.store3
9168 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9169 ; RV64ZVE32F-NEXT: andi a2, a2, 255
9170 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9171 ; RV64ZVE32F-NEXT: add a2, a0, a2
9172 ; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
9173 ; RV64ZVE32F-NEXT: andi a2, a1, 8
9174 ; RV64ZVE32F-NEXT: beqz a2, .LBB83_6
9175 ; RV64ZVE32F-NEXT: .LBB83_13: # %cond.store5
9176 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
9177 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9178 ; RV64ZVE32F-NEXT: andi a2, a2, 255
9179 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9180 ; RV64ZVE32F-NEXT: add a2, a0, a2
9181 ; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
9182 ; RV64ZVE32F-NEXT: andi a2, a1, 16
9183 ; RV64ZVE32F-NEXT: beqz a2, .LBB83_7
9184 ; RV64ZVE32F-NEXT: .LBB83_14: # %cond.store7
9185 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
9186 ; RV64ZVE32F-NEXT: andi a2, a2, 255
9187 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9188 ; RV64ZVE32F-NEXT: add a2, a0, a2
9189 ; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
9190 ; RV64ZVE32F-NEXT: andi a2, a1, 32
9191 ; RV64ZVE32F-NEXT: bnez a2, .LBB83_8
9192 ; RV64ZVE32F-NEXT: j .LBB83_9
9193 ; RV64ZVE32F-NEXT: .LBB83_15: # %cond.store11
9194 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9195 ; RV64ZVE32F-NEXT: andi a2, a2, 255
9196 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9197 ; RV64ZVE32F-NEXT: add a2, a0, a2
9198 ; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
9199 ; RV64ZVE32F-NEXT: andi a1, a1, -128
9200 ; RV64ZVE32F-NEXT: beqz a1, .LBB83_11
9201 ; RV64ZVE32F-NEXT: .LBB83_16: # %cond.store13
9202 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
9203 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
9204 ; RV64ZVE32F-NEXT: andi a1, a1, 255
9205 ; RV64ZVE32F-NEXT: slli a1, a1, 3
9206 ; RV64ZVE32F-NEXT: add a0, a0, a1
9207 ; RV64ZVE32F-NEXT: fsd fa7, 0(a0)
9208 ; RV64ZVE32F-NEXT: ret
; IR under test. The zext to <8 x i64> is explicit here, and the widened
; indices feed the GEP; the scatter is called with an i32 8 third operand and
; the <8 x i1> mask %m.
9209 %eidxs = zext <8 x i8> %idxs to <8 x i64>
9210 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
9211 call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
9215 define void @mscatter_baseidx_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
9216 ; RV32V-LABEL: mscatter_baseidx_v8i16_v8f64:
9218 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
9219 ; RV32V-NEXT: vsext.vf2 v14, v12
9220 ; RV32V-NEXT: vsll.vi v12, v14, 3
9221 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
9222 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
9225 ; RV64-LABEL: mscatter_baseidx_v8i16_v8f64:
9227 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
9228 ; RV64-NEXT: vsext.vf4 v16, v12
9229 ; RV64-NEXT: vsll.vi v12, v16, 3
9230 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
9233 ; RV32ZVE32F-LABEL: mscatter_baseidx_v8i16_v8f64:
9234 ; RV32ZVE32F: # %bb.0:
9235 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
9236 ; RV32ZVE32F-NEXT: vsext.vf2 v10, v8
9237 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
9238 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
9239 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
9240 ; RV32ZVE32F-NEXT: vmv.x.s a0, v0
9241 ; RV32ZVE32F-NEXT: andi a1, a0, 1
9242 ; RV32ZVE32F-NEXT: bnez a1, .LBB84_9
9243 ; RV32ZVE32F-NEXT: # %bb.1: # %else
9244 ; RV32ZVE32F-NEXT: andi a1, a0, 2
9245 ; RV32ZVE32F-NEXT: bnez a1, .LBB84_10
9246 ; RV32ZVE32F-NEXT: .LBB84_2: # %else2
9247 ; RV32ZVE32F-NEXT: andi a1, a0, 4
9248 ; RV32ZVE32F-NEXT: bnez a1, .LBB84_11
9249 ; RV32ZVE32F-NEXT: .LBB84_3: # %else4
9250 ; RV32ZVE32F-NEXT: andi a1, a0, 8
9251 ; RV32ZVE32F-NEXT: bnez a1, .LBB84_12
9252 ; RV32ZVE32F-NEXT: .LBB84_4: # %else6
9253 ; RV32ZVE32F-NEXT: andi a1, a0, 16
9254 ; RV32ZVE32F-NEXT: bnez a1, .LBB84_13
9255 ; RV32ZVE32F-NEXT: .LBB84_5: # %else8
9256 ; RV32ZVE32F-NEXT: andi a1, a0, 32
9257 ; RV32ZVE32F-NEXT: bnez a1, .LBB84_14
9258 ; RV32ZVE32F-NEXT: .LBB84_6: # %else10
9259 ; RV32ZVE32F-NEXT: andi a1, a0, 64
9260 ; RV32ZVE32F-NEXT: bnez a1, .LBB84_15
9261 ; RV32ZVE32F-NEXT: .LBB84_7: # %else12
9262 ; RV32ZVE32F-NEXT: andi a0, a0, -128
9263 ; RV32ZVE32F-NEXT: bnez a0, .LBB84_16
9264 ; RV32ZVE32F-NEXT: .LBB84_8: # %else14
9265 ; RV32ZVE32F-NEXT: ret
9266 ; RV32ZVE32F-NEXT: .LBB84_9: # %cond.store
9267 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
9268 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
9269 ; RV32ZVE32F-NEXT: fsd fa0, 0(a1)
9270 ; RV32ZVE32F-NEXT: andi a1, a0, 2
9271 ; RV32ZVE32F-NEXT: beqz a1, .LBB84_2
9272 ; RV32ZVE32F-NEXT: .LBB84_10: # %cond.store1
9273 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9274 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
9275 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
9276 ; RV32ZVE32F-NEXT: fsd fa1, 0(a1)
9277 ; RV32ZVE32F-NEXT: andi a1, a0, 4
9278 ; RV32ZVE32F-NEXT: beqz a1, .LBB84_3
9279 ; RV32ZVE32F-NEXT: .LBB84_11: # %cond.store3
9280 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9281 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
9282 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
9283 ; RV32ZVE32F-NEXT: fsd fa2, 0(a1)
9284 ; RV32ZVE32F-NEXT: andi a1, a0, 8
9285 ; RV32ZVE32F-NEXT: beqz a1, .LBB84_4
9286 ; RV32ZVE32F-NEXT: .LBB84_12: # %cond.store5
9287 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9288 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
9289 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
9290 ; RV32ZVE32F-NEXT: fsd fa3, 0(a1)
9291 ; RV32ZVE32F-NEXT: andi a1, a0, 16
9292 ; RV32ZVE32F-NEXT: beqz a1, .LBB84_5
9293 ; RV32ZVE32F-NEXT: .LBB84_13: # %cond.store7
9294 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9295 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
9296 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
9297 ; RV32ZVE32F-NEXT: fsd fa4, 0(a1)
9298 ; RV32ZVE32F-NEXT: andi a1, a0, 32
9299 ; RV32ZVE32F-NEXT: beqz a1, .LBB84_6
9300 ; RV32ZVE32F-NEXT: .LBB84_14: # %cond.store9
9301 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9302 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
9303 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
9304 ; RV32ZVE32F-NEXT: fsd fa5, 0(a1)
9305 ; RV32ZVE32F-NEXT: andi a1, a0, 64
9306 ; RV32ZVE32F-NEXT: beqz a1, .LBB84_7
9307 ; RV32ZVE32F-NEXT: .LBB84_15: # %cond.store11
9308 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9309 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
9310 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
9311 ; RV32ZVE32F-NEXT: fsd fa6, 0(a1)
9312 ; RV32ZVE32F-NEXT: andi a0, a0, -128
9313 ; RV32ZVE32F-NEXT: beqz a0, .LBB84_8
9314 ; RV32ZVE32F-NEXT: .LBB84_16: # %cond.store13
9315 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9316 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
9317 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
9318 ; RV32ZVE32F-NEXT: fsd fa7, 0(a0)
9319 ; RV32ZVE32F-NEXT: ret
9321 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i16_v8f64:
9322 ; RV64ZVE32F: # %bb.0:
9323 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
9324 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
9325 ; RV64ZVE32F-NEXT: andi a2, a1, 1
9326 ; RV64ZVE32F-NEXT: beqz a2, .LBB84_2
9327 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
9328 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
9329 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9330 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9331 ; RV64ZVE32F-NEXT: add a2, a0, a2
9332 ; RV64ZVE32F-NEXT: fsd fa0, 0(a2)
9333 ; RV64ZVE32F-NEXT: .LBB84_2: # %else
9334 ; RV64ZVE32F-NEXT: andi a2, a1, 2
9335 ; RV64ZVE32F-NEXT: beqz a2, .LBB84_4
9336 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
9337 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
9338 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
9339 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
9340 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9341 ; RV64ZVE32F-NEXT: add a2, a0, a2
9342 ; RV64ZVE32F-NEXT: fsd fa1, 0(a2)
9343 ; RV64ZVE32F-NEXT: .LBB84_4: # %else2
9344 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
9345 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
9346 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
9347 ; RV64ZVE32F-NEXT: andi a2, a1, 4
9348 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
9349 ; RV64ZVE32F-NEXT: bnez a2, .LBB84_12
9350 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
9351 ; RV64ZVE32F-NEXT: andi a2, a1, 8
9352 ; RV64ZVE32F-NEXT: bnez a2, .LBB84_13
9353 ; RV64ZVE32F-NEXT: .LBB84_6: # %else6
9354 ; RV64ZVE32F-NEXT: andi a2, a1, 16
9355 ; RV64ZVE32F-NEXT: bnez a2, .LBB84_14
9356 ; RV64ZVE32F-NEXT: .LBB84_7: # %else8
9357 ; RV64ZVE32F-NEXT: andi a2, a1, 32
9358 ; RV64ZVE32F-NEXT: beqz a2, .LBB84_9
9359 ; RV64ZVE32F-NEXT: .LBB84_8: # %cond.store9
9360 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
9361 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9362 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9363 ; RV64ZVE32F-NEXT: add a2, a0, a2
9364 ; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
9365 ; RV64ZVE32F-NEXT: .LBB84_9: # %else10
9366 ; RV64ZVE32F-NEXT: andi a2, a1, 64
9367 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
9368 ; RV64ZVE32F-NEXT: bnez a2, .LBB84_15
9369 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
9370 ; RV64ZVE32F-NEXT: andi a1, a1, -128
9371 ; RV64ZVE32F-NEXT: bnez a1, .LBB84_16
9372 ; RV64ZVE32F-NEXT: .LBB84_11: # %else14
9373 ; RV64ZVE32F-NEXT: ret
9374 ; RV64ZVE32F-NEXT: .LBB84_12: # %cond.store3
9375 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9376 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9377 ; RV64ZVE32F-NEXT: add a2, a0, a2
9378 ; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
9379 ; RV64ZVE32F-NEXT: andi a2, a1, 8
9380 ; RV64ZVE32F-NEXT: beqz a2, .LBB84_6
9381 ; RV64ZVE32F-NEXT: .LBB84_13: # %cond.store5
9382 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
9383 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9384 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9385 ; RV64ZVE32F-NEXT: add a2, a0, a2
9386 ; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
9387 ; RV64ZVE32F-NEXT: andi a2, a1, 16
9388 ; RV64ZVE32F-NEXT: beqz a2, .LBB84_7
9389 ; RV64ZVE32F-NEXT: .LBB84_14: # %cond.store7
9390 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
9391 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9392 ; RV64ZVE32F-NEXT: add a2, a0, a2
9393 ; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
9394 ; RV64ZVE32F-NEXT: andi a2, a1, 32
9395 ; RV64ZVE32F-NEXT: bnez a2, .LBB84_8
9396 ; RV64ZVE32F-NEXT: j .LBB84_9
9397 ; RV64ZVE32F-NEXT: .LBB84_15: # %cond.store11
9398 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9399 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9400 ; RV64ZVE32F-NEXT: add a2, a0, a2
9401 ; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
9402 ; RV64ZVE32F-NEXT: andi a1, a1, -128
9403 ; RV64ZVE32F-NEXT: beqz a1, .LBB84_11
9404 ; RV64ZVE32F-NEXT: .LBB84_16: # %cond.store13
9405 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
9406 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
9407 ; RV64ZVE32F-NEXT: slli a1, a1, 3
9408 ; RV64ZVE32F-NEXT: add a0, a0, a1
9409 ; RV64ZVE32F-NEXT: fsd fa7, 0(a0)
9410 ; RV64ZVE32F-NEXT: ret
9411 %ptrs = getelementptr inbounds double, ptr %base, <8 x i16> %idxs
9412 call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
; Masked scatter of <8 x double> %val through %base, indexed by <8 x i16> %idxs
; that are explicitly sign-extended to i64 before the getelementptr.
; Lowering pinned by the autogenerated assertions below:
;  - RV32V and RV64 (full V) emit one masked indexed store. The index vector is
;    widened with vsext (vf2 to e32 on RV32V, vf4 to e64 on RV64) and shifted
;    left by 3, i.e. scaled by the 8-byte size of a double.
;  - RV32ZVE32F builds the eight addresses in a vector (vsext.vf2, vsll.vi 3,
;    vadd.vx with the base), then tests each mask bit and stores every active
;    lane with a scalar fsd.
;  - RV64ZVE32F tests each mask bit and, per active lane, moves the i16 index
;    into a scalar register, scales it with slli 3, adds the base and uses fsd.
9416 define void @mscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
9417 ; RV32V-LABEL: mscatter_baseidx_sext_v8i16_v8f64:
9419 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
9420 ; RV32V-NEXT: vsext.vf2 v14, v12
9421 ; RV32V-NEXT: vsll.vi v12, v14, 3
9422 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
9423 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
9426 ; RV64-LABEL: mscatter_baseidx_sext_v8i16_v8f64:
9428 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
9429 ; RV64-NEXT: vsext.vf4 v16, v12
9430 ; RV64-NEXT: vsll.vi v12, v16, 3
9431 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
9434 ; RV32ZVE32F-LABEL: mscatter_baseidx_sext_v8i16_v8f64:
9435 ; RV32ZVE32F: # %bb.0:
9436 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
9437 ; RV32ZVE32F-NEXT: vsext.vf2 v10, v8
9438 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
9439 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
9440 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
9441 ; RV32ZVE32F-NEXT: vmv.x.s a0, v0
9442 ; RV32ZVE32F-NEXT: andi a1, a0, 1
9443 ; RV32ZVE32F-NEXT: bnez a1, .LBB85_9
9444 ; RV32ZVE32F-NEXT: # %bb.1: # %else
9445 ; RV32ZVE32F-NEXT: andi a1, a0, 2
9446 ; RV32ZVE32F-NEXT: bnez a1, .LBB85_10
9447 ; RV32ZVE32F-NEXT: .LBB85_2: # %else2
9448 ; RV32ZVE32F-NEXT: andi a1, a0, 4
9449 ; RV32ZVE32F-NEXT: bnez a1, .LBB85_11
9450 ; RV32ZVE32F-NEXT: .LBB85_3: # %else4
9451 ; RV32ZVE32F-NEXT: andi a1, a0, 8
9452 ; RV32ZVE32F-NEXT: bnez a1, .LBB85_12
9453 ; RV32ZVE32F-NEXT: .LBB85_4: # %else6
9454 ; RV32ZVE32F-NEXT: andi a1, a0, 16
9455 ; RV32ZVE32F-NEXT: bnez a1, .LBB85_13
9456 ; RV32ZVE32F-NEXT: .LBB85_5: # %else8
9457 ; RV32ZVE32F-NEXT: andi a1, a0, 32
9458 ; RV32ZVE32F-NEXT: bnez a1, .LBB85_14
9459 ; RV32ZVE32F-NEXT: .LBB85_6: # %else10
9460 ; RV32ZVE32F-NEXT: andi a1, a0, 64
9461 ; RV32ZVE32F-NEXT: bnez a1, .LBB85_15
9462 ; RV32ZVE32F-NEXT: .LBB85_7: # %else12
9463 ; RV32ZVE32F-NEXT: andi a0, a0, -128
9464 ; RV32ZVE32F-NEXT: bnez a0, .LBB85_16
9465 ; RV32ZVE32F-NEXT: .LBB85_8: # %else14
9466 ; RV32ZVE32F-NEXT: ret
9467 ; RV32ZVE32F-NEXT: .LBB85_9: # %cond.store
9468 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
9469 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
9470 ; RV32ZVE32F-NEXT: fsd fa0, 0(a1)
9471 ; RV32ZVE32F-NEXT: andi a1, a0, 2
9472 ; RV32ZVE32F-NEXT: beqz a1, .LBB85_2
9473 ; RV32ZVE32F-NEXT: .LBB85_10: # %cond.store1
9474 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9475 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
9476 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
9477 ; RV32ZVE32F-NEXT: fsd fa1, 0(a1)
9478 ; RV32ZVE32F-NEXT: andi a1, a0, 4
9479 ; RV32ZVE32F-NEXT: beqz a1, .LBB85_3
9480 ; RV32ZVE32F-NEXT: .LBB85_11: # %cond.store3
9481 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9482 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
9483 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
9484 ; RV32ZVE32F-NEXT: fsd fa2, 0(a1)
9485 ; RV32ZVE32F-NEXT: andi a1, a0, 8
9486 ; RV32ZVE32F-NEXT: beqz a1, .LBB85_4
9487 ; RV32ZVE32F-NEXT: .LBB85_12: # %cond.store5
9488 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9489 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
9490 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
9491 ; RV32ZVE32F-NEXT: fsd fa3, 0(a1)
9492 ; RV32ZVE32F-NEXT: andi a1, a0, 16
9493 ; RV32ZVE32F-NEXT: beqz a1, .LBB85_5
9494 ; RV32ZVE32F-NEXT: .LBB85_13: # %cond.store7
9495 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9496 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
9497 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
9498 ; RV32ZVE32F-NEXT: fsd fa4, 0(a1)
9499 ; RV32ZVE32F-NEXT: andi a1, a0, 32
9500 ; RV32ZVE32F-NEXT: beqz a1, .LBB85_6
9501 ; RV32ZVE32F-NEXT: .LBB85_14: # %cond.store9
9502 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9503 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
9504 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
9505 ; RV32ZVE32F-NEXT: fsd fa5, 0(a1)
9506 ; RV32ZVE32F-NEXT: andi a1, a0, 64
9507 ; RV32ZVE32F-NEXT: beqz a1, .LBB85_7
9508 ; RV32ZVE32F-NEXT: .LBB85_15: # %cond.store11
9509 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9510 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
9511 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
9512 ; RV32ZVE32F-NEXT: fsd fa6, 0(a1)
9513 ; RV32ZVE32F-NEXT: andi a0, a0, -128
9514 ; RV32ZVE32F-NEXT: beqz a0, .LBB85_8
9515 ; RV32ZVE32F-NEXT: .LBB85_16: # %cond.store13
9516 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9517 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
9518 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
9519 ; RV32ZVE32F-NEXT: fsd fa7, 0(a0)
9520 ; RV32ZVE32F-NEXT: ret
9522 ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i16_v8f64:
9523 ; RV64ZVE32F: # %bb.0:
9524 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
9525 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
9526 ; RV64ZVE32F-NEXT: andi a2, a1, 1
9527 ; RV64ZVE32F-NEXT: beqz a2, .LBB85_2
9528 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
9529 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
9530 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9531 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9532 ; RV64ZVE32F-NEXT: add a2, a0, a2
9533 ; RV64ZVE32F-NEXT: fsd fa0, 0(a2)
9534 ; RV64ZVE32F-NEXT: .LBB85_2: # %else
9535 ; RV64ZVE32F-NEXT: andi a2, a1, 2
9536 ; RV64ZVE32F-NEXT: beqz a2, .LBB85_4
9537 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
9538 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
9539 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
9540 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
9541 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9542 ; RV64ZVE32F-NEXT: add a2, a0, a2
9543 ; RV64ZVE32F-NEXT: fsd fa1, 0(a2)
9544 ; RV64ZVE32F-NEXT: .LBB85_4: # %else2
9545 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
9546 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
9547 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
9548 ; RV64ZVE32F-NEXT: andi a2, a1, 4
9549 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
9550 ; RV64ZVE32F-NEXT: bnez a2, .LBB85_12
9551 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
9552 ; RV64ZVE32F-NEXT: andi a2, a1, 8
9553 ; RV64ZVE32F-NEXT: bnez a2, .LBB85_13
9554 ; RV64ZVE32F-NEXT: .LBB85_6: # %else6
9555 ; RV64ZVE32F-NEXT: andi a2, a1, 16
9556 ; RV64ZVE32F-NEXT: bnez a2, .LBB85_14
9557 ; RV64ZVE32F-NEXT: .LBB85_7: # %else8
9558 ; RV64ZVE32F-NEXT: andi a2, a1, 32
9559 ; RV64ZVE32F-NEXT: beqz a2, .LBB85_9
9560 ; RV64ZVE32F-NEXT: .LBB85_8: # %cond.store9
9561 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
9562 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9563 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9564 ; RV64ZVE32F-NEXT: add a2, a0, a2
9565 ; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
9566 ; RV64ZVE32F-NEXT: .LBB85_9: # %else10
9567 ; RV64ZVE32F-NEXT: andi a2, a1, 64
9568 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
9569 ; RV64ZVE32F-NEXT: bnez a2, .LBB85_15
9570 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
9571 ; RV64ZVE32F-NEXT: andi a1, a1, -128
9572 ; RV64ZVE32F-NEXT: bnez a1, .LBB85_16
9573 ; RV64ZVE32F-NEXT: .LBB85_11: # %else14
9574 ; RV64ZVE32F-NEXT: ret
9575 ; RV64ZVE32F-NEXT: .LBB85_12: # %cond.store3
9576 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9577 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9578 ; RV64ZVE32F-NEXT: add a2, a0, a2
9579 ; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
9580 ; RV64ZVE32F-NEXT: andi a2, a1, 8
9581 ; RV64ZVE32F-NEXT: beqz a2, .LBB85_6
9582 ; RV64ZVE32F-NEXT: .LBB85_13: # %cond.store5
9583 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
9584 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9585 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9586 ; RV64ZVE32F-NEXT: add a2, a0, a2
9587 ; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
9588 ; RV64ZVE32F-NEXT: andi a2, a1, 16
9589 ; RV64ZVE32F-NEXT: beqz a2, .LBB85_7
9590 ; RV64ZVE32F-NEXT: .LBB85_14: # %cond.store7
9591 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
9592 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9593 ; RV64ZVE32F-NEXT: add a2, a0, a2
9594 ; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
9595 ; RV64ZVE32F-NEXT: andi a2, a1, 32
9596 ; RV64ZVE32F-NEXT: bnez a2, .LBB85_8
9597 ; RV64ZVE32F-NEXT: j .LBB85_9
9598 ; RV64ZVE32F-NEXT: .LBB85_15: # %cond.store11
9599 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9600 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9601 ; RV64ZVE32F-NEXT: add a2, a0, a2
9602 ; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
9603 ; RV64ZVE32F-NEXT: andi a1, a1, -128
9604 ; RV64ZVE32F-NEXT: beqz a1, .LBB85_11
9605 ; RV64ZVE32F-NEXT: .LBB85_16: # %cond.store13
9606 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
9607 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
9608 ; RV64ZVE32F-NEXT: slli a1, a1, 3
9609 ; RV64ZVE32F-NEXT: add a0, a0, a1
9610 ; RV64ZVE32F-NEXT: fsd fa7, 0(a0)
9611 ; RV64ZVE32F-NEXT: ret
9612 %eidxs = sext <8 x i16> %idxs to <8 x i64>
9613 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
9614 call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
; Masked scatter of <8 x double> %val through %base, indexed by <8 x i16> %idxs
; that are explicitly zero-extended to i64 before the getelementptr.
; Lowering pinned by the autogenerated assertions below:
;  - RV32V and RV64 (full V) produce the same sequence. The indices are widened
;    only to e32 with vzext.vf2, shifted left by 3 (8-byte double stride), and
;    used by a single masked vsoxei32.v.
;  - RV32ZVE32F builds the eight addresses in a vector (vzext.vf2, vsll.vi 3,
;    vadd.vx with the base), then tests each mask bit and stores every active
;    lane with a scalar fsd.
;  - RV64ZVE32F first materialises 0xFFFF in a1 (lui 16 / addiw -1). For each
;    active lane it moves the i16 index to a scalar register, ANDs it with that
;    constant to clear the upper bits, scales by 8, adds the base and uses fsd.
9618 define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
9619 ; RV32V-LABEL: mscatter_baseidx_zext_v8i16_v8f64:
9621 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
9622 ; RV32V-NEXT: vzext.vf2 v14, v12
9623 ; RV32V-NEXT: vsll.vi v12, v14, 3
9624 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
9625 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
9628 ; RV64-LABEL: mscatter_baseidx_zext_v8i16_v8f64:
9630 ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
9631 ; RV64-NEXT: vzext.vf2 v14, v12
9632 ; RV64-NEXT: vsll.vi v12, v14, 3
9633 ; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, ma
9634 ; RV64-NEXT: vsoxei32.v v8, (a0), v12, v0.t
9637 ; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8f64:
9638 ; RV32ZVE32F: # %bb.0:
9639 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
9640 ; RV32ZVE32F-NEXT: vzext.vf2 v10, v8
9641 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
9642 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
9643 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
9644 ; RV32ZVE32F-NEXT: vmv.x.s a0, v0
9645 ; RV32ZVE32F-NEXT: andi a1, a0, 1
9646 ; RV32ZVE32F-NEXT: bnez a1, .LBB86_9
9647 ; RV32ZVE32F-NEXT: # %bb.1: # %else
9648 ; RV32ZVE32F-NEXT: andi a1, a0, 2
9649 ; RV32ZVE32F-NEXT: bnez a1, .LBB86_10
9650 ; RV32ZVE32F-NEXT: .LBB86_2: # %else2
9651 ; RV32ZVE32F-NEXT: andi a1, a0, 4
9652 ; RV32ZVE32F-NEXT: bnez a1, .LBB86_11
9653 ; RV32ZVE32F-NEXT: .LBB86_3: # %else4
9654 ; RV32ZVE32F-NEXT: andi a1, a0, 8
9655 ; RV32ZVE32F-NEXT: bnez a1, .LBB86_12
9656 ; RV32ZVE32F-NEXT: .LBB86_4: # %else6
9657 ; RV32ZVE32F-NEXT: andi a1, a0, 16
9658 ; RV32ZVE32F-NEXT: bnez a1, .LBB86_13
9659 ; RV32ZVE32F-NEXT: .LBB86_5: # %else8
9660 ; RV32ZVE32F-NEXT: andi a1, a0, 32
9661 ; RV32ZVE32F-NEXT: bnez a1, .LBB86_14
9662 ; RV32ZVE32F-NEXT: .LBB86_6: # %else10
9663 ; RV32ZVE32F-NEXT: andi a1, a0, 64
9664 ; RV32ZVE32F-NEXT: bnez a1, .LBB86_15
9665 ; RV32ZVE32F-NEXT: .LBB86_7: # %else12
9666 ; RV32ZVE32F-NEXT: andi a0, a0, -128
9667 ; RV32ZVE32F-NEXT: bnez a0, .LBB86_16
9668 ; RV32ZVE32F-NEXT: .LBB86_8: # %else14
9669 ; RV32ZVE32F-NEXT: ret
9670 ; RV32ZVE32F-NEXT: .LBB86_9: # %cond.store
9671 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
9672 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
9673 ; RV32ZVE32F-NEXT: fsd fa0, 0(a1)
9674 ; RV32ZVE32F-NEXT: andi a1, a0, 2
9675 ; RV32ZVE32F-NEXT: beqz a1, .LBB86_2
9676 ; RV32ZVE32F-NEXT: .LBB86_10: # %cond.store1
9677 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9678 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
9679 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
9680 ; RV32ZVE32F-NEXT: fsd fa1, 0(a1)
9681 ; RV32ZVE32F-NEXT: andi a1, a0, 4
9682 ; RV32ZVE32F-NEXT: beqz a1, .LBB86_3
9683 ; RV32ZVE32F-NEXT: .LBB86_11: # %cond.store3
9684 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9685 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
9686 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
9687 ; RV32ZVE32F-NEXT: fsd fa2, 0(a1)
9688 ; RV32ZVE32F-NEXT: andi a1, a0, 8
9689 ; RV32ZVE32F-NEXT: beqz a1, .LBB86_4
9690 ; RV32ZVE32F-NEXT: .LBB86_12: # %cond.store5
9691 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9692 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
9693 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
9694 ; RV32ZVE32F-NEXT: fsd fa3, 0(a1)
9695 ; RV32ZVE32F-NEXT: andi a1, a0, 16
9696 ; RV32ZVE32F-NEXT: beqz a1, .LBB86_5
9697 ; RV32ZVE32F-NEXT: .LBB86_13: # %cond.store7
9698 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9699 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
9700 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
9701 ; RV32ZVE32F-NEXT: fsd fa4, 0(a1)
9702 ; RV32ZVE32F-NEXT: andi a1, a0, 32
9703 ; RV32ZVE32F-NEXT: beqz a1, .LBB86_6
9704 ; RV32ZVE32F-NEXT: .LBB86_14: # %cond.store9
9705 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9706 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
9707 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
9708 ; RV32ZVE32F-NEXT: fsd fa5, 0(a1)
9709 ; RV32ZVE32F-NEXT: andi a1, a0, 64
9710 ; RV32ZVE32F-NEXT: beqz a1, .LBB86_7
9711 ; RV32ZVE32F-NEXT: .LBB86_15: # %cond.store11
9712 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9713 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
9714 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
9715 ; RV32ZVE32F-NEXT: fsd fa6, 0(a1)
9716 ; RV32ZVE32F-NEXT: andi a0, a0, -128
9717 ; RV32ZVE32F-NEXT: beqz a0, .LBB86_8
9718 ; RV32ZVE32F-NEXT: .LBB86_16: # %cond.store13
9719 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9720 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
9721 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
9722 ; RV32ZVE32F-NEXT: fsd fa7, 0(a0)
9723 ; RV32ZVE32F-NEXT: ret
9725 ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8f64:
9726 ; RV64ZVE32F: # %bb.0:
9727 ; RV64ZVE32F-NEXT: lui a1, 16
9728 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
9729 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
9730 ; RV64ZVE32F-NEXT: andi a3, a2, 1
9731 ; RV64ZVE32F-NEXT: addiw a1, a1, -1
9732 ; RV64ZVE32F-NEXT: beqz a3, .LBB86_2
9733 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
9734 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
9735 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
9736 ; RV64ZVE32F-NEXT: and a3, a3, a1
9737 ; RV64ZVE32F-NEXT: slli a3, a3, 3
9738 ; RV64ZVE32F-NEXT: add a3, a0, a3
9739 ; RV64ZVE32F-NEXT: fsd fa0, 0(a3)
9740 ; RV64ZVE32F-NEXT: .LBB86_2: # %else
9741 ; RV64ZVE32F-NEXT: andi a3, a2, 2
9742 ; RV64ZVE32F-NEXT: beqz a3, .LBB86_4
9743 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
9744 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
9745 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
9746 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9
9747 ; RV64ZVE32F-NEXT: and a3, a3, a1
9748 ; RV64ZVE32F-NEXT: slli a3, a3, 3
9749 ; RV64ZVE32F-NEXT: add a3, a0, a3
9750 ; RV64ZVE32F-NEXT: fsd fa1, 0(a3)
9751 ; RV64ZVE32F-NEXT: .LBB86_4: # %else2
9752 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
9753 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
9754 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
9755 ; RV64ZVE32F-NEXT: andi a3, a2, 4
9756 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
9757 ; RV64ZVE32F-NEXT: bnez a3, .LBB86_12
9758 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
9759 ; RV64ZVE32F-NEXT: andi a3, a2, 8
9760 ; RV64ZVE32F-NEXT: bnez a3, .LBB86_13
9761 ; RV64ZVE32F-NEXT: .LBB86_6: # %else6
9762 ; RV64ZVE32F-NEXT: andi a3, a2, 16
9763 ; RV64ZVE32F-NEXT: bnez a3, .LBB86_14
9764 ; RV64ZVE32F-NEXT: .LBB86_7: # %else8
9765 ; RV64ZVE32F-NEXT: andi a3, a2, 32
9766 ; RV64ZVE32F-NEXT: beqz a3, .LBB86_9
9767 ; RV64ZVE32F-NEXT: .LBB86_8: # %cond.store9
9768 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
9769 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
9770 ; RV64ZVE32F-NEXT: and a3, a3, a1
9771 ; RV64ZVE32F-NEXT: slli a3, a3, 3
9772 ; RV64ZVE32F-NEXT: add a3, a0, a3
9773 ; RV64ZVE32F-NEXT: fsd fa5, 0(a3)
9774 ; RV64ZVE32F-NEXT: .LBB86_9: # %else10
9775 ; RV64ZVE32F-NEXT: andi a3, a2, 64
9776 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
9777 ; RV64ZVE32F-NEXT: bnez a3, .LBB86_15
9778 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
9779 ; RV64ZVE32F-NEXT: andi a2, a2, -128
9780 ; RV64ZVE32F-NEXT: bnez a2, .LBB86_16
9781 ; RV64ZVE32F-NEXT: .LBB86_11: # %else14
9782 ; RV64ZVE32F-NEXT: ret
9783 ; RV64ZVE32F-NEXT: .LBB86_12: # %cond.store3
9784 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
9785 ; RV64ZVE32F-NEXT: and a3, a3, a1
9786 ; RV64ZVE32F-NEXT: slli a3, a3, 3
9787 ; RV64ZVE32F-NEXT: add a3, a0, a3
9788 ; RV64ZVE32F-NEXT: fsd fa2, 0(a3)
9789 ; RV64ZVE32F-NEXT: andi a3, a2, 8
9790 ; RV64ZVE32F-NEXT: beqz a3, .LBB86_6
9791 ; RV64ZVE32F-NEXT: .LBB86_13: # %cond.store5
9792 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
9793 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
9794 ; RV64ZVE32F-NEXT: and a3, a3, a1
9795 ; RV64ZVE32F-NEXT: slli a3, a3, 3
9796 ; RV64ZVE32F-NEXT: add a3, a0, a3
9797 ; RV64ZVE32F-NEXT: fsd fa3, 0(a3)
9798 ; RV64ZVE32F-NEXT: andi a3, a2, 16
9799 ; RV64ZVE32F-NEXT: beqz a3, .LBB86_7
9800 ; RV64ZVE32F-NEXT: .LBB86_14: # %cond.store7
9801 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9
9802 ; RV64ZVE32F-NEXT: and a3, a3, a1
9803 ; RV64ZVE32F-NEXT: slli a3, a3, 3
9804 ; RV64ZVE32F-NEXT: add a3, a0, a3
9805 ; RV64ZVE32F-NEXT: fsd fa4, 0(a3)
9806 ; RV64ZVE32F-NEXT: andi a3, a2, 32
9807 ; RV64ZVE32F-NEXT: bnez a3, .LBB86_8
9808 ; RV64ZVE32F-NEXT: j .LBB86_9
9809 ; RV64ZVE32F-NEXT: .LBB86_15: # %cond.store11
9810 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
9811 ; RV64ZVE32F-NEXT: and a3, a3, a1
9812 ; RV64ZVE32F-NEXT: slli a3, a3, 3
9813 ; RV64ZVE32F-NEXT: add a3, a0, a3
9814 ; RV64ZVE32F-NEXT: fsd fa6, 0(a3)
9815 ; RV64ZVE32F-NEXT: andi a2, a2, -128
9816 ; RV64ZVE32F-NEXT: beqz a2, .LBB86_11
9817 ; RV64ZVE32F-NEXT: .LBB86_16: # %cond.store13
9818 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
9819 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9820 ; RV64ZVE32F-NEXT: and a1, a2, a1
9821 ; RV64ZVE32F-NEXT: slli a1, a1, 3
9822 ; RV64ZVE32F-NEXT: add a0, a0, a1
9823 ; RV64ZVE32F-NEXT: fsd fa7, 0(a0)
9824 ; RV64ZVE32F-NEXT: ret
9825 %eidxs = zext <8 x i16> %idxs to <8 x i64>
9826 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
9827 call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
; Masked scatter of <8 x double> %val through %base, indexed directly by
; <8 x i32> %idxs (no explicit extension in the IR before the getelementptr).
; Lowering pinned by the autogenerated assertions below:
;  - RV32V shifts the e32 indices left by 3 in place (8-byte double stride) and
;    issues one masked vsoxei32.v.
;  - RV64 (full V) first widens the indices to e64 with vsext.vf2, shifts them
;    left by 3 and issues one masked vsoxei64.v.
;  - RV32ZVE32F builds the eight addresses in a vector (vsll.vi 3, vadd.vx with
;    the base), then tests each mask bit and stores every active lane with a
;    scalar fsd.
;  - RV64ZVE32F tests each mask bit and, per active lane, moves the e32 index
;    into a scalar register, scales it with slli 3, adds the base and uses fsd.
9831 define void @mscatter_baseidx_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m) {
9832 ; RV32V-LABEL: mscatter_baseidx_v8i32_v8f64:
9834 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
9835 ; RV32V-NEXT: vsll.vi v12, v12, 3
9836 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
9837 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
9840 ; RV64-LABEL: mscatter_baseidx_v8i32_v8f64:
9842 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
9843 ; RV64-NEXT: vsext.vf2 v16, v12
9844 ; RV64-NEXT: vsll.vi v12, v16, 3
9845 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
9848 ; RV32ZVE32F-LABEL: mscatter_baseidx_v8i32_v8f64:
9849 ; RV32ZVE32F: # %bb.0:
9850 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
9851 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
9852 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
9853 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
9854 ; RV32ZVE32F-NEXT: vmv.x.s a0, v0
9855 ; RV32ZVE32F-NEXT: andi a1, a0, 1
9856 ; RV32ZVE32F-NEXT: bnez a1, .LBB87_9
9857 ; RV32ZVE32F-NEXT: # %bb.1: # %else
9858 ; RV32ZVE32F-NEXT: andi a1, a0, 2
9859 ; RV32ZVE32F-NEXT: bnez a1, .LBB87_10
9860 ; RV32ZVE32F-NEXT: .LBB87_2: # %else2
9861 ; RV32ZVE32F-NEXT: andi a1, a0, 4
9862 ; RV32ZVE32F-NEXT: bnez a1, .LBB87_11
9863 ; RV32ZVE32F-NEXT: .LBB87_3: # %else4
9864 ; RV32ZVE32F-NEXT: andi a1, a0, 8
9865 ; RV32ZVE32F-NEXT: bnez a1, .LBB87_12
9866 ; RV32ZVE32F-NEXT: .LBB87_4: # %else6
9867 ; RV32ZVE32F-NEXT: andi a1, a0, 16
9868 ; RV32ZVE32F-NEXT: bnez a1, .LBB87_13
9869 ; RV32ZVE32F-NEXT: .LBB87_5: # %else8
9870 ; RV32ZVE32F-NEXT: andi a1, a0, 32
9871 ; RV32ZVE32F-NEXT: bnez a1, .LBB87_14
9872 ; RV32ZVE32F-NEXT: .LBB87_6: # %else10
9873 ; RV32ZVE32F-NEXT: andi a1, a0, 64
9874 ; RV32ZVE32F-NEXT: bnez a1, .LBB87_15
9875 ; RV32ZVE32F-NEXT: .LBB87_7: # %else12
9876 ; RV32ZVE32F-NEXT: andi a0, a0, -128
9877 ; RV32ZVE32F-NEXT: bnez a0, .LBB87_16
9878 ; RV32ZVE32F-NEXT: .LBB87_8: # %else14
9879 ; RV32ZVE32F-NEXT: ret
9880 ; RV32ZVE32F-NEXT: .LBB87_9: # %cond.store
9881 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
9882 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
9883 ; RV32ZVE32F-NEXT: fsd fa0, 0(a1)
9884 ; RV32ZVE32F-NEXT: andi a1, a0, 2
9885 ; RV32ZVE32F-NEXT: beqz a1, .LBB87_2
9886 ; RV32ZVE32F-NEXT: .LBB87_10: # %cond.store1
9887 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9888 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
9889 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
9890 ; RV32ZVE32F-NEXT: fsd fa1, 0(a1)
9891 ; RV32ZVE32F-NEXT: andi a1, a0, 4
9892 ; RV32ZVE32F-NEXT: beqz a1, .LBB87_3
9893 ; RV32ZVE32F-NEXT: .LBB87_11: # %cond.store3
9894 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9895 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
9896 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
9897 ; RV32ZVE32F-NEXT: fsd fa2, 0(a1)
9898 ; RV32ZVE32F-NEXT: andi a1, a0, 8
9899 ; RV32ZVE32F-NEXT: beqz a1, .LBB87_4
9900 ; RV32ZVE32F-NEXT: .LBB87_12: # %cond.store5
9901 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9902 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
9903 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
9904 ; RV32ZVE32F-NEXT: fsd fa3, 0(a1)
9905 ; RV32ZVE32F-NEXT: andi a1, a0, 16
9906 ; RV32ZVE32F-NEXT: beqz a1, .LBB87_5
9907 ; RV32ZVE32F-NEXT: .LBB87_13: # %cond.store7
9908 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9909 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
9910 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
9911 ; RV32ZVE32F-NEXT: fsd fa4, 0(a1)
9912 ; RV32ZVE32F-NEXT: andi a1, a0, 32
9913 ; RV32ZVE32F-NEXT: beqz a1, .LBB87_6
9914 ; RV32ZVE32F-NEXT: .LBB87_14: # %cond.store9
9915 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9916 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
9917 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
9918 ; RV32ZVE32F-NEXT: fsd fa5, 0(a1)
9919 ; RV32ZVE32F-NEXT: andi a1, a0, 64
9920 ; RV32ZVE32F-NEXT: beqz a1, .LBB87_7
9921 ; RV32ZVE32F-NEXT: .LBB87_15: # %cond.store11
9922 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9923 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
9924 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
9925 ; RV32ZVE32F-NEXT: fsd fa6, 0(a1)
9926 ; RV32ZVE32F-NEXT: andi a0, a0, -128
9927 ; RV32ZVE32F-NEXT: beqz a0, .LBB87_8
9928 ; RV32ZVE32F-NEXT: .LBB87_16: # %cond.store13
9929 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9930 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
9931 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
9932 ; RV32ZVE32F-NEXT: fsd fa7, 0(a0)
9933 ; RV32ZVE32F-NEXT: ret
9935 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i32_v8f64:
9936 ; RV64ZVE32F: # %bb.0:
9937 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
9938 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
9939 ; RV64ZVE32F-NEXT: andi a2, a1, 1
9940 ; RV64ZVE32F-NEXT: beqz a2, .LBB87_2
9941 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
9942 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
9943 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9944 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9945 ; RV64ZVE32F-NEXT: add a2, a0, a2
9946 ; RV64ZVE32F-NEXT: fsd fa0, 0(a2)
9947 ; RV64ZVE32F-NEXT: .LBB87_2: # %else
9948 ; RV64ZVE32F-NEXT: andi a2, a1, 2
9949 ; RV64ZVE32F-NEXT: beqz a2, .LBB87_4
9950 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
9951 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9952 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
9953 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
9954 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9955 ; RV64ZVE32F-NEXT: add a2, a0, a2
9956 ; RV64ZVE32F-NEXT: fsd fa1, 0(a2)
9957 ; RV64ZVE32F-NEXT: .LBB87_4: # %else2
9958 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
9959 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
9960 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
9961 ; RV64ZVE32F-NEXT: andi a2, a1, 4
9962 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
9963 ; RV64ZVE32F-NEXT: bnez a2, .LBB87_12
9964 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
9965 ; RV64ZVE32F-NEXT: andi a2, a1, 8
9966 ; RV64ZVE32F-NEXT: bnez a2, .LBB87_13
9967 ; RV64ZVE32F-NEXT: .LBB87_6: # %else6
9968 ; RV64ZVE32F-NEXT: andi a2, a1, 16
9969 ; RV64ZVE32F-NEXT: bnez a2, .LBB87_14
9970 ; RV64ZVE32F-NEXT: .LBB87_7: # %else8
9971 ; RV64ZVE32F-NEXT: andi a2, a1, 32
9972 ; RV64ZVE32F-NEXT: beqz a2, .LBB87_9
9973 ; RV64ZVE32F-NEXT: .LBB87_8: # %cond.store9
9974 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
9975 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9976 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9977 ; RV64ZVE32F-NEXT: add a2, a0, a2
9978 ; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
9979 ; RV64ZVE32F-NEXT: .LBB87_9: # %else10
9980 ; RV64ZVE32F-NEXT: andi a2, a1, 64
9981 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
9982 ; RV64ZVE32F-NEXT: bnez a2, .LBB87_15
9983 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
9984 ; RV64ZVE32F-NEXT: andi a1, a1, -128
9985 ; RV64ZVE32F-NEXT: bnez a1, .LBB87_16
9986 ; RV64ZVE32F-NEXT: .LBB87_11: # %else14
9987 ; RV64ZVE32F-NEXT: ret
9988 ; RV64ZVE32F-NEXT: .LBB87_12: # %cond.store3
9989 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9990 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9991 ; RV64ZVE32F-NEXT: add a2, a0, a2
9992 ; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
9993 ; RV64ZVE32F-NEXT: andi a2, a1, 8
9994 ; RV64ZVE32F-NEXT: beqz a2, .LBB87_6
9995 ; RV64ZVE32F-NEXT: .LBB87_13: # %cond.store5
9996 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
9997 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9998 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9999 ; RV64ZVE32F-NEXT: add a2, a0, a2
10000 ; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
10001 ; RV64ZVE32F-NEXT: andi a2, a1, 16
10002 ; RV64ZVE32F-NEXT: beqz a2, .LBB87_7
10003 ; RV64ZVE32F-NEXT: .LBB87_14: # %cond.store7
10004 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
10005 ; RV64ZVE32F-NEXT: slli a2, a2, 3
10006 ; RV64ZVE32F-NEXT: add a2, a0, a2
10007 ; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
10008 ; RV64ZVE32F-NEXT: andi a2, a1, 32
10009 ; RV64ZVE32F-NEXT: bnez a2, .LBB87_8
10010 ; RV64ZVE32F-NEXT: j .LBB87_9
10011 ; RV64ZVE32F-NEXT: .LBB87_15: # %cond.store11
10012 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
10013 ; RV64ZVE32F-NEXT: slli a2, a2, 3
10014 ; RV64ZVE32F-NEXT: add a2, a0, a2
10015 ; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
10016 ; RV64ZVE32F-NEXT: andi a1, a1, -128
10017 ; RV64ZVE32F-NEXT: beqz a1, .LBB87_11
10018 ; RV64ZVE32F-NEXT: .LBB87_16: # %cond.store13
10019 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
10020 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
10021 ; RV64ZVE32F-NEXT: slli a1, a1, 3
10022 ; RV64ZVE32F-NEXT: add a0, a0, a1
10023 ; RV64ZVE32F-NEXT: fsd fa7, 0(a0)
10024 ; RV64ZVE32F-NEXT: ret
10025 %ptrs = getelementptr inbounds double, ptr %base, <8 x i32> %idxs
10026 call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
; Masked scatter of <8 x double> through %base, indexed by <8 x i32> indices
; that are explicitly sign-extended to <8 x i64> before the getelementptr (see
; the IR at the end of this function).
;
; What the autogenerated checks below pin for each run configuration:
;  * RV32V run - the indices are shifted left by 3 at e32 and consumed
;    directly by vsoxei32.v; no widening instruction appears.
;  * RV64 run - the indices are widened with vsext.vf2, shifted left by 3 at
;    e64, and consumed by vsoxei64.v.
;  * RV32ZVE32F run - the addresses are formed in a vector register (vsll.vi
;    by 3, then vadd.vx with a0); afterwards each mask bit is tested in turn
;    and the selected element is stored with a scalar fsd.
;  * RV64ZVE32F run - each mask bit is tested in turn; the matching index is
;    moved to a scalar register with vmv.x.s, scaled with slli by 3, added to
;    a0, and the element is stored with a scalar fsd.
; Do not edit the check lines by hand; regenerate them with
; utils/update_llc_test_checks.py.
10030 define void @mscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m) {
10031 ; RV32V-LABEL: mscatter_baseidx_sext_v8i32_v8f64:
10033 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
10034 ; RV32V-NEXT: vsll.vi v12, v12, 3
10035 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
10036 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
10039 ; RV64-LABEL: mscatter_baseidx_sext_v8i32_v8f64:
10041 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
10042 ; RV64-NEXT: vsext.vf2 v16, v12
10043 ; RV64-NEXT: vsll.vi v12, v16, 3
10044 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
10047 ; RV32ZVE32F-LABEL: mscatter_baseidx_sext_v8i32_v8f64:
10048 ; RV32ZVE32F: # %bb.0:
10049 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
10050 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
10051 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
10052 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10053 ; RV32ZVE32F-NEXT: vmv.x.s a0, v0
10054 ; RV32ZVE32F-NEXT: andi a1, a0, 1
10055 ; RV32ZVE32F-NEXT: bnez a1, .LBB88_9
10056 ; RV32ZVE32F-NEXT: # %bb.1: # %else
10057 ; RV32ZVE32F-NEXT: andi a1, a0, 2
10058 ; RV32ZVE32F-NEXT: bnez a1, .LBB88_10
10059 ; RV32ZVE32F-NEXT: .LBB88_2: # %else2
10060 ; RV32ZVE32F-NEXT: andi a1, a0, 4
10061 ; RV32ZVE32F-NEXT: bnez a1, .LBB88_11
10062 ; RV32ZVE32F-NEXT: .LBB88_3: # %else4
10063 ; RV32ZVE32F-NEXT: andi a1, a0, 8
10064 ; RV32ZVE32F-NEXT: bnez a1, .LBB88_12
10065 ; RV32ZVE32F-NEXT: .LBB88_4: # %else6
10066 ; RV32ZVE32F-NEXT: andi a1, a0, 16
10067 ; RV32ZVE32F-NEXT: bnez a1, .LBB88_13
10068 ; RV32ZVE32F-NEXT: .LBB88_5: # %else8
10069 ; RV32ZVE32F-NEXT: andi a1, a0, 32
10070 ; RV32ZVE32F-NEXT: bnez a1, .LBB88_14
10071 ; RV32ZVE32F-NEXT: .LBB88_6: # %else10
10072 ; RV32ZVE32F-NEXT: andi a1, a0, 64
10073 ; RV32ZVE32F-NEXT: bnez a1, .LBB88_15
10074 ; RV32ZVE32F-NEXT: .LBB88_7: # %else12
10075 ; RV32ZVE32F-NEXT: andi a0, a0, -128
10076 ; RV32ZVE32F-NEXT: bnez a0, .LBB88_16
10077 ; RV32ZVE32F-NEXT: .LBB88_8: # %else14
10078 ; RV32ZVE32F-NEXT: ret
10079 ; RV32ZVE32F-NEXT: .LBB88_9: # %cond.store
10080 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
10081 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
10082 ; RV32ZVE32F-NEXT: fsd fa0, 0(a1)
10083 ; RV32ZVE32F-NEXT: andi a1, a0, 2
10084 ; RV32ZVE32F-NEXT: beqz a1, .LBB88_2
10085 ; RV32ZVE32F-NEXT: .LBB88_10: # %cond.store1
10086 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
10087 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
10088 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
10089 ; RV32ZVE32F-NEXT: fsd fa1, 0(a1)
10090 ; RV32ZVE32F-NEXT: andi a1, a0, 4
10091 ; RV32ZVE32F-NEXT: beqz a1, .LBB88_3
10092 ; RV32ZVE32F-NEXT: .LBB88_11: # %cond.store3
10093 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
10094 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
10095 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
10096 ; RV32ZVE32F-NEXT: fsd fa2, 0(a1)
10097 ; RV32ZVE32F-NEXT: andi a1, a0, 8
10098 ; RV32ZVE32F-NEXT: beqz a1, .LBB88_4
10099 ; RV32ZVE32F-NEXT: .LBB88_12: # %cond.store5
10100 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
10101 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
10102 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
10103 ; RV32ZVE32F-NEXT: fsd fa3, 0(a1)
10104 ; RV32ZVE32F-NEXT: andi a1, a0, 16
10105 ; RV32ZVE32F-NEXT: beqz a1, .LBB88_5
10106 ; RV32ZVE32F-NEXT: .LBB88_13: # %cond.store7
10107 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
10108 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
10109 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
10110 ; RV32ZVE32F-NEXT: fsd fa4, 0(a1)
10111 ; RV32ZVE32F-NEXT: andi a1, a0, 32
10112 ; RV32ZVE32F-NEXT: beqz a1, .LBB88_6
10113 ; RV32ZVE32F-NEXT: .LBB88_14: # %cond.store9
10114 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
10115 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
10116 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
10117 ; RV32ZVE32F-NEXT: fsd fa5, 0(a1)
10118 ; RV32ZVE32F-NEXT: andi a1, a0, 64
10119 ; RV32ZVE32F-NEXT: beqz a1, .LBB88_7
10120 ; RV32ZVE32F-NEXT: .LBB88_15: # %cond.store11
10121 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
10122 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
10123 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
10124 ; RV32ZVE32F-NEXT: fsd fa6, 0(a1)
10125 ; RV32ZVE32F-NEXT: andi a0, a0, -128
10126 ; RV32ZVE32F-NEXT: beqz a0, .LBB88_8
10127 ; RV32ZVE32F-NEXT: .LBB88_16: # %cond.store13
10128 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
10129 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
10130 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
10131 ; RV32ZVE32F-NEXT: fsd fa7, 0(a0)
10132 ; RV32ZVE32F-NEXT: ret
10134 ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i32_v8f64:
10135 ; RV64ZVE32F: # %bb.0:
10136 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10137 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
10138 ; RV64ZVE32F-NEXT: andi a2, a1, 1
10139 ; RV64ZVE32F-NEXT: beqz a2, .LBB88_2
10140 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
10141 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
10142 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
10143 ; RV64ZVE32F-NEXT: slli a2, a2, 3
10144 ; RV64ZVE32F-NEXT: add a2, a0, a2
10145 ; RV64ZVE32F-NEXT: fsd fa0, 0(a2)
10146 ; RV64ZVE32F-NEXT: .LBB88_2: # %else
10147 ; RV64ZVE32F-NEXT: andi a2, a1, 2
10148 ; RV64ZVE32F-NEXT: beqz a2, .LBB88_4
10149 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
10150 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
10151 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
10152 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
10153 ; RV64ZVE32F-NEXT: slli a2, a2, 3
10154 ; RV64ZVE32F-NEXT: add a2, a0, a2
10155 ; RV64ZVE32F-NEXT: fsd fa1, 0(a2)
10156 ; RV64ZVE32F-NEXT: .LBB88_4: # %else2
10157 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
10158 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
10159 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
10160 ; RV64ZVE32F-NEXT: andi a2, a1, 4
10161 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
10162 ; RV64ZVE32F-NEXT: bnez a2, .LBB88_12
10163 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
10164 ; RV64ZVE32F-NEXT: andi a2, a1, 8
10165 ; RV64ZVE32F-NEXT: bnez a2, .LBB88_13
10166 ; RV64ZVE32F-NEXT: .LBB88_6: # %else6
10167 ; RV64ZVE32F-NEXT: andi a2, a1, 16
10168 ; RV64ZVE32F-NEXT: bnez a2, .LBB88_14
10169 ; RV64ZVE32F-NEXT: .LBB88_7: # %else8
10170 ; RV64ZVE32F-NEXT: andi a2, a1, 32
10171 ; RV64ZVE32F-NEXT: beqz a2, .LBB88_9
10172 ; RV64ZVE32F-NEXT: .LBB88_8: # %cond.store9
10173 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
10174 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
10175 ; RV64ZVE32F-NEXT: slli a2, a2, 3
10176 ; RV64ZVE32F-NEXT: add a2, a0, a2
10177 ; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
10178 ; RV64ZVE32F-NEXT: .LBB88_9: # %else10
10179 ; RV64ZVE32F-NEXT: andi a2, a1, 64
10180 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
10181 ; RV64ZVE32F-NEXT: bnez a2, .LBB88_15
10182 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
10183 ; RV64ZVE32F-NEXT: andi a1, a1, -128
10184 ; RV64ZVE32F-NEXT: bnez a1, .LBB88_16
10185 ; RV64ZVE32F-NEXT: .LBB88_11: # %else14
10186 ; RV64ZVE32F-NEXT: ret
10187 ; RV64ZVE32F-NEXT: .LBB88_12: # %cond.store3
10188 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
10189 ; RV64ZVE32F-NEXT: slli a2, a2, 3
10190 ; RV64ZVE32F-NEXT: add a2, a0, a2
10191 ; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
10192 ; RV64ZVE32F-NEXT: andi a2, a1, 8
10193 ; RV64ZVE32F-NEXT: beqz a2, .LBB88_6
10194 ; RV64ZVE32F-NEXT: .LBB88_13: # %cond.store5
10195 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
10196 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
10197 ; RV64ZVE32F-NEXT: slli a2, a2, 3
10198 ; RV64ZVE32F-NEXT: add a2, a0, a2
10199 ; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
10200 ; RV64ZVE32F-NEXT: andi a2, a1, 16
10201 ; RV64ZVE32F-NEXT: beqz a2, .LBB88_7
10202 ; RV64ZVE32F-NEXT: .LBB88_14: # %cond.store7
10203 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
10204 ; RV64ZVE32F-NEXT: slli a2, a2, 3
10205 ; RV64ZVE32F-NEXT: add a2, a0, a2
10206 ; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
10207 ; RV64ZVE32F-NEXT: andi a2, a1, 32
10208 ; RV64ZVE32F-NEXT: bnez a2, .LBB88_8
10209 ; RV64ZVE32F-NEXT: j .LBB88_9
10210 ; RV64ZVE32F-NEXT: .LBB88_15: # %cond.store11
10211 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
10212 ; RV64ZVE32F-NEXT: slli a2, a2, 3
10213 ; RV64ZVE32F-NEXT: add a2, a0, a2
10214 ; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
10215 ; RV64ZVE32F-NEXT: andi a1, a1, -128
10216 ; RV64ZVE32F-NEXT: beqz a1, .LBB88_11
10217 ; RV64ZVE32F-NEXT: .LBB88_16: # %cond.store13
10218 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
10219 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
10220 ; RV64ZVE32F-NEXT: slli a1, a1, 3
10221 ; RV64ZVE32F-NEXT: add a0, a0, a1
10222 ; RV64ZVE32F-NEXT: fsd fa7, 0(a0)
10223 ; RV64ZVE32F-NEXT: ret
; IR under test: widen the i32 indices with sext, index %base in units of
; double, and scatter %val under mask %m (the i32 operand of the call is 8).
10224 %eidxs = sext <8 x i32> %idxs to <8 x i64>
10225 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
10226 call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
; Masked scatter of <8 x double> through %base, indexed by <8 x i32> indices
; that are explicitly zero-extended to <8 x i64> before the getelementptr (see
; the IR at the end of this function).
;
; What the autogenerated checks below pin for each run configuration:
;  * RV32V run - the indices are shifted left by 3 at e32 and consumed
;    directly by vsoxei32.v; no widening instruction appears.
;  * RV64 run - the indices are widened with vzext.vf2, shifted left by 3 at
;    e64, and consumed by vsoxei64.v.
;  * RV32ZVE32F run - the addresses are formed in a vector register (vsll.vi
;    by 3, then vadd.vx with a0); afterwards each mask bit is tested in turn
;    and the selected element is stored with a scalar fsd.
;  * RV64ZVE32F run - each mask bit is tested in turn; the matching index is
;    moved to a scalar register with vmv.x.s and then run through the pair
;    "slli 32" / "srli 29", which clears the upper 32 bits and leaves the low
;    32 bits multiplied by 8, before being added to a0 for the scalar fsd.
; Do not edit the check lines by hand; regenerate them with
; utils/update_llc_test_checks.py.
10230 define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m) {
10231 ; RV32V-LABEL: mscatter_baseidx_zext_v8i32_v8f64:
10233 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
10234 ; RV32V-NEXT: vsll.vi v12, v12, 3
10235 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
10236 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
10239 ; RV64-LABEL: mscatter_baseidx_zext_v8i32_v8f64:
10241 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
10242 ; RV64-NEXT: vzext.vf2 v16, v12
10243 ; RV64-NEXT: vsll.vi v12, v16, 3
10244 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
10247 ; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i32_v8f64:
10248 ; RV32ZVE32F: # %bb.0:
10249 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
10250 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
10251 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
10252 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10253 ; RV32ZVE32F-NEXT: vmv.x.s a0, v0
10254 ; RV32ZVE32F-NEXT: andi a1, a0, 1
10255 ; RV32ZVE32F-NEXT: bnez a1, .LBB89_9
10256 ; RV32ZVE32F-NEXT: # %bb.1: # %else
10257 ; RV32ZVE32F-NEXT: andi a1, a0, 2
10258 ; RV32ZVE32F-NEXT: bnez a1, .LBB89_10
10259 ; RV32ZVE32F-NEXT: .LBB89_2: # %else2
10260 ; RV32ZVE32F-NEXT: andi a1, a0, 4
10261 ; RV32ZVE32F-NEXT: bnez a1, .LBB89_11
10262 ; RV32ZVE32F-NEXT: .LBB89_3: # %else4
10263 ; RV32ZVE32F-NEXT: andi a1, a0, 8
10264 ; RV32ZVE32F-NEXT: bnez a1, .LBB89_12
10265 ; RV32ZVE32F-NEXT: .LBB89_4: # %else6
10266 ; RV32ZVE32F-NEXT: andi a1, a0, 16
10267 ; RV32ZVE32F-NEXT: bnez a1, .LBB89_13
10268 ; RV32ZVE32F-NEXT: .LBB89_5: # %else8
10269 ; RV32ZVE32F-NEXT: andi a1, a0, 32
10270 ; RV32ZVE32F-NEXT: bnez a1, .LBB89_14
10271 ; RV32ZVE32F-NEXT: .LBB89_6: # %else10
10272 ; RV32ZVE32F-NEXT: andi a1, a0, 64
10273 ; RV32ZVE32F-NEXT: bnez a1, .LBB89_15
10274 ; RV32ZVE32F-NEXT: .LBB89_7: # %else12
10275 ; RV32ZVE32F-NEXT: andi a0, a0, -128
10276 ; RV32ZVE32F-NEXT: bnez a0, .LBB89_16
10277 ; RV32ZVE32F-NEXT: .LBB89_8: # %else14
10278 ; RV32ZVE32F-NEXT: ret
10279 ; RV32ZVE32F-NEXT: .LBB89_9: # %cond.store
10280 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
10281 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
10282 ; RV32ZVE32F-NEXT: fsd fa0, 0(a1)
10283 ; RV32ZVE32F-NEXT: andi a1, a0, 2
10284 ; RV32ZVE32F-NEXT: beqz a1, .LBB89_2
10285 ; RV32ZVE32F-NEXT: .LBB89_10: # %cond.store1
10286 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
10287 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
10288 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
10289 ; RV32ZVE32F-NEXT: fsd fa1, 0(a1)
10290 ; RV32ZVE32F-NEXT: andi a1, a0, 4
10291 ; RV32ZVE32F-NEXT: beqz a1, .LBB89_3
10292 ; RV32ZVE32F-NEXT: .LBB89_11: # %cond.store3
10293 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
10294 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
10295 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
10296 ; RV32ZVE32F-NEXT: fsd fa2, 0(a1)
10297 ; RV32ZVE32F-NEXT: andi a1, a0, 8
10298 ; RV32ZVE32F-NEXT: beqz a1, .LBB89_4
10299 ; RV32ZVE32F-NEXT: .LBB89_12: # %cond.store5
10300 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
10301 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
10302 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
10303 ; RV32ZVE32F-NEXT: fsd fa3, 0(a1)
10304 ; RV32ZVE32F-NEXT: andi a1, a0, 16
10305 ; RV32ZVE32F-NEXT: beqz a1, .LBB89_5
10306 ; RV32ZVE32F-NEXT: .LBB89_13: # %cond.store7
10307 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
10308 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
10309 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
10310 ; RV32ZVE32F-NEXT: fsd fa4, 0(a1)
10311 ; RV32ZVE32F-NEXT: andi a1, a0, 32
10312 ; RV32ZVE32F-NEXT: beqz a1, .LBB89_6
10313 ; RV32ZVE32F-NEXT: .LBB89_14: # %cond.store9
10314 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
10315 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
10316 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
10317 ; RV32ZVE32F-NEXT: fsd fa5, 0(a1)
10318 ; RV32ZVE32F-NEXT: andi a1, a0, 64
10319 ; RV32ZVE32F-NEXT: beqz a1, .LBB89_7
10320 ; RV32ZVE32F-NEXT: .LBB89_15: # %cond.store11
10321 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
10322 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
10323 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
10324 ; RV32ZVE32F-NEXT: fsd fa6, 0(a1)
10325 ; RV32ZVE32F-NEXT: andi a0, a0, -128
10326 ; RV32ZVE32F-NEXT: beqz a0, .LBB89_8
10327 ; RV32ZVE32F-NEXT: .LBB89_16: # %cond.store13
10328 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
10329 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
10330 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
10331 ; RV32ZVE32F-NEXT: fsd fa7, 0(a0)
10332 ; RV32ZVE32F-NEXT: ret
10334 ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i32_v8f64:
10335 ; RV64ZVE32F: # %bb.0:
10336 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10337 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
10338 ; RV64ZVE32F-NEXT: andi a2, a1, 1
10339 ; RV64ZVE32F-NEXT: beqz a2, .LBB89_2
10340 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
10341 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
10342 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
10343 ; RV64ZVE32F-NEXT: slli a2, a2, 32
10344 ; RV64ZVE32F-NEXT: srli a2, a2, 29
10345 ; RV64ZVE32F-NEXT: add a2, a0, a2
10346 ; RV64ZVE32F-NEXT: fsd fa0, 0(a2)
10347 ; RV64ZVE32F-NEXT: .LBB89_2: # %else
10348 ; RV64ZVE32F-NEXT: andi a2, a1, 2
10349 ; RV64ZVE32F-NEXT: beqz a2, .LBB89_4
10350 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
10351 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
10352 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
10353 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
10354 ; RV64ZVE32F-NEXT: slli a2, a2, 32
10355 ; RV64ZVE32F-NEXT: srli a2, a2, 29
10356 ; RV64ZVE32F-NEXT: add a2, a0, a2
10357 ; RV64ZVE32F-NEXT: fsd fa1, 0(a2)
10358 ; RV64ZVE32F-NEXT: .LBB89_4: # %else2
10359 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
10360 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
10361 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
10362 ; RV64ZVE32F-NEXT: andi a2, a1, 4
10363 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
10364 ; RV64ZVE32F-NEXT: bnez a2, .LBB89_12
10365 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
10366 ; RV64ZVE32F-NEXT: andi a2, a1, 8
10367 ; RV64ZVE32F-NEXT: bnez a2, .LBB89_13
10368 ; RV64ZVE32F-NEXT: .LBB89_6: # %else6
10369 ; RV64ZVE32F-NEXT: andi a2, a1, 16
10370 ; RV64ZVE32F-NEXT: bnez a2, .LBB89_14
10371 ; RV64ZVE32F-NEXT: .LBB89_7: # %else8
10372 ; RV64ZVE32F-NEXT: andi a2, a1, 32
10373 ; RV64ZVE32F-NEXT: beqz a2, .LBB89_9
10374 ; RV64ZVE32F-NEXT: .LBB89_8: # %cond.store9
10375 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
10376 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
10377 ; RV64ZVE32F-NEXT: slli a2, a2, 32
10378 ; RV64ZVE32F-NEXT: srli a2, a2, 29
10379 ; RV64ZVE32F-NEXT: add a2, a0, a2
10380 ; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
10381 ; RV64ZVE32F-NEXT: .LBB89_9: # %else10
10382 ; RV64ZVE32F-NEXT: andi a2, a1, 64
10383 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
10384 ; RV64ZVE32F-NEXT: bnez a2, .LBB89_15
10385 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
10386 ; RV64ZVE32F-NEXT: andi a1, a1, -128
10387 ; RV64ZVE32F-NEXT: bnez a1, .LBB89_16
10388 ; RV64ZVE32F-NEXT: .LBB89_11: # %else14
10389 ; RV64ZVE32F-NEXT: ret
10390 ; RV64ZVE32F-NEXT: .LBB89_12: # %cond.store3
10391 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
10392 ; RV64ZVE32F-NEXT: slli a2, a2, 32
10393 ; RV64ZVE32F-NEXT: srli a2, a2, 29
10394 ; RV64ZVE32F-NEXT: add a2, a0, a2
10395 ; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
10396 ; RV64ZVE32F-NEXT: andi a2, a1, 8
10397 ; RV64ZVE32F-NEXT: beqz a2, .LBB89_6
10398 ; RV64ZVE32F-NEXT: .LBB89_13: # %cond.store5
10399 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
10400 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
10401 ; RV64ZVE32F-NEXT: slli a2, a2, 32
10402 ; RV64ZVE32F-NEXT: srli a2, a2, 29
10403 ; RV64ZVE32F-NEXT: add a2, a0, a2
10404 ; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
10405 ; RV64ZVE32F-NEXT: andi a2, a1, 16
10406 ; RV64ZVE32F-NEXT: beqz a2, .LBB89_7
10407 ; RV64ZVE32F-NEXT: .LBB89_14: # %cond.store7
10408 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
10409 ; RV64ZVE32F-NEXT: slli a2, a2, 32
10410 ; RV64ZVE32F-NEXT: srli a2, a2, 29
10411 ; RV64ZVE32F-NEXT: add a2, a0, a2
10412 ; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
10413 ; RV64ZVE32F-NEXT: andi a2, a1, 32
10414 ; RV64ZVE32F-NEXT: bnez a2, .LBB89_8
10415 ; RV64ZVE32F-NEXT: j .LBB89_9
10416 ; RV64ZVE32F-NEXT: .LBB89_15: # %cond.store11
10417 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
10418 ; RV64ZVE32F-NEXT: slli a2, a2, 32
10419 ; RV64ZVE32F-NEXT: srli a2, a2, 29
10420 ; RV64ZVE32F-NEXT: add a2, a0, a2
10421 ; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
10422 ; RV64ZVE32F-NEXT: andi a1, a1, -128
10423 ; RV64ZVE32F-NEXT: beqz a1, .LBB89_11
10424 ; RV64ZVE32F-NEXT: .LBB89_16: # %cond.store13
10425 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
10426 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
10427 ; RV64ZVE32F-NEXT: slli a1, a1, 32
10428 ; RV64ZVE32F-NEXT: srli a1, a1, 29
10429 ; RV64ZVE32F-NEXT: add a0, a0, a1
10430 ; RV64ZVE32F-NEXT: fsd fa7, 0(a0)
10431 ; RV64ZVE32F-NEXT: ret
; IR under test: widen the i32 indices with zext, index %base in units of
; double, and scatter %val under mask %m (the i32 operand of the call is 8).
10432 %eidxs = zext <8 x i32> %idxs to <8 x i64>
10433 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
10434 call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
; Masked scatter of <8 x double> through %base, indexed directly by
; <8 x i64> indices (no extension in the IR; see the end of this function).
;
; What the autogenerated checks below pin for each run configuration:
;  * RV32V run - the 64-bit indices are narrowed to e32 with vnsrl.wi (shift
;    amount 0), shifted left by 3, and consumed by vsoxei32.v.
;  * RV64 run - the indices are shifted left by 3 at e64 and consumed by
;    vsoxei64.v.
;  * RV32ZVE32F run - eight words are loaded with lw from a1 at offsets
;    0, 8, ..., 56 and inserted into v8 with vslide1down.vx; the vector is then
;    shifted left by 3 and a0 is added, after which each mask bit is tested and
;    the selected element is stored with a scalar fsd.
;  * RV64ZVE32F run - seven doublewords are loaded up front with ld from a1 at
;    offsets 8..56; the doubleword at offset 0 is loaded only inside the
;    %cond.store block. The first mask test writes t2 rather than a1, so a1
;    still holds the load address when that block is reached. Each taken store
;    scales its index with slli by 3, adds a0, and uses a scalar fsd.
; Do not edit the check lines by hand; regenerate them with
; utils/update_llc_test_checks.py.
10438 define void @mscatter_baseidx_v8f64(<8 x double> %val, ptr %base, <8 x i64> %idxs, <8 x i1> %m) {
10439 ; RV32V-LABEL: mscatter_baseidx_v8f64:
10441 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
10442 ; RV32V-NEXT: vnsrl.wi v16, v12, 0
10443 ; RV32V-NEXT: vsll.vi v12, v16, 3
10444 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
10445 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
10448 ; RV64-LABEL: mscatter_baseidx_v8f64:
10450 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
10451 ; RV64-NEXT: vsll.vi v12, v12, 3
10452 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
10455 ; RV32ZVE32F-LABEL: mscatter_baseidx_v8f64:
10456 ; RV32ZVE32F: # %bb.0:
10457 ; RV32ZVE32F-NEXT: lw a2, 56(a1)
10458 ; RV32ZVE32F-NEXT: lw a3, 48(a1)
10459 ; RV32ZVE32F-NEXT: lw a4, 40(a1)
10460 ; RV32ZVE32F-NEXT: lw a5, 32(a1)
10461 ; RV32ZVE32F-NEXT: lw a6, 24(a1)
10462 ; RV32ZVE32F-NEXT: lw a7, 0(a1)
10463 ; RV32ZVE32F-NEXT: lw t0, 8(a1)
10464 ; RV32ZVE32F-NEXT: lw a1, 16(a1)
10465 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
10466 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a7
10467 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t0
10468 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a1
10469 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a6
10470 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a5
10471 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a4
10472 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a3
10473 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a2
10474 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
10475 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
10476 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10477 ; RV32ZVE32F-NEXT: vmv.x.s a0, v0
10478 ; RV32ZVE32F-NEXT: andi a1, a0, 1
10479 ; RV32ZVE32F-NEXT: bnez a1, .LBB90_9
10480 ; RV32ZVE32F-NEXT: # %bb.1: # %else
10481 ; RV32ZVE32F-NEXT: andi a1, a0, 2
10482 ; RV32ZVE32F-NEXT: bnez a1, .LBB90_10
10483 ; RV32ZVE32F-NEXT: .LBB90_2: # %else2
10484 ; RV32ZVE32F-NEXT: andi a1, a0, 4
10485 ; RV32ZVE32F-NEXT: bnez a1, .LBB90_11
10486 ; RV32ZVE32F-NEXT: .LBB90_3: # %else4
10487 ; RV32ZVE32F-NEXT: andi a1, a0, 8
10488 ; RV32ZVE32F-NEXT: bnez a1, .LBB90_12
10489 ; RV32ZVE32F-NEXT: .LBB90_4: # %else6
10490 ; RV32ZVE32F-NEXT: andi a1, a0, 16
10491 ; RV32ZVE32F-NEXT: bnez a1, .LBB90_13
10492 ; RV32ZVE32F-NEXT: .LBB90_5: # %else8
10493 ; RV32ZVE32F-NEXT: andi a1, a0, 32
10494 ; RV32ZVE32F-NEXT: bnez a1, .LBB90_14
10495 ; RV32ZVE32F-NEXT: .LBB90_6: # %else10
10496 ; RV32ZVE32F-NEXT: andi a1, a0, 64
10497 ; RV32ZVE32F-NEXT: bnez a1, .LBB90_15
10498 ; RV32ZVE32F-NEXT: .LBB90_7: # %else12
10499 ; RV32ZVE32F-NEXT: andi a0, a0, -128
10500 ; RV32ZVE32F-NEXT: bnez a0, .LBB90_16
10501 ; RV32ZVE32F-NEXT: .LBB90_8: # %else14
10502 ; RV32ZVE32F-NEXT: ret
10503 ; RV32ZVE32F-NEXT: .LBB90_9: # %cond.store
10504 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
10505 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
10506 ; RV32ZVE32F-NEXT: fsd fa0, 0(a1)
10507 ; RV32ZVE32F-NEXT: andi a1, a0, 2
10508 ; RV32ZVE32F-NEXT: beqz a1, .LBB90_2
10509 ; RV32ZVE32F-NEXT: .LBB90_10: # %cond.store1
10510 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
10511 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
10512 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
10513 ; RV32ZVE32F-NEXT: fsd fa1, 0(a1)
10514 ; RV32ZVE32F-NEXT: andi a1, a0, 4
10515 ; RV32ZVE32F-NEXT: beqz a1, .LBB90_3
10516 ; RV32ZVE32F-NEXT: .LBB90_11: # %cond.store3
10517 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
10518 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
10519 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
10520 ; RV32ZVE32F-NEXT: fsd fa2, 0(a1)
10521 ; RV32ZVE32F-NEXT: andi a1, a0, 8
10522 ; RV32ZVE32F-NEXT: beqz a1, .LBB90_4
10523 ; RV32ZVE32F-NEXT: .LBB90_12: # %cond.store5
10524 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
10525 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
10526 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
10527 ; RV32ZVE32F-NEXT: fsd fa3, 0(a1)
10528 ; RV32ZVE32F-NEXT: andi a1, a0, 16
10529 ; RV32ZVE32F-NEXT: beqz a1, .LBB90_5
10530 ; RV32ZVE32F-NEXT: .LBB90_13: # %cond.store7
10531 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
10532 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
10533 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
10534 ; RV32ZVE32F-NEXT: fsd fa4, 0(a1)
10535 ; RV32ZVE32F-NEXT: andi a1, a0, 32
10536 ; RV32ZVE32F-NEXT: beqz a1, .LBB90_6
10537 ; RV32ZVE32F-NEXT: .LBB90_14: # %cond.store9
10538 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
10539 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
10540 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
10541 ; RV32ZVE32F-NEXT: fsd fa5, 0(a1)
10542 ; RV32ZVE32F-NEXT: andi a1, a0, 64
10543 ; RV32ZVE32F-NEXT: beqz a1, .LBB90_7
10544 ; RV32ZVE32F-NEXT: .LBB90_15: # %cond.store11
10545 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
10546 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
10547 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
10548 ; RV32ZVE32F-NEXT: fsd fa6, 0(a1)
10549 ; RV32ZVE32F-NEXT: andi a0, a0, -128
10550 ; RV32ZVE32F-NEXT: beqz a0, .LBB90_8
10551 ; RV32ZVE32F-NEXT: .LBB90_16: # %cond.store13
10552 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
10553 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
10554 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
10555 ; RV32ZVE32F-NEXT: fsd fa7, 0(a0)
10556 ; RV32ZVE32F-NEXT: ret
10558 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8f64:
10559 ; RV64ZVE32F: # %bb.0:
10560 ; RV64ZVE32F-NEXT: ld t1, 8(a1)
10561 ; RV64ZVE32F-NEXT: ld t0, 16(a1)
10562 ; RV64ZVE32F-NEXT: ld a7, 24(a1)
10563 ; RV64ZVE32F-NEXT: ld a6, 32(a1)
10564 ; RV64ZVE32F-NEXT: ld a5, 40(a1)
10565 ; RV64ZVE32F-NEXT: ld a4, 48(a1)
10566 ; RV64ZVE32F-NEXT: ld a2, 56(a1)
10567 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10568 ; RV64ZVE32F-NEXT: vmv.x.s a3, v0
10569 ; RV64ZVE32F-NEXT: andi t2, a3, 1
10570 ; RV64ZVE32F-NEXT: bnez t2, .LBB90_9
10571 ; RV64ZVE32F-NEXT: # %bb.1: # %else
10572 ; RV64ZVE32F-NEXT: andi a1, a3, 2
10573 ; RV64ZVE32F-NEXT: bnez a1, .LBB90_10
10574 ; RV64ZVE32F-NEXT: .LBB90_2: # %else2
10575 ; RV64ZVE32F-NEXT: andi a1, a3, 4
10576 ; RV64ZVE32F-NEXT: bnez a1, .LBB90_11
10577 ; RV64ZVE32F-NEXT: .LBB90_3: # %else4
10578 ; RV64ZVE32F-NEXT: andi a1, a3, 8
10579 ; RV64ZVE32F-NEXT: bnez a1, .LBB90_12
10580 ; RV64ZVE32F-NEXT: .LBB90_4: # %else6
10581 ; RV64ZVE32F-NEXT: andi a1, a3, 16
10582 ; RV64ZVE32F-NEXT: bnez a1, .LBB90_13
10583 ; RV64ZVE32F-NEXT: .LBB90_5: # %else8
10584 ; RV64ZVE32F-NEXT: andi a1, a3, 32
10585 ; RV64ZVE32F-NEXT: bnez a1, .LBB90_14
10586 ; RV64ZVE32F-NEXT: .LBB90_6: # %else10
10587 ; RV64ZVE32F-NEXT: andi a1, a3, 64
10588 ; RV64ZVE32F-NEXT: bnez a1, .LBB90_15
10589 ; RV64ZVE32F-NEXT: .LBB90_7: # %else12
10590 ; RV64ZVE32F-NEXT: andi a1, a3, -128
10591 ; RV64ZVE32F-NEXT: bnez a1, .LBB90_16
10592 ; RV64ZVE32F-NEXT: .LBB90_8: # %else14
10593 ; RV64ZVE32F-NEXT: ret
10594 ; RV64ZVE32F-NEXT: .LBB90_9: # %cond.store
10595 ; RV64ZVE32F-NEXT: ld a1, 0(a1)
10596 ; RV64ZVE32F-NEXT: slli a1, a1, 3
10597 ; RV64ZVE32F-NEXT: add a1, a0, a1
10598 ; RV64ZVE32F-NEXT: fsd fa0, 0(a1)
10599 ; RV64ZVE32F-NEXT: andi a1, a3, 2
10600 ; RV64ZVE32F-NEXT: beqz a1, .LBB90_2
10601 ; RV64ZVE32F-NEXT: .LBB90_10: # %cond.store1
10602 ; RV64ZVE32F-NEXT: slli t1, t1, 3
10603 ; RV64ZVE32F-NEXT: add t1, a0, t1
10604 ; RV64ZVE32F-NEXT: fsd fa1, 0(t1)
10605 ; RV64ZVE32F-NEXT: andi a1, a3, 4
10606 ; RV64ZVE32F-NEXT: beqz a1, .LBB90_3
10607 ; RV64ZVE32F-NEXT: .LBB90_11: # %cond.store3
10608 ; RV64ZVE32F-NEXT: slli t0, t0, 3
10609 ; RV64ZVE32F-NEXT: add t0, a0, t0
10610 ; RV64ZVE32F-NEXT: fsd fa2, 0(t0)
10611 ; RV64ZVE32F-NEXT: andi a1, a3, 8
10612 ; RV64ZVE32F-NEXT: beqz a1, .LBB90_4
10613 ; RV64ZVE32F-NEXT: .LBB90_12: # %cond.store5
10614 ; RV64ZVE32F-NEXT: slli a7, a7, 3
10615 ; RV64ZVE32F-NEXT: add a7, a0, a7
10616 ; RV64ZVE32F-NEXT: fsd fa3, 0(a7)
10617 ; RV64ZVE32F-NEXT: andi a1, a3, 16
10618 ; RV64ZVE32F-NEXT: beqz a1, .LBB90_5
10619 ; RV64ZVE32F-NEXT: .LBB90_13: # %cond.store7
10620 ; RV64ZVE32F-NEXT: slli a6, a6, 3
10621 ; RV64ZVE32F-NEXT: add a6, a0, a6
10622 ; RV64ZVE32F-NEXT: fsd fa4, 0(a6)
10623 ; RV64ZVE32F-NEXT: andi a1, a3, 32
10624 ; RV64ZVE32F-NEXT: beqz a1, .LBB90_6
10625 ; RV64ZVE32F-NEXT: .LBB90_14: # %cond.store9
10626 ; RV64ZVE32F-NEXT: slli a5, a5, 3
10627 ; RV64ZVE32F-NEXT: add a5, a0, a5
10628 ; RV64ZVE32F-NEXT: fsd fa5, 0(a5)
10629 ; RV64ZVE32F-NEXT: andi a1, a3, 64
10630 ; RV64ZVE32F-NEXT: beqz a1, .LBB90_7
10631 ; RV64ZVE32F-NEXT: .LBB90_15: # %cond.store11
10632 ; RV64ZVE32F-NEXT: slli a4, a4, 3
10633 ; RV64ZVE32F-NEXT: add a4, a0, a4
10634 ; RV64ZVE32F-NEXT: fsd fa6, 0(a4)
10635 ; RV64ZVE32F-NEXT: andi a1, a3, -128
10636 ; RV64ZVE32F-NEXT: beqz a1, .LBB90_8
10637 ; RV64ZVE32F-NEXT: .LBB90_16: # %cond.store13
10638 ; RV64ZVE32F-NEXT: slli a2, a2, 3
10639 ; RV64ZVE32F-NEXT: add a0, a0, a2
10640 ; RV64ZVE32F-NEXT: fsd fa7, 0(a0)
10641 ; RV64ZVE32F-NEXT: ret
; IR under test: index %base in units of double with the i64 indices as given
; and scatter %val under mask %m (the i32 operand of the call is 8).
10642 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %idxs
10643 call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
; Declaration of the masked scatter intrinsic for 16 x i8 elements. Operands,
; in order: the <16 x i8> value to store, the <16 x ptr> destinations, an i32
; operand (the alignment, per the LLVM LangRef), and the <16 x i1> mask.
10647 declare void @llvm.masked.scatter.v16i8.v16p0(<16 x i8>, <16 x ptr>, i32, <16 x i1>)
10649 define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs, <16 x i1> %m) {
10650 ; RV32-LABEL: mscatter_baseidx_v16i8:
10652 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
10653 ; RV32-NEXT: vsext.vf4 v12, v9
10654 ; RV32-NEXT: vsetvli zero, zero, e8, m1, ta, ma
10655 ; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
10658 ; RV64-LABEL: mscatter_baseidx_v16i8:
10660 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
10661 ; RV64-NEXT: vsext.vf8 v16, v9
10662 ; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, ma
10663 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
10666 ; RV64ZVE32F-LABEL: mscatter_baseidx_v16i8:
10667 ; RV64ZVE32F: # %bb.0:
10668 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
10669 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
10670 ; RV64ZVE32F-NEXT: andi a2, a1, 1
10671 ; RV64ZVE32F-NEXT: beqz a2, .LBB91_2
10672 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
10673 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10674 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
10675 ; RV64ZVE32F-NEXT: add a2, a0, a2
10676 ; RV64ZVE32F-NEXT: vse8.v v8, (a2)
10677 ; RV64ZVE32F-NEXT: .LBB91_2: # %else
10678 ; RV64ZVE32F-NEXT: andi a2, a1, 2
10679 ; RV64ZVE32F-NEXT: beqz a2, .LBB91_4
10680 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
10681 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10682 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
10683 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
10684 ; RV64ZVE32F-NEXT: add a2, a0, a2
10685 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10686 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
10687 ; RV64ZVE32F-NEXT: vse8.v v10, (a2)
10688 ; RV64ZVE32F-NEXT: .LBB91_4: # %else2
10689 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
10690 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
10691 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
10692 ; RV64ZVE32F-NEXT: andi a2, a1, 4
10693 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 2
10694 ; RV64ZVE32F-NEXT: bnez a2, .LBB91_25
10695 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
10696 ; RV64ZVE32F-NEXT: andi a2, a1, 8
10697 ; RV64ZVE32F-NEXT: bnez a2, .LBB91_26
10698 ; RV64ZVE32F-NEXT: .LBB91_6: # %else6
10699 ; RV64ZVE32F-NEXT: andi a2, a1, 16
10700 ; RV64ZVE32F-NEXT: beqz a2, .LBB91_8
10701 ; RV64ZVE32F-NEXT: .LBB91_7: # %cond.store7
10702 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10703 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
10704 ; RV64ZVE32F-NEXT: add a2, a0, a2
10705 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 4
10706 ; RV64ZVE32F-NEXT: vse8.v v11, (a2)
10707 ; RV64ZVE32F-NEXT: .LBB91_8: # %else8
10708 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma
10709 ; RV64ZVE32F-NEXT: andi a2, a1, 32
10710 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 8
10711 ; RV64ZVE32F-NEXT: beqz a2, .LBB91_10
10712 ; RV64ZVE32F-NEXT: # %bb.9: # %cond.store9
10713 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10714 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
10715 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
10716 ; RV64ZVE32F-NEXT: add a2, a0, a2
10717 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10718 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 5
10719 ; RV64ZVE32F-NEXT: vse8.v v11, (a2)
10720 ; RV64ZVE32F-NEXT: .LBB91_10: # %else10
10721 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
10722 ; RV64ZVE32F-NEXT: andi a2, a1, 64
10723 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
10724 ; RV64ZVE32F-NEXT: bnez a2, .LBB91_27
10725 ; RV64ZVE32F-NEXT: # %bb.11: # %else12
10726 ; RV64ZVE32F-NEXT: andi a2, a1, 128
10727 ; RV64ZVE32F-NEXT: bnez a2, .LBB91_28
10728 ; RV64ZVE32F-NEXT: .LBB91_12: # %else14
10729 ; RV64ZVE32F-NEXT: andi a2, a1, 256
10730 ; RV64ZVE32F-NEXT: bnez a2, .LBB91_29
10731 ; RV64ZVE32F-NEXT: .LBB91_13: # %else16
10732 ; RV64ZVE32F-NEXT: andi a2, a1, 512
10733 ; RV64ZVE32F-NEXT: beqz a2, .LBB91_15
10734 ; RV64ZVE32F-NEXT: .LBB91_14: # %cond.store17
10735 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10736 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
10737 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
10738 ; RV64ZVE32F-NEXT: add a2, a0, a2
10739 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10740 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 9
10741 ; RV64ZVE32F-NEXT: vse8.v v10, (a2)
10742 ; RV64ZVE32F-NEXT: .LBB91_15: # %else18
10743 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
10744 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
10745 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
10746 ; RV64ZVE32F-NEXT: andi a2, a1, 1024
10747 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
10748 ; RV64ZVE32F-NEXT: bnez a2, .LBB91_30
10749 ; RV64ZVE32F-NEXT: # %bb.16: # %else20
10750 ; RV64ZVE32F-NEXT: slli a2, a1, 52
10751 ; RV64ZVE32F-NEXT: bltz a2, .LBB91_31
10752 ; RV64ZVE32F-NEXT: .LBB91_17: # %else22
10753 ; RV64ZVE32F-NEXT: slli a2, a1, 51
10754 ; RV64ZVE32F-NEXT: bltz a2, .LBB91_32
10755 ; RV64ZVE32F-NEXT: .LBB91_18: # %else24
10756 ; RV64ZVE32F-NEXT: slli a2, a1, 50
10757 ; RV64ZVE32F-NEXT: bgez a2, .LBB91_20
10758 ; RV64ZVE32F-NEXT: .LBB91_19: # %cond.store25
10759 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10760 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
10761 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
10762 ; RV64ZVE32F-NEXT: add a2, a0, a2
10763 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10764 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 13
10765 ; RV64ZVE32F-NEXT: vse8.v v9, (a2)
10766 ; RV64ZVE32F-NEXT: .LBB91_20: # %else26
10767 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
10768 ; RV64ZVE32F-NEXT: slli a2, a1, 49
10769 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
10770 ; RV64ZVE32F-NEXT: bgez a2, .LBB91_22
10771 ; RV64ZVE32F-NEXT: # %bb.21: # %cond.store27
10772 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
10773 ; RV64ZVE32F-NEXT: add a2, a0, a2
10774 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10775 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 14
10776 ; RV64ZVE32F-NEXT: vse8.v v10, (a2)
10777 ; RV64ZVE32F-NEXT: .LBB91_22: # %else28
10778 ; RV64ZVE32F-NEXT: lui a2, 1048568
10779 ; RV64ZVE32F-NEXT: and a1, a1, a2
10780 ; RV64ZVE32F-NEXT: beqz a1, .LBB91_24
10781 ; RV64ZVE32F-NEXT: # %bb.23: # %cond.store29
10782 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10783 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
10784 ; RV64ZVE32F-NEXT: vmv.x.s a1, v9
10785 ; RV64ZVE32F-NEXT: add a0, a0, a1
10786 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10787 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 15
10788 ; RV64ZVE32F-NEXT: vse8.v v8, (a0)
10789 ; RV64ZVE32F-NEXT: .LBB91_24: # %else30
10790 ; RV64ZVE32F-NEXT: ret
10791 ; RV64ZVE32F-NEXT: .LBB91_25: # %cond.store3
10792 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
10793 ; RV64ZVE32F-NEXT: add a2, a0, a2
10794 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10795 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
10796 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
10797 ; RV64ZVE32F-NEXT: andi a2, a1, 8
10798 ; RV64ZVE32F-NEXT: beqz a2, .LBB91_6
10799 ; RV64ZVE32F-NEXT: .LBB91_26: # %cond.store5
10800 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10801 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
10802 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
10803 ; RV64ZVE32F-NEXT: add a2, a0, a2
10804 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10805 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 3
10806 ; RV64ZVE32F-NEXT: vse8.v v11, (a2)
10807 ; RV64ZVE32F-NEXT: andi a2, a1, 16
10808 ; RV64ZVE32F-NEXT: bnez a2, .LBB91_7
10809 ; RV64ZVE32F-NEXT: j .LBB91_8
10810 ; RV64ZVE32F-NEXT: .LBB91_27: # %cond.store11
10811 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
10812 ; RV64ZVE32F-NEXT: add a2, a0, a2
10813 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10814 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 6
10815 ; RV64ZVE32F-NEXT: vse8.v v11, (a2)
10816 ; RV64ZVE32F-NEXT: andi a2, a1, 128
10817 ; RV64ZVE32F-NEXT: beqz a2, .LBB91_12
10818 ; RV64ZVE32F-NEXT: .LBB91_28: # %cond.store13
10819 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10820 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
10821 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
10822 ; RV64ZVE32F-NEXT: add a2, a0, a2
10823 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10824 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 7
10825 ; RV64ZVE32F-NEXT: vse8.v v10, (a2)
10826 ; RV64ZVE32F-NEXT: andi a2, a1, 256
10827 ; RV64ZVE32F-NEXT: beqz a2, .LBB91_13
10828 ; RV64ZVE32F-NEXT: .LBB91_29: # %cond.store15
10829 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10830 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
10831 ; RV64ZVE32F-NEXT: add a2, a0, a2
10832 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 8
10833 ; RV64ZVE32F-NEXT: vse8.v v10, (a2)
10834 ; RV64ZVE32F-NEXT: andi a2, a1, 512
10835 ; RV64ZVE32F-NEXT: bnez a2, .LBB91_14
10836 ; RV64ZVE32F-NEXT: j .LBB91_15
10837 ; RV64ZVE32F-NEXT: .LBB91_30: # %cond.store19
10838 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
10839 ; RV64ZVE32F-NEXT: add a2, a0, a2
10840 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10841 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 10
10842 ; RV64ZVE32F-NEXT: vse8.v v11, (a2)
10843 ; RV64ZVE32F-NEXT: slli a2, a1, 52
10844 ; RV64ZVE32F-NEXT: bgez a2, .LBB91_17
10845 ; RV64ZVE32F-NEXT: .LBB91_31: # %cond.store21
10846 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10847 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
10848 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
10849 ; RV64ZVE32F-NEXT: add a2, a0, a2
10850 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10851 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 11
10852 ; RV64ZVE32F-NEXT: vse8.v v9, (a2)
10853 ; RV64ZVE32F-NEXT: slli a2, a1, 51
10854 ; RV64ZVE32F-NEXT: bgez a2, .LBB91_18
10855 ; RV64ZVE32F-NEXT: .LBB91_32: # %cond.store23
10856 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10857 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
10858 ; RV64ZVE32F-NEXT: add a2, a0, a2
10859 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 12
10860 ; RV64ZVE32F-NEXT: vse8.v v9, (a2)
10861 ; RV64ZVE32F-NEXT: slli a2, a1, 50
10862 ; RV64ZVE32F-NEXT: bltz a2, .LBB91_19
10863 ; RV64ZVE32F-NEXT: j .LBB91_20
10864 %ptrs = getelementptr inbounds i8, ptr %base, <16 x i8> %idxs
10865 call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> %val, <16 x ptr> %ptrs, i32 1, <16 x i1> %m)
10869 declare void @llvm.masked.scatter.v32i8.v32p0(<32 x i8>, <32 x ptr>, i32, <32 x i1>)
; Test: masked scatter of 32 i8 values to %base plus a per-lane i8 index.
; What the expectations below record for each check prefix:
;  - RV32 prefix (shared by both riscv32 run lines): the indices are
;    sign-extended to e32 and a single masked vsoxei32.v is emitted.
;  - RV64 prefix: the scatter is done as two 16-lane halves, each with its
;    indices sign-extended to e64 and stored with a masked vsoxei64.v; the
;    upper half slides the values, the indices and the mask down first.
;  - RV64ZVE32F prefix: the scatter is expanded into one mask-bit test plus a
;    single-element vse8.v per lane (64 basic blocks). Presumably this is
;    because 64-bit index elements are unavailable with only Zve32f on a
;    64-bit target; confirm against the lowering code.
; These assertions are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
10871 define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, <32 x i1> %m) {
10872 ; RV32-LABEL: mscatter_baseidx_v32i8:
10874 ; RV32-NEXT: li a1, 32
10875 ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
10876 ; RV32-NEXT: vsext.vf4 v16, v10
10877 ; RV32-NEXT: vsetvli zero, zero, e8, m2, ta, ma
10878 ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
10881 ; RV64-LABEL: mscatter_baseidx_v32i8:
10883 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
10884 ; RV64-NEXT: vsext.vf8 v16, v10
10885 ; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, ma
10886 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
10887 ; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma
10888 ; RV64-NEXT: vslidedown.vi v8, v8, 16
10889 ; RV64-NEXT: vslidedown.vi v10, v10, 16
10890 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
10891 ; RV64-NEXT: vsext.vf8 v16, v10
10892 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
10893 ; RV64-NEXT: vslidedown.vi v0, v0, 2
10894 ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
10895 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
10898 ; RV64ZVE32F-LABEL: mscatter_baseidx_v32i8:
10899 ; RV64ZVE32F: # %bb.0:
10900 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
10901 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
10902 ; RV64ZVE32F-NEXT: andi a2, a1, 1
10903 ; RV64ZVE32F-NEXT: beqz a2, .LBB92_2
10904 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
10905 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10906 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
10907 ; RV64ZVE32F-NEXT: add a2, a0, a2
10908 ; RV64ZVE32F-NEXT: vse8.v v8, (a2)
10909 ; RV64ZVE32F-NEXT: .LBB92_2: # %else
10910 ; RV64ZVE32F-NEXT: andi a2, a1, 2
10911 ; RV64ZVE32F-NEXT: beqz a2, .LBB92_4
10912 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
10913 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10914 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 1
10915 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
10916 ; RV64ZVE32F-NEXT: add a2, a0, a2
10917 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10918 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1
10919 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
10920 ; RV64ZVE32F-NEXT: .LBB92_4: # %else2
10921 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
10922 ; RV64ZVE32F-NEXT: vslidedown.vi v13, v10, 4
10923 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
10924 ; RV64ZVE32F-NEXT: andi a2, a1, 4
10925 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 2
10926 ; RV64ZVE32F-NEXT: bnez a2, .LBB92_49
10927 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
10928 ; RV64ZVE32F-NEXT: andi a2, a1, 8
10929 ; RV64ZVE32F-NEXT: bnez a2, .LBB92_50
10930 ; RV64ZVE32F-NEXT: .LBB92_6: # %else6
10931 ; RV64ZVE32F-NEXT: andi a2, a1, 16
10932 ; RV64ZVE32F-NEXT: beqz a2, .LBB92_8
10933 ; RV64ZVE32F-NEXT: .LBB92_7: # %cond.store7
10934 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10935 ; RV64ZVE32F-NEXT: vmv.x.s a2, v13
10936 ; RV64ZVE32F-NEXT: add a2, a0, a2
10937 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
10938 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
10939 ; RV64ZVE32F-NEXT: .LBB92_8: # %else8
10940 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma
10941 ; RV64ZVE32F-NEXT: andi a2, a1, 32
10942 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 8
10943 ; RV64ZVE32F-NEXT: beqz a2, .LBB92_10
10944 ; RV64ZVE32F-NEXT: # %bb.9: # %cond.store9
10945 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10946 ; RV64ZVE32F-NEXT: vslidedown.vi v14, v13, 1
10947 ; RV64ZVE32F-NEXT: vmv.x.s a2, v14
10948 ; RV64ZVE32F-NEXT: add a2, a0, a2
10949 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10950 ; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 5
10951 ; RV64ZVE32F-NEXT: vse8.v v14, (a2)
10952 ; RV64ZVE32F-NEXT: .LBB92_10: # %else10
10953 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
10954 ; RV64ZVE32F-NEXT: andi a2, a1, 64
10955 ; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 2
10956 ; RV64ZVE32F-NEXT: bnez a2, .LBB92_51
10957 ; RV64ZVE32F-NEXT: # %bb.11: # %else12
10958 ; RV64ZVE32F-NEXT: andi a2, a1, 128
10959 ; RV64ZVE32F-NEXT: bnez a2, .LBB92_52
10960 ; RV64ZVE32F-NEXT: .LBB92_12: # %else14
10961 ; RV64ZVE32F-NEXT: andi a2, a1, 256
10962 ; RV64ZVE32F-NEXT: bnez a2, .LBB92_53
10963 ; RV64ZVE32F-NEXT: .LBB92_13: # %else16
10964 ; RV64ZVE32F-NEXT: andi a2, a1, 512
10965 ; RV64ZVE32F-NEXT: beqz a2, .LBB92_15
10966 ; RV64ZVE32F-NEXT: .LBB92_14: # %cond.store17
10967 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10968 ; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 1
10969 ; RV64ZVE32F-NEXT: vmv.x.s a2, v13
10970 ; RV64ZVE32F-NEXT: add a2, a0, a2
10971 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10972 ; RV64ZVE32F-NEXT: vslidedown.vi v13, v8, 9
10973 ; RV64ZVE32F-NEXT: vse8.v v13, (a2)
10974 ; RV64ZVE32F-NEXT: .LBB92_15: # %else18
10975 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
10976 ; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 4
10977 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
10978 ; RV64ZVE32F-NEXT: andi a2, a1, 1024
10979 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 2
10980 ; RV64ZVE32F-NEXT: beqz a2, .LBB92_17
10981 ; RV64ZVE32F-NEXT: # %bb.16: # %cond.store19
10982 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
10983 ; RV64ZVE32F-NEXT: add a2, a0, a2
10984 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10985 ; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 10
10986 ; RV64ZVE32F-NEXT: vse8.v v14, (a2)
10987 ; RV64ZVE32F-NEXT: .LBB92_17: # %else20
10988 ; RV64ZVE32F-NEXT: slli a2, a1, 52
10989 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_19
10990 ; RV64ZVE32F-NEXT: # %bb.18: # %cond.store21
10991 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10992 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
10993 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
10994 ; RV64ZVE32F-NEXT: add a2, a0, a2
10995 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10996 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 11
10997 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
10998 ; RV64ZVE32F-NEXT: .LBB92_19: # %else22
10999 ; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m2, ta, ma
11000 ; RV64ZVE32F-NEXT: slli a2, a1, 51
11001 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 16
11002 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_21
11003 ; RV64ZVE32F-NEXT: # %bb.20: # %cond.store23
11004 ; RV64ZVE32F-NEXT: vmv.x.s a2, v13
11005 ; RV64ZVE32F-NEXT: add a2, a0, a2
11006 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
11007 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 12
11008 ; RV64ZVE32F-NEXT: vse8.v v11, (a2)
11009 ; RV64ZVE32F-NEXT: .LBB92_21: # %else24
11010 ; RV64ZVE32F-NEXT: slli a2, a1, 50
11011 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_23
11012 ; RV64ZVE32F-NEXT: # %bb.22: # %cond.store25
11013 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
11014 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v13, 1
11015 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
11016 ; RV64ZVE32F-NEXT: add a2, a0, a2
11017 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
11018 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 13
11019 ; RV64ZVE32F-NEXT: vse8.v v11, (a2)
11020 ; RV64ZVE32F-NEXT: .LBB92_23: # %else26
11021 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
11022 ; RV64ZVE32F-NEXT: slli a2, a1, 49
11023 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v13, 2
11024 ; RV64ZVE32F-NEXT: bltz a2, .LBB92_54
11025 ; RV64ZVE32F-NEXT: # %bb.24: # %else28
11026 ; RV64ZVE32F-NEXT: slli a2, a1, 48
11027 ; RV64ZVE32F-NEXT: bltz a2, .LBB92_55
11028 ; RV64ZVE32F-NEXT: .LBB92_25: # %else30
11029 ; RV64ZVE32F-NEXT: slli a2, a1, 47
11030 ; RV64ZVE32F-NEXT: bltz a2, .LBB92_56
11031 ; RV64ZVE32F-NEXT: .LBB92_26: # %else32
11032 ; RV64ZVE32F-NEXT: slli a2, a1, 46
11033 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_28
11034 ; RV64ZVE32F-NEXT: .LBB92_27: # %cond.store33
11035 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
11036 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
11037 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
11038 ; RV64ZVE32F-NEXT: add a2, a0, a2
11039 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
11040 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 17
11041 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
11042 ; RV64ZVE32F-NEXT: .LBB92_28: # %else34
11043 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
11044 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
11045 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
11046 ; RV64ZVE32F-NEXT: slli a2, a1, 45
11047 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 2
11048 ; RV64ZVE32F-NEXT: bltz a2, .LBB92_57
11049 ; RV64ZVE32F-NEXT: # %bb.29: # %else36
11050 ; RV64ZVE32F-NEXT: slli a2, a1, 44
11051 ; RV64ZVE32F-NEXT: bltz a2, .LBB92_58
11052 ; RV64ZVE32F-NEXT: .LBB92_30: # %else38
11053 ; RV64ZVE32F-NEXT: slli a2, a1, 43
11054 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_32
11055 ; RV64ZVE32F-NEXT: .LBB92_31: # %cond.store39
11056 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
11057 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
11058 ; RV64ZVE32F-NEXT: add a2, a0, a2
11059 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 20
11060 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
11061 ; RV64ZVE32F-NEXT: .LBB92_32: # %else40
11062 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma
11063 ; RV64ZVE32F-NEXT: slli a2, a1, 42
11064 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 8
11065 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_34
11066 ; RV64ZVE32F-NEXT: # %bb.33: # %cond.store41
11067 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
11068 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v11, 1
11069 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
11070 ; RV64ZVE32F-NEXT: add a2, a0, a2
11071 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
11072 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 21
11073 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
11074 ; RV64ZVE32F-NEXT: .LBB92_34: # %else42
11075 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
11076 ; RV64ZVE32F-NEXT: slli a2, a1, 41
11077 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 2
11078 ; RV64ZVE32F-NEXT: bltz a2, .LBB92_59
11079 ; RV64ZVE32F-NEXT: # %bb.35: # %else44
11080 ; RV64ZVE32F-NEXT: slli a2, a1, 40
11081 ; RV64ZVE32F-NEXT: bltz a2, .LBB92_60
11082 ; RV64ZVE32F-NEXT: .LBB92_36: # %else46
11083 ; RV64ZVE32F-NEXT: slli a2, a1, 39
11084 ; RV64ZVE32F-NEXT: bltz a2, .LBB92_61
11085 ; RV64ZVE32F-NEXT: .LBB92_37: # %else48
11086 ; RV64ZVE32F-NEXT: slli a2, a1, 38
11087 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_39
11088 ; RV64ZVE32F-NEXT: .LBB92_38: # %cond.store49
11089 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
11090 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
11091 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
11092 ; RV64ZVE32F-NEXT: add a2, a0, a2
11093 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
11094 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 25
11095 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
11096 ; RV64ZVE32F-NEXT: .LBB92_39: # %else50
11097 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
11098 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
11099 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
11100 ; RV64ZVE32F-NEXT: slli a2, a1, 37
11101 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
11102 ; RV64ZVE32F-NEXT: bltz a2, .LBB92_62
11103 ; RV64ZVE32F-NEXT: # %bb.40: # %else52
11104 ; RV64ZVE32F-NEXT: slli a2, a1, 36
11105 ; RV64ZVE32F-NEXT: bltz a2, .LBB92_63
11106 ; RV64ZVE32F-NEXT: .LBB92_41: # %else54
11107 ; RV64ZVE32F-NEXT: slli a2, a1, 35
11108 ; RV64ZVE32F-NEXT: bltz a2, .LBB92_64
11109 ; RV64ZVE32F-NEXT: .LBB92_42: # %else56
11110 ; RV64ZVE32F-NEXT: slli a2, a1, 34
11111 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_44
11112 ; RV64ZVE32F-NEXT: .LBB92_43: # %cond.store57
11113 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
11114 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
11115 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
11116 ; RV64ZVE32F-NEXT: add a2, a0, a2
11117 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
11118 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 29
11119 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
11120 ; RV64ZVE32F-NEXT: .LBB92_44: # %else58
11121 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
11122 ; RV64ZVE32F-NEXT: slli a2, a1, 33
11123 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
11124 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_46
11125 ; RV64ZVE32F-NEXT: # %bb.45: # %cond.store59
11126 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
11127 ; RV64ZVE32F-NEXT: add a2, a0, a2
11128 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
11129 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 30
11130 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
11131 ; RV64ZVE32F-NEXT: .LBB92_46: # %else60
11132 ; RV64ZVE32F-NEXT: lui a2, 524288
11133 ; RV64ZVE32F-NEXT: and a1, a1, a2
11134 ; RV64ZVE32F-NEXT: beqz a1, .LBB92_48
11135 ; RV64ZVE32F-NEXT: # %bb.47: # %cond.store61
11136 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
11137 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
11138 ; RV64ZVE32F-NEXT: vmv.x.s a1, v10
11139 ; RV64ZVE32F-NEXT: add a0, a0, a1
11140 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
11141 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 31
11142 ; RV64ZVE32F-NEXT: vse8.v v8, (a0)
11143 ; RV64ZVE32F-NEXT: .LBB92_48: # %else62
11144 ; RV64ZVE32F-NEXT: ret
11145 ; RV64ZVE32F-NEXT: .LBB92_49: # %cond.store3
11146 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
11147 ; RV64ZVE32F-NEXT: add a2, a0, a2
11148 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
11149 ; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 2
11150 ; RV64ZVE32F-NEXT: vse8.v v14, (a2)
11151 ; RV64ZVE32F-NEXT: andi a2, a1, 8
11152 ; RV64ZVE32F-NEXT: beqz a2, .LBB92_6
11153 ; RV64ZVE32F-NEXT: .LBB92_50: # %cond.store5
11154 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
11155 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
11156 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
11157 ; RV64ZVE32F-NEXT: add a2, a0, a2
11158 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
11159 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 3
11160 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
11161 ; RV64ZVE32F-NEXT: andi a2, a1, 16
11162 ; RV64ZVE32F-NEXT: bnez a2, .LBB92_7
11163 ; RV64ZVE32F-NEXT: j .LBB92_8
11164 ; RV64ZVE32F-NEXT: .LBB92_51: # %cond.store11
11165 ; RV64ZVE32F-NEXT: vmv.x.s a2, v13
11166 ; RV64ZVE32F-NEXT: add a2, a0, a2
11167 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
11168 ; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 6
11169 ; RV64ZVE32F-NEXT: vse8.v v14, (a2)
11170 ; RV64ZVE32F-NEXT: andi a2, a1, 128
11171 ; RV64ZVE32F-NEXT: beqz a2, .LBB92_12
11172 ; RV64ZVE32F-NEXT: .LBB92_52: # %cond.store13
11173 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
11174 ; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1
11175 ; RV64ZVE32F-NEXT: vmv.x.s a2, v13
11176 ; RV64ZVE32F-NEXT: add a2, a0, a2
11177 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
11178 ; RV64ZVE32F-NEXT: vslidedown.vi v13, v8, 7
11179 ; RV64ZVE32F-NEXT: vse8.v v13, (a2)
11180 ; RV64ZVE32F-NEXT: andi a2, a1, 256
11181 ; RV64ZVE32F-NEXT: beqz a2, .LBB92_13
11182 ; RV64ZVE32F-NEXT: .LBB92_53: # %cond.store15
11183 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
11184 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
11185 ; RV64ZVE32F-NEXT: add a2, a0, a2
11186 ; RV64ZVE32F-NEXT: vslidedown.vi v13, v8, 8
11187 ; RV64ZVE32F-NEXT: vse8.v v13, (a2)
11188 ; RV64ZVE32F-NEXT: andi a2, a1, 512
11189 ; RV64ZVE32F-NEXT: bnez a2, .LBB92_14
11190 ; RV64ZVE32F-NEXT: j .LBB92_15
11191 ; RV64ZVE32F-NEXT: .LBB92_54: # %cond.store27
11192 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
11193 ; RV64ZVE32F-NEXT: add a2, a0, a2
11194 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
11195 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 14
11196 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
11197 ; RV64ZVE32F-NEXT: slli a2, a1, 48
11198 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_25
11199 ; RV64ZVE32F-NEXT: .LBB92_55: # %cond.store29
11200 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
11201 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
11202 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
11203 ; RV64ZVE32F-NEXT: add a2, a0, a2
11204 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
11205 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 15
11206 ; RV64ZVE32F-NEXT: vse8.v v11, (a2)
11207 ; RV64ZVE32F-NEXT: slli a2, a1, 47
11208 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_26
11209 ; RV64ZVE32F-NEXT: .LBB92_56: # %cond.store31
11210 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
11211 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
11212 ; RV64ZVE32F-NEXT: add a2, a0, a2
11213 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 16
11214 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
11215 ; RV64ZVE32F-NEXT: slli a2, a1, 46
11216 ; RV64ZVE32F-NEXT: bltz a2, .LBB92_27
11217 ; RV64ZVE32F-NEXT: j .LBB92_28
11218 ; RV64ZVE32F-NEXT: .LBB92_57: # %cond.store35
11219 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
11220 ; RV64ZVE32F-NEXT: add a2, a0, a2
11221 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
11222 ; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 18
11223 ; RV64ZVE32F-NEXT: vse8.v v14, (a2)
11224 ; RV64ZVE32F-NEXT: slli a2, a1, 44
11225 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_30
11226 ; RV64ZVE32F-NEXT: .LBB92_58: # %cond.store37
11227 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
11228 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
11229 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
11230 ; RV64ZVE32F-NEXT: add a2, a0, a2
11231 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
11232 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 19
11233 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
11234 ; RV64ZVE32F-NEXT: slli a2, a1, 43
11235 ; RV64ZVE32F-NEXT: bltz a2, .LBB92_31
11236 ; RV64ZVE32F-NEXT: j .LBB92_32
11237 ; RV64ZVE32F-NEXT: .LBB92_59: # %cond.store43
11238 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
11239 ; RV64ZVE32F-NEXT: add a2, a0, a2
11240 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
11241 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 22
11242 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
11243 ; RV64ZVE32F-NEXT: slli a2, a1, 40
11244 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_36
11245 ; RV64ZVE32F-NEXT: .LBB92_60: # %cond.store45
11246 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
11247 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
11248 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
11249 ; RV64ZVE32F-NEXT: add a2, a0, a2
11250 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
11251 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 23
11252 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
11253 ; RV64ZVE32F-NEXT: slli a2, a1, 39
11254 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_37
11255 ; RV64ZVE32F-NEXT: .LBB92_61: # %cond.store47
11256 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
11257 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
11258 ; RV64ZVE32F-NEXT: add a2, a0, a2
11259 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 24
11260 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
11261 ; RV64ZVE32F-NEXT: slli a2, a1, 38
11262 ; RV64ZVE32F-NEXT: bltz a2, .LBB92_38
11263 ; RV64ZVE32F-NEXT: j .LBB92_39
11264 ; RV64ZVE32F-NEXT: .LBB92_62: # %cond.store51
11265 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
11266 ; RV64ZVE32F-NEXT: add a2, a0, a2
11267 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
11268 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 26
11269 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
11270 ; RV64ZVE32F-NEXT: slli a2, a1, 36
11271 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_41
11272 ; RV64ZVE32F-NEXT: .LBB92_63: # %cond.store53
11273 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
11274 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
11275 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
11276 ; RV64ZVE32F-NEXT: add a2, a0, a2
11277 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
11278 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 27
11279 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
11280 ; RV64ZVE32F-NEXT: slli a2, a1, 35
11281 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_42
11282 ; RV64ZVE32F-NEXT: .LBB92_64: # %cond.store55
11283 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
11284 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
11285 ; RV64ZVE32F-NEXT: add a2, a0, a2
11286 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 28
11287 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
11288 ; RV64ZVE32F-NEXT: slli a2, a1, 34
11289 ; RV64ZVE32F-NEXT: bltz a2, .LBB92_43
11290 ; RV64ZVE32F-NEXT: j .LBB92_44
; Vector GEP over i8 elements: each lane's address is %base plus that lane's
; index (the expected code above sign-extends the i8 indices).
11291 %ptrs = getelementptr inbounds i8, ptr %base, <32 x i8> %idxs
; Store %val lane-by-lane to %ptrs under mask %m, with alignment argument 1.
11292 call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> %val, <32 x ptr> %ptrs, i32 1, <32 x i1> %m)
; Test: scatter of <8 x i16> with an all-true mask and consecutive element
; indices 0..7. The expectation under the common CHECK prefix (shared by all
; four run lines) is a plain unit-stride vse16.v to the base pointer, i.e. no
; indexed store and no per-lane expansion.
11296 define void @mscatter_unit_stride(<8 x i16> %val, ptr %base) {
11297 ; CHECK-LABEL: mscatter_unit_stride:
11299 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
11300 ; CHECK-NEXT: vse16.v v8, (a0)
; Build the constant all-true <8 x i1> mask by splatting lane 0.
11302 %head = insertelement <8 x i1> poison, i1 true, i16 0
11303 %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
; Constant i16 element indices 0,1,...,7 relative to %base.
11304 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
11305 call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %allones)
; Test: same as the unit-stride case but with consecutive element indices
; starting at 5 (5..12). The expectation under the common CHECK prefix is that
; the start offset is folded into the base (addi by 10 bytes = 5 i16 elements)
; followed by a plain unit-stride vse16.v.
11309 define void @mscatter_unit_stride_with_offset(<8 x i16> %val, ptr %base) {
11310 ; CHECK-LABEL: mscatter_unit_stride_with_offset:
11312 ; CHECK-NEXT: addi a0, a0, 10
11313 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
11314 ; CHECK-NEXT: vse16.v v8, (a0)
; Build the constant all-true <8 x i1> mask by splatting lane 0.
11316 %head = insertelement <8 x i1> poison, i1 true, i16 0
11317 %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
; Constant i16 element indices 5,6,...,12 relative to %base.
11318 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12>
11319 call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %allones)
; Test: scatter of <8 x i16> with an all-true mask and descending element
; indices 7..0 (lane 0 goes to the highest address). The expectation under the
; common CHECK prefix is a single strided store: vsse16.v starting at
; %base + 14 bytes with a stride of -2 bytes.
11323 define void @mscatter_shuffle_reverse(<8 x i16> %val, ptr %base) {
11324 ; CHECK-LABEL: mscatter_shuffle_reverse:
11326 ; CHECK-NEXT: addi a0, a0, 14
11327 ; CHECK-NEXT: li a1, -2
11328 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
11329 ; CHECK-NEXT: vsse16.v v8, (a0), a1
; Build the constant all-true <8 x i1> mask by splatting lane 0.
11331 %head = insertelement <8 x i1> poison, i1 true, i16 0
11332 %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
; Constant i16 element indices 7,6,...,0 relative to %base.
11333 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>
11334 call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %allones)
; Test: scatter of <8 x i16> with an all-true mask and element indices
; 4,5,6,7,0,1,2,3, i.e. the two halves of the vector swap places in memory.
; What the expectations below record for each check prefix:
;  - RV32 and RV64 prefixes: the rotation is done in registers
;    (vslidedown.vi by 4, then vslideup.vi by 4) and the result is written
;    with one unit-stride vse16.v.
;  - RV64ZVE32F prefix: the scatter is expanded into eight single-element
;    vse16.v stores at constant byte offsets from the base (8, 10, 12, 14,
;    0, 2, 4, 6).
; NOTE(review): in the RV64ZVE32F expectation the all-true mask is still
; materialized with vmset.m and tested bit by bit, and the first test is an
; always-taken "beqz zero". This looks like a recorded missed optimization
; rather than an intended result; confirm before relying on it.
11338 define void @mscatter_shuffle_rotate(<8 x i16> %val, ptr %base) {
11339 ; RV32-LABEL: mscatter_shuffle_rotate:
11341 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
11342 ; RV32-NEXT: vslidedown.vi v9, v8, 4
11343 ; RV32-NEXT: vslideup.vi v9, v8, 4
11344 ; RV32-NEXT: vse16.v v9, (a0)
11347 ; RV64-LABEL: mscatter_shuffle_rotate:
11349 ; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
11350 ; RV64-NEXT: vslidedown.vi v9, v8, 4
11351 ; RV64-NEXT: vslideup.vi v9, v8, 4
11352 ; RV64-NEXT: vse16.v v9, (a0)
11355 ; RV64ZVE32F-LABEL: mscatter_shuffle_rotate:
11356 ; RV64ZVE32F: # %bb.0:
11357 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
11358 ; RV64ZVE32F-NEXT: vmset.m v9
11359 ; RV64ZVE32F-NEXT: vmv.x.s a1, v9
11360 ; RV64ZVE32F-NEXT: beqz zero, .LBB96_9
11361 ; RV64ZVE32F-NEXT: # %bb.1: # %else
11362 ; RV64ZVE32F-NEXT: andi a2, a1, 2
11363 ; RV64ZVE32F-NEXT: bnez a2, .LBB96_10
11364 ; RV64ZVE32F-NEXT: .LBB96_2: # %else2
11365 ; RV64ZVE32F-NEXT: andi a2, a1, 4
11366 ; RV64ZVE32F-NEXT: bnez a2, .LBB96_11
11367 ; RV64ZVE32F-NEXT: .LBB96_3: # %else4
11368 ; RV64ZVE32F-NEXT: andi a2, a1, 8
11369 ; RV64ZVE32F-NEXT: bnez a2, .LBB96_12
11370 ; RV64ZVE32F-NEXT: .LBB96_4: # %else6
11371 ; RV64ZVE32F-NEXT: andi a2, a1, 16
11372 ; RV64ZVE32F-NEXT: bnez a2, .LBB96_13
11373 ; RV64ZVE32F-NEXT: .LBB96_5: # %else8
11374 ; RV64ZVE32F-NEXT: andi a2, a1, 32
11375 ; RV64ZVE32F-NEXT: bnez a2, .LBB96_14
11376 ; RV64ZVE32F-NEXT: .LBB96_6: # %else10
11377 ; RV64ZVE32F-NEXT: andi a2, a1, 64
11378 ; RV64ZVE32F-NEXT: bnez a2, .LBB96_15
11379 ; RV64ZVE32F-NEXT: .LBB96_7: # %else12
11380 ; RV64ZVE32F-NEXT: andi a1, a1, -128
11381 ; RV64ZVE32F-NEXT: bnez a1, .LBB96_16
11382 ; RV64ZVE32F-NEXT: .LBB96_8: # %else14
11383 ; RV64ZVE32F-NEXT: ret
11384 ; RV64ZVE32F-NEXT: .LBB96_9: # %cond.store
11385 ; RV64ZVE32F-NEXT: addi a2, a0, 8
11386 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
11387 ; RV64ZVE32F-NEXT: vse16.v v8, (a2)
11388 ; RV64ZVE32F-NEXT: andi a2, a1, 2
11389 ; RV64ZVE32F-NEXT: beqz a2, .LBB96_2
11390 ; RV64ZVE32F-NEXT: .LBB96_10: # %cond.store1
11391 ; RV64ZVE32F-NEXT: addi a2, a0, 10
11392 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
11393 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
11394 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
11395 ; RV64ZVE32F-NEXT: andi a2, a1, 4
11396 ; RV64ZVE32F-NEXT: beqz a2, .LBB96_3
11397 ; RV64ZVE32F-NEXT: .LBB96_11: # %cond.store3
11398 ; RV64ZVE32F-NEXT: addi a2, a0, 12
11399 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
11400 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
11401 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
11402 ; RV64ZVE32F-NEXT: andi a2, a1, 8
11403 ; RV64ZVE32F-NEXT: beqz a2, .LBB96_4
11404 ; RV64ZVE32F-NEXT: .LBB96_12: # %cond.store5
11405 ; RV64ZVE32F-NEXT: addi a2, a0, 14
11406 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
11407 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
11408 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
11409 ; RV64ZVE32F-NEXT: andi a2, a1, 16
11410 ; RV64ZVE32F-NEXT: beqz a2, .LBB96_5
11411 ; RV64ZVE32F-NEXT: .LBB96_13: # %cond.store7
11412 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
11413 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
11414 ; RV64ZVE32F-NEXT: vse16.v v9, (a0)
11415 ; RV64ZVE32F-NEXT: andi a2, a1, 32
11416 ; RV64ZVE32F-NEXT: beqz a2, .LBB96_6
11417 ; RV64ZVE32F-NEXT: .LBB96_14: # %cond.store9
11418 ; RV64ZVE32F-NEXT: addi a2, a0, 2
11419 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
11420 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
11421 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
11422 ; RV64ZVE32F-NEXT: andi a2, a1, 64
11423 ; RV64ZVE32F-NEXT: beqz a2, .LBB96_7
11424 ; RV64ZVE32F-NEXT: .LBB96_15: # %cond.store11
11425 ; RV64ZVE32F-NEXT: addi a2, a0, 4
11426 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
11427 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 6
11428 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
11429 ; RV64ZVE32F-NEXT: andi a1, a1, -128
11430 ; RV64ZVE32F-NEXT: beqz a1, .LBB96_8
11431 ; RV64ZVE32F-NEXT: .LBB96_16: # %cond.store13
11432 ; RV64ZVE32F-NEXT: addi a0, a0, 6
11433 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
11434 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
11435 ; RV64ZVE32F-NEXT: vse16.v v8, (a0)
11436 ; RV64ZVE32F-NEXT: ret
; Build the constant all-true <8 x i1> mask by splatting lane 0.
11437 %head = insertelement <8 x i1> poison, i1 true, i16 0
11438 %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
; Constant i16 element indices 4,5,6,7,0,1,2,3 relative to %base.
11439 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 4, i64 5, i64 6, i64 7, i64 0, i64 1, i64 2, i64 3>
11440 call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %allones)