1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d \
3 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32V
4 ; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d \
5 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
6 ; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+zve32f,+zvl128b -target-abi=ilp32d \
7 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVE32F
8 ; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+zve32f,+zvl128b -target-abi=lp64d \
9 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64ZVE32F
11 declare void @llvm.masked.scatter.v1i8.v1p0(<1 x i8>, <1 x ptr>, i32, <1 x i1>)
define void @mscatter_v1i8(<1 x i8> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
; RV32V-LABEL: mscatter_v1i8:
; RV32V:       # %bb.0:
; RV32V-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
; RV32V-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT:    ret
;
; RV64-LABEL: mscatter_v1i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
;
; RV32ZVE32F-LABEL: mscatter_v1i8:
; RV32ZVE32F:       # %bb.0:
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV32ZVE32F-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32ZVE32F-NEXT:    ret
;
; RV64ZVE32F-LABEL: mscatter_v1i8:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vfirst.m a1, v0
; RV64ZVE32F-NEXT:    bnez a1, .LBB0_2
; RV64ZVE32F-NEXT:  # %bb.1: # %cond.store
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vse8.v v8, (a0)
; RV64ZVE32F-NEXT:  .LBB0_2: # %else
; RV64ZVE32F-NEXT:    ret
  call void @llvm.masked.scatter.v1i8.v1p0(<1 x i8> %val, <1 x ptr> %ptrs, i32 1, <1 x i1> %m)
  ret void
}
46 declare void @llvm.masked.scatter.v2i8.v2p0(<2 x i8>, <2 x ptr>, i32, <2 x i1>)
define void @mscatter_v2i8(<2 x i8> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV32V-LABEL: mscatter_v2i8:
; RV32V:       # %bb.0:
; RV32V-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; RV32V-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT:    ret
;
; RV64-LABEL: mscatter_v2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
;
; RV32ZVE32F-LABEL: mscatter_v2i8:
; RV32ZVE32F:       # %bb.0:
; RV32ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV32ZVE32F-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32ZVE32F-NEXT:    ret
;
; RV64ZVE32F-LABEL: mscatter_v2i8:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a2, v0
; RV64ZVE32F-NEXT:    andi a3, a2, 1
; RV64ZVE32F-NEXT:    bnez a3, .LBB1_3
; RV64ZVE32F-NEXT:  # %bb.1: # %else
; RV64ZVE32F-NEXT:    andi a2, a2, 2
; RV64ZVE32F-NEXT:    bnez a2, .LBB1_4
; RV64ZVE32F-NEXT:  .LBB1_2: # %else2
; RV64ZVE32F-NEXT:    ret
; RV64ZVE32F-NEXT:  .LBB1_3: # %cond.store
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vse8.v v8, (a0)
; RV64ZVE32F-NEXT:    andi a2, a2, 2
; RV64ZVE32F-NEXT:    beqz a2, .LBB1_2
; RV64ZVE32F-NEXT:  .LBB1_4: # %cond.store1
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT:    vse8.v v8, (a1)
; RV64ZVE32F-NEXT:    ret
  call void @llvm.masked.scatter.v2i8.v2p0(<2 x i8> %val, <2 x ptr> %ptrs, i32 1, <2 x i1> %m)
  ret void
}
define void @mscatter_v2i16_truncstore_v2i8(<2 x i16> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV32V-LABEL: mscatter_v2i16_truncstore_v2i8:
; RV32V:       # %bb.0:
; RV32V-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; RV32V-NEXT:    vnsrl.wi v8, v8, 0
; RV32V-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT:    ret
;
; RV64-LABEL: mscatter_v2i16_truncstore_v2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
;
; RV32ZVE32F-LABEL: mscatter_v2i16_truncstore_v2i8:
; RV32ZVE32F:       # %bb.0:
; RV32ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV32ZVE32F-NEXT:    vnsrl.wi v8, v8, 0
; RV32ZVE32F-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32ZVE32F-NEXT:    ret
;
; RV64ZVE32F-LABEL: mscatter_v2i16_truncstore_v2i8:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a2, v0
; RV64ZVE32F-NEXT:    andi a3, a2, 1
; RV64ZVE32F-NEXT:    vnsrl.wi v8, v8, 0
; RV64ZVE32F-NEXT:    bnez a3, .LBB2_3
; RV64ZVE32F-NEXT:  # %bb.1: # %else
; RV64ZVE32F-NEXT:    andi a2, a2, 2
; RV64ZVE32F-NEXT:    bnez a2, .LBB2_4
; RV64ZVE32F-NEXT:  .LBB2_2: # %else2
; RV64ZVE32F-NEXT:    ret
; RV64ZVE32F-NEXT:  .LBB2_3: # %cond.store
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vse8.v v8, (a0)
; RV64ZVE32F-NEXT:    andi a2, a2, 2
; RV64ZVE32F-NEXT:    beqz a2, .LBB2_2
; RV64ZVE32F-NEXT:  .LBB2_4: # %cond.store1
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT:    vse8.v v8, (a1)
; RV64ZVE32F-NEXT:    ret
  %tval = trunc <2 x i16> %val to <2 x i8>
  call void @llvm.masked.scatter.v2i8.v2p0(<2 x i8> %tval, <2 x ptr> %ptrs, i32 1, <2 x i1> %m)
  ret void
}
define void @mscatter_v2i32_truncstore_v2i8(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV32V-LABEL: mscatter_v2i32_truncstore_v2i8:
; RV32V:       # %bb.0:
; RV32V-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; RV32V-NEXT:    vnsrl.wi v8, v8, 0
; RV32V-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; RV32V-NEXT:    vnsrl.wi v8, v8, 0
; RV32V-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT:    ret
;
; RV64-LABEL: mscatter_v2i32_truncstore_v2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
;
; RV32ZVE32F-LABEL: mscatter_v2i32_truncstore_v2i8:
; RV32ZVE32F:       # %bb.0:
; RV32ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
; RV32ZVE32F-NEXT:    vnsrl.wi v8, v8, 0
; RV32ZVE32F-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; RV32ZVE32F-NEXT:    vnsrl.wi v8, v8, 0
; RV32ZVE32F-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32ZVE32F-NEXT:    ret
;
; RV64ZVE32F-LABEL: mscatter_v2i32_truncstore_v2i8:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vnsrl.wi v8, v8, 0
; RV64ZVE32F-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a2, v0
; RV64ZVE32F-NEXT:    andi a3, a2, 1
; RV64ZVE32F-NEXT:    vnsrl.wi v8, v8, 0
; RV64ZVE32F-NEXT:    bnez a3, .LBB3_3
; RV64ZVE32F-NEXT:  # %bb.1: # %else
; RV64ZVE32F-NEXT:    andi a2, a2, 2
; RV64ZVE32F-NEXT:    bnez a2, .LBB3_4
; RV64ZVE32F-NEXT:  .LBB3_2: # %else2
; RV64ZVE32F-NEXT:    ret
; RV64ZVE32F-NEXT:  .LBB3_3: # %cond.store
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vse8.v v8, (a0)
; RV64ZVE32F-NEXT:    andi a2, a2, 2
; RV64ZVE32F-NEXT:    beqz a2, .LBB3_2
; RV64ZVE32F-NEXT:  .LBB3_4: # %cond.store1
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT:    vse8.v v8, (a1)
; RV64ZVE32F-NEXT:    ret
  %tval = trunc <2 x i32> %val to <2 x i8>
  call void @llvm.masked.scatter.v2i8.v2p0(<2 x i8> %tval, <2 x ptr> %ptrs, i32 1, <2 x i1> %m)
  ret void
}
define void @mscatter_v2i64_truncstore_v2i8(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV32V-LABEL: mscatter_v2i64_truncstore_v2i8:
; RV32V:       # %bb.0:
; RV32V-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32V-NEXT:    vnsrl.wi v8, v8, 0
; RV32V-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; RV32V-NEXT:    vnsrl.wi v8, v8, 0
; RV32V-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; RV32V-NEXT:    vnsrl.wi v8, v8, 0
; RV32V-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT:    ret
;
; RV64-LABEL: mscatter_v2i64_truncstore_v2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
;
; RV32ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i8:
; RV32ZVE32F:       # %bb.0:
; RV32ZVE32F-NEXT:    lw a1, 8(a0)
; RV32ZVE32F-NEXT:    lw a0, 0(a0)
; RV32ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV32ZVE32F-NEXT:    vmv.s.x v9, a1
; RV32ZVE32F-NEXT:    vmv.s.x v10, a0
; RV32ZVE32F-NEXT:    vslideup.vi v10, v9, 1
; RV32ZVE32F-NEXT:    vsoxei32.v v10, (zero), v8, v0.t
; RV32ZVE32F-NEXT:    ret
;
; RV64ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i8:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vmv.s.x v9, a1
; RV64ZVE32F-NEXT:    vmv.s.x v8, a0
; RV64ZVE32F-NEXT:    vmv.x.s a0, v0
; RV64ZVE32F-NEXT:    andi a1, a0, 1
; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT:    bnez a1, .LBB4_3
; RV64ZVE32F-NEXT:  # %bb.1: # %else
; RV64ZVE32F-NEXT:    andi a0, a0, 2
; RV64ZVE32F-NEXT:    bnez a0, .LBB4_4
; RV64ZVE32F-NEXT:  .LBB4_2: # %else2
; RV64ZVE32F-NEXT:    ret
; RV64ZVE32F-NEXT:  .LBB4_3: # %cond.store
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vse8.v v8, (a2)
; RV64ZVE32F-NEXT:    andi a0, a0, 2
; RV64ZVE32F-NEXT:    beqz a0, .LBB4_2
; RV64ZVE32F-NEXT:  .LBB4_4: # %cond.store1
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT:    vse8.v v8, (a3)
; RV64ZVE32F-NEXT:    ret
  %tval = trunc <2 x i64> %val to <2 x i8>
  call void @llvm.masked.scatter.v2i8.v2p0(<2 x i8> %tval, <2 x ptr> %ptrs, i32 1, <2 x i1> %m)
  ret void
}
261 declare void @llvm.masked.scatter.v4i8.v4p0(<4 x i8>, <4 x ptr>, i32, <4 x i1>)
define void @mscatter_v4i8(<4 x i8> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV32-LABEL: mscatter_v4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_v4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
;
; RV64ZVE32F-LABEL: mscatter_v4i8:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    ld a1, 24(a0)
; RV64ZVE32F-NEXT:    ld a2, 16(a0)
; RV64ZVE32F-NEXT:    ld a4, 8(a0)
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a3, v0
; RV64ZVE32F-NEXT:    andi a5, a3, 1
; RV64ZVE32F-NEXT:    bnez a5, .LBB5_5
; RV64ZVE32F-NEXT:  # %bb.1: # %else
; RV64ZVE32F-NEXT:    andi a0, a3, 2
; RV64ZVE32F-NEXT:    bnez a0, .LBB5_6
; RV64ZVE32F-NEXT:  .LBB5_2: # %else2
; RV64ZVE32F-NEXT:    andi a0, a3, 4
; RV64ZVE32F-NEXT:    bnez a0, .LBB5_7
; RV64ZVE32F-NEXT:  .LBB5_3: # %else4
; RV64ZVE32F-NEXT:    andi a3, a3, 8
; RV64ZVE32F-NEXT:    bnez a3, .LBB5_8
; RV64ZVE32F-NEXT:  .LBB5_4: # %else6
; RV64ZVE32F-NEXT:    ret
; RV64ZVE32F-NEXT:  .LBB5_5: # %cond.store
; RV64ZVE32F-NEXT:    ld a0, 0(a0)
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vse8.v v8, (a0)
; RV64ZVE32F-NEXT:    andi a0, a3, 2
; RV64ZVE32F-NEXT:    beqz a0, .LBB5_2
; RV64ZVE32F-NEXT:  .LBB5_6: # %cond.store1
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT:    vse8.v v9, (a4)
; RV64ZVE32F-NEXT:    andi a0, a3, 4
; RV64ZVE32F-NEXT:    beqz a0, .LBB5_3
; RV64ZVE32F-NEXT:  .LBB5_7: # %cond.store3
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 2
; RV64ZVE32F-NEXT:    vse8.v v9, (a2)
; RV64ZVE32F-NEXT:    andi a3, a3, 8
; RV64ZVE32F-NEXT:    beqz a3, .LBB5_4
; RV64ZVE32F-NEXT:  .LBB5_8: # %cond.store5
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 3
; RV64ZVE32F-NEXT:    vse8.v v8, (a1)
; RV64ZVE32F-NEXT:    ret
  call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> %m)
  ret void
}
define void @mscatter_truemask_v4i8(<4 x i8> %val, <4 x ptr> %ptrs) {
; RV32-LABEL: mscatter_truemask_v4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_v4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10
; RV64-NEXT:    ret
;
; RV64ZVE32F-LABEL: mscatter_truemask_v4i8:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    ld a1, 0(a0)
; RV64ZVE32F-NEXT:    ld a2, 24(a0)
; RV64ZVE32F-NEXT:    ld a3, 8(a0)
; RV64ZVE32F-NEXT:    ld a0, 16(a0)
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vse8.v v8, (a1)
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT:    vse8.v v9, (a3)
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 2
; RV64ZVE32F-NEXT:    vse8.v v9, (a0)
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 3
; RV64ZVE32F-NEXT:    vse8.v v8, (a2)
; RV64ZVE32F-NEXT:    ret
  call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 1))
  ret void
}
define void @mscatter_falsemask_v4i8(<4 x i8> %val, <4 x ptr> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_v4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> zeroinitializer)
  ret void
}
363 declare void @llvm.masked.scatter.v8i8.v8p0(<8 x i8>, <8 x ptr>, i32, <8 x i1>)
define void @mscatter_v8i8(<8 x i8> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV32-LABEL: mscatter_v8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_v8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
;
; RV64ZVE32F-LABEL: mscatter_v8i8:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    ld a1, 56(a0)
; RV64ZVE32F-NEXT:    ld a2, 48(a0)
; RV64ZVE32F-NEXT:    ld a4, 40(a0)
; RV64ZVE32F-NEXT:    ld a5, 32(a0)
; RV64ZVE32F-NEXT:    ld a6, 24(a0)
; RV64ZVE32F-NEXT:    ld a7, 16(a0)
; RV64ZVE32F-NEXT:    ld t0, 8(a0)
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a3, v0
; RV64ZVE32F-NEXT:    andi t1, a3, 1
; RV64ZVE32F-NEXT:    bnez t1, .LBB8_9
; RV64ZVE32F-NEXT:  # %bb.1: # %else
; RV64ZVE32F-NEXT:    andi a0, a3, 2
; RV64ZVE32F-NEXT:    bnez a0, .LBB8_10
; RV64ZVE32F-NEXT:  .LBB8_2: # %else2
; RV64ZVE32F-NEXT:    andi a0, a3, 4
; RV64ZVE32F-NEXT:    bnez a0, .LBB8_11
; RV64ZVE32F-NEXT:  .LBB8_3: # %else4
; RV64ZVE32F-NEXT:    andi a0, a3, 8
; RV64ZVE32F-NEXT:    bnez a0, .LBB8_12
; RV64ZVE32F-NEXT:  .LBB8_4: # %else6
; RV64ZVE32F-NEXT:    andi a0, a3, 16
; RV64ZVE32F-NEXT:    bnez a0, .LBB8_13
; RV64ZVE32F-NEXT:  .LBB8_5: # %else8
; RV64ZVE32F-NEXT:    andi a0, a3, 32
; RV64ZVE32F-NEXT:    bnez a0, .LBB8_14
; RV64ZVE32F-NEXT:  .LBB8_6: # %else10
; RV64ZVE32F-NEXT:    andi a0, a3, 64
; RV64ZVE32F-NEXT:    bnez a0, .LBB8_15
; RV64ZVE32F-NEXT:  .LBB8_7: # %else12
; RV64ZVE32F-NEXT:    andi a0, a3, -128
; RV64ZVE32F-NEXT:    bnez a0, .LBB8_16
; RV64ZVE32F-NEXT:  .LBB8_8: # %else14
; RV64ZVE32F-NEXT:    ret
; RV64ZVE32F-NEXT:  .LBB8_9: # %cond.store
; RV64ZVE32F-NEXT:    ld a0, 0(a0)
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT:    vse8.v v8, (a0)
; RV64ZVE32F-NEXT:    andi a0, a3, 2
; RV64ZVE32F-NEXT:    beqz a0, .LBB8_2
; RV64ZVE32F-NEXT:  .LBB8_10: # %cond.store1
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT:    vse8.v v9, (t0)
; RV64ZVE32F-NEXT:    andi a0, a3, 4
; RV64ZVE32F-NEXT:    beqz a0, .LBB8_3
; RV64ZVE32F-NEXT:  .LBB8_11: # %cond.store3
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 2
; RV64ZVE32F-NEXT:    vse8.v v9, (a7)
; RV64ZVE32F-NEXT:    andi a0, a3, 8
; RV64ZVE32F-NEXT:    beqz a0, .LBB8_4
; RV64ZVE32F-NEXT:  .LBB8_12: # %cond.store5
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 3
; RV64ZVE32F-NEXT:    vse8.v v9, (a6)
; RV64ZVE32F-NEXT:    andi a0, a3, 16
; RV64ZVE32F-NEXT:    beqz a0, .LBB8_5
; RV64ZVE32F-NEXT:  .LBB8_13: # %cond.store7
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT:    vse8.v v9, (a5)
; RV64ZVE32F-NEXT:    andi a0, a3, 32
; RV64ZVE32F-NEXT:    beqz a0, .LBB8_6
; RV64ZVE32F-NEXT:  .LBB8_14: # %cond.store9
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 5
; RV64ZVE32F-NEXT:    vse8.v v9, (a4)
; RV64ZVE32F-NEXT:    andi a0, a3, 64
; RV64ZVE32F-NEXT:    beqz a0, .LBB8_7
; RV64ZVE32F-NEXT:  .LBB8_15: # %cond.store11
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 6
; RV64ZVE32F-NEXT:    vse8.v v9, (a2)
; RV64ZVE32F-NEXT:    andi a0, a3, -128
; RV64ZVE32F-NEXT:    beqz a0, .LBB8_8
; RV64ZVE32F-NEXT:  .LBB8_16: # %cond.store13
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 7
; RV64ZVE32F-NEXT:    vse8.v v8, (a1)
; RV64ZVE32F-NEXT:    ret
  call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> %val, <8 x ptr> %ptrs, i32 1, <8 x i1> %m)
  ret void
}
define void @mscatter_baseidx_v8i8(<8 x i8> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_v8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v9
; RV32-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_v8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v9
; RV64-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
;
; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a1, v0
; RV64ZVE32F-NEXT:    andi a2, a1, 1
; RV64ZVE32F-NEXT:    beqz a2, .LBB9_2
; RV64ZVE32F-NEXT:  # %bb.1: # %cond.store
; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT:    vse8.v v8, (a2)
; RV64ZVE32F-NEXT:  .LBB9_2: # %else
; RV64ZVE32F-NEXT:    andi a2, a1, 2
; RV64ZVE32F-NEXT:    beqz a2, .LBB9_4
; RV64ZVE32F-NEXT:  # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v10, v9, 1
; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT:    vse8.v v10, (a2)
; RV64ZVE32F-NEXT:  .LBB9_4: # %else2
; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v10, v9, 4
; RV64ZVE32F-NEXT:    andi a2, a1, 4
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v9, 2
; RV64ZVE32F-NEXT:    bnez a2, .LBB9_12
; RV64ZVE32F-NEXT:  # %bb.5: # %else4
; RV64ZVE32F-NEXT:    andi a2, a1, 8
; RV64ZVE32F-NEXT:    bnez a2, .LBB9_13
; RV64ZVE32F-NEXT:  .LBB9_6: # %else6
; RV64ZVE32F-NEXT:    andi a2, a1, 16
; RV64ZVE32F-NEXT:    bnez a2, .LBB9_14
; RV64ZVE32F-NEXT:  .LBB9_7: # %else8
; RV64ZVE32F-NEXT:    andi a2, a1, 32
; RV64ZVE32F-NEXT:    beqz a2, .LBB9_9
; RV64ZVE32F-NEXT:  .LBB9_8: # %cond.store9
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v10, 1
; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 5
; RV64ZVE32F-NEXT:    vse8.v v9, (a2)
; RV64ZVE32F-NEXT:  .LBB9_9: # %else10
; RV64ZVE32F-NEXT:    andi a2, a1, 64
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v10, 2
; RV64ZVE32F-NEXT:    bnez a2, .LBB9_15
; RV64ZVE32F-NEXT:  # %bb.10: # %else12
; RV64ZVE32F-NEXT:    andi a1, a1, -128
; RV64ZVE32F-NEXT:    bnez a1, .LBB9_16
; RV64ZVE32F-NEXT:  .LBB9_11: # %else14
; RV64ZVE32F-NEXT:    ret
; RV64ZVE32F-NEXT:  .LBB9_12: # %cond.store3
; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v11, v8, 2
; RV64ZVE32F-NEXT:    vse8.v v11, (a2)
; RV64ZVE32F-NEXT:    andi a2, a1, 8
; RV64ZVE32F-NEXT:    beqz a2, .LBB9_6
; RV64ZVE32F-NEXT:  .LBB9_13: # %cond.store5
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 3
; RV64ZVE32F-NEXT:    vse8.v v9, (a2)
; RV64ZVE32F-NEXT:    andi a2, a1, 16
; RV64ZVE32F-NEXT:    beqz a2, .LBB9_7
; RV64ZVE32F-NEXT:  .LBB9_14: # %cond.store7
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT:    vse8.v v9, (a2)
; RV64ZVE32F-NEXT:    andi a2, a1, 32
; RV64ZVE32F-NEXT:    bnez a2, .LBB9_8
; RV64ZVE32F-NEXT:    j .LBB9_9
; RV64ZVE32F-NEXT:  .LBB9_15: # %cond.store11
; RV64ZVE32F-NEXT:    vmv.x.s a2, v9
; RV64ZVE32F-NEXT:    add a2, a0, a2
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v10, v8, 6
; RV64ZVE32F-NEXT:    vse8.v v10, (a2)
; RV64ZVE32F-NEXT:    andi a1, a1, -128
; RV64ZVE32F-NEXT:    beqz a1, .LBB9_11
; RV64ZVE32F-NEXT:  .LBB9_16: # %cond.store13
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT:    vmv.x.s a1, v9
; RV64ZVE32F-NEXT:    add a0, a0, a1
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 7
; RV64ZVE32F-NEXT:    vse8.v v8, (a0)
; RV64ZVE32F-NEXT:    ret
  %ptrs = getelementptr inbounds i8, ptr %base, <8 x i8> %idxs
  call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> %val, <8 x ptr> %ptrs, i32 1, <8 x i1> %m)
  ret void
}
587 declare void @llvm.masked.scatter.v1i16.v1p0(<1 x i16>, <1 x ptr>, i32, <1 x i1>)
define void @mscatter_v1i16(<1 x i16> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
; RV32V-LABEL: mscatter_v1i16:
; RV32V:       # %bb.0:
; RV32V-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
; RV32V-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT:    ret
;
; RV64-LABEL: mscatter_v1i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
;
; RV32ZVE32F-LABEL: mscatter_v1i16:
; RV32ZVE32F:       # %bb.0:
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
; RV32ZVE32F-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32ZVE32F-NEXT:    ret
;
; RV64ZVE32F-LABEL: mscatter_v1i16:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vfirst.m a1, v0
; RV64ZVE32F-NEXT:    bnez a1, .LBB10_2
; RV64ZVE32F-NEXT:  # %bb.1: # %cond.store
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vse16.v v8, (a0)
; RV64ZVE32F-NEXT:  .LBB10_2: # %else
; RV64ZVE32F-NEXT:    ret
  call void @llvm.masked.scatter.v1i16.v1p0(<1 x i16> %val, <1 x ptr> %ptrs, i32 2, <1 x i1> %m)
  ret void
}
622 declare void @llvm.masked.scatter.v2i16.v2p0(<2 x i16>, <2 x ptr>, i32, <2 x i1>)
define void @mscatter_v2i16(<2 x i16> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV32V-LABEL: mscatter_v2i16:
; RV32V:       # %bb.0:
; RV32V-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; RV32V-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT:    ret
;
; RV64-LABEL: mscatter_v2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
;
; RV32ZVE32F-LABEL: mscatter_v2i16:
; RV32ZVE32F:       # %bb.0:
; RV32ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
; RV32ZVE32F-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32ZVE32F-NEXT:    ret
;
; RV64ZVE32F-LABEL: mscatter_v2i16:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a2, v0
; RV64ZVE32F-NEXT:    andi a3, a2, 1
; RV64ZVE32F-NEXT:    bnez a3, .LBB11_3
; RV64ZVE32F-NEXT:  # %bb.1: # %else
; RV64ZVE32F-NEXT:    andi a2, a2, 2
; RV64ZVE32F-NEXT:    bnez a2, .LBB11_4
; RV64ZVE32F-NEXT:  .LBB11_2: # %else2
; RV64ZVE32F-NEXT:    ret
; RV64ZVE32F-NEXT:  .LBB11_3: # %cond.store
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vse16.v v8, (a0)
; RV64ZVE32F-NEXT:    andi a2, a2, 2
; RV64ZVE32F-NEXT:    beqz a2, .LBB11_2
; RV64ZVE32F-NEXT:  .LBB11_4: # %cond.store1
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT:    vse16.v v8, (a1)
; RV64ZVE32F-NEXT:    ret
  call void @llvm.masked.scatter.v2i16.v2p0(<2 x i16> %val, <2 x ptr> %ptrs, i32 2, <2 x i1> %m)
  ret void
}
define void @mscatter_v2i32_truncstore_v2i16(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV32V-LABEL: mscatter_v2i32_truncstore_v2i16:
; RV32V:       # %bb.0:
; RV32V-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; RV32V-NEXT:    vnsrl.wi v8, v8, 0
; RV32V-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT:    ret
;
; RV64-LABEL: mscatter_v2i32_truncstore_v2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
;
; RV32ZVE32F-LABEL: mscatter_v2i32_truncstore_v2i16:
; RV32ZVE32F:       # %bb.0:
; RV32ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
; RV32ZVE32F-NEXT:    vnsrl.wi v8, v8, 0
; RV32ZVE32F-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32ZVE32F-NEXT:    ret
;
; RV64ZVE32F-LABEL: mscatter_v2i32_truncstore_v2i16:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a2, v0
; RV64ZVE32F-NEXT:    andi a3, a2, 1
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vnsrl.wi v8, v8, 0
; RV64ZVE32F-NEXT:    bnez a3, .LBB12_3
; RV64ZVE32F-NEXT:  # %bb.1: # %else
; RV64ZVE32F-NEXT:    andi a2, a2, 2
; RV64ZVE32F-NEXT:    bnez a2, .LBB12_4
; RV64ZVE32F-NEXT:  .LBB12_2: # %else2
; RV64ZVE32F-NEXT:    ret
; RV64ZVE32F-NEXT:  .LBB12_3: # %cond.store
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vse16.v v8, (a0)
; RV64ZVE32F-NEXT:    andi a2, a2, 2
; RV64ZVE32F-NEXT:    beqz a2, .LBB12_2
; RV64ZVE32F-NEXT:  .LBB12_4: # %cond.store1
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT:    vse16.v v8, (a1)
; RV64ZVE32F-NEXT:    ret
  %tval = trunc <2 x i32> %val to <2 x i16>
  call void @llvm.masked.scatter.v2i16.v2p0(<2 x i16> %tval, <2 x ptr> %ptrs, i32 2, <2 x i1> %m)
  ret void
}
define void @mscatter_v2i64_truncstore_v2i16(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; RV32V-LABEL: mscatter_v2i64_truncstore_v2i16:
; RV32V:       # %bb.0:
; RV32V-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32V-NEXT:    vnsrl.wi v8, v8, 0
; RV32V-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; RV32V-NEXT:    vnsrl.wi v8, v8, 0
; RV32V-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32V-NEXT:    ret
;
; RV64-LABEL: mscatter_v2i64_truncstore_v2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
;
; RV32ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i16:
; RV32ZVE32F:       # %bb.0:
; RV32ZVE32F-NEXT:    lw a1, 8(a0)
; RV32ZVE32F-NEXT:    lw a0, 0(a0)
; RV32ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
; RV32ZVE32F-NEXT:    vmv.s.x v9, a1
; RV32ZVE32F-NEXT:    vmv.s.x v10, a0
; RV32ZVE32F-NEXT:    vslideup.vi v10, v9, 1
; RV32ZVE32F-NEXT:    vsoxei32.v v10, (zero), v8, v0.t
; RV32ZVE32F-NEXT:    ret
;
; RV64ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i16:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.s.x v9, a1
; RV64ZVE32F-NEXT:    vmv.s.x v8, a0
; RV64ZVE32F-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a0, v0
; RV64ZVE32F-NEXT:    andi a1, a0, 1
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT:    bnez a1, .LBB13_3
; RV64ZVE32F-NEXT:  # %bb.1: # %else
; RV64ZVE32F-NEXT:    andi a0, a0, 2
; RV64ZVE32F-NEXT:    bnez a0, .LBB13_4
; RV64ZVE32F-NEXT:  .LBB13_2: # %else2
; RV64ZVE32F-NEXT:    ret
; RV64ZVE32F-NEXT:  .LBB13_3: # %cond.store
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vse16.v v8, (a2)
; RV64ZVE32F-NEXT:    andi a0, a0, 2
; RV64ZVE32F-NEXT:    beqz a0, .LBB13_2
; RV64ZVE32F-NEXT:  .LBB13_4: # %cond.store1
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT:    vse16.v v8, (a3)
; RV64ZVE32F-NEXT:    ret
  %tval = trunc <2 x i64> %val to <2 x i16>
  call void @llvm.masked.scatter.v2i16.v2p0(<2 x i16> %tval, <2 x ptr> %ptrs, i32 2, <2 x i1> %m)
  ret void
}
779 declare void @llvm.masked.scatter.v4i16.v4p0(<4 x i16>, <4 x ptr>, i32, <4 x i1>)
define void @mscatter_v4i16(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; RV32-LABEL: mscatter_v4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_v4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
;
; RV64ZVE32F-LABEL: mscatter_v4i16:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    ld a1, 24(a0)
; RV64ZVE32F-NEXT:    ld a2, 16(a0)
; RV64ZVE32F-NEXT:    ld a4, 8(a0)
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a3, v0
; RV64ZVE32F-NEXT:    andi a5, a3, 1
; RV64ZVE32F-NEXT:    bnez a5, .LBB14_5
; RV64ZVE32F-NEXT:  # %bb.1: # %else
; RV64ZVE32F-NEXT:    andi a0, a3, 2
; RV64ZVE32F-NEXT:    bnez a0, .LBB14_6
; RV64ZVE32F-NEXT:  .LBB14_2: # %else2
; RV64ZVE32F-NEXT:    andi a0, a3, 4
; RV64ZVE32F-NEXT:    bnez a0, .LBB14_7
; RV64ZVE32F-NEXT:  .LBB14_3: # %else4
; RV64ZVE32F-NEXT:    andi a3, a3, 8
; RV64ZVE32F-NEXT:    bnez a3, .LBB14_8
; RV64ZVE32F-NEXT:  .LBB14_4: # %else6
; RV64ZVE32F-NEXT:    ret
; RV64ZVE32F-NEXT:  .LBB14_5: # %cond.store
; RV64ZVE32F-NEXT:    ld a0, 0(a0)
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vse16.v v8, (a0)
; RV64ZVE32F-NEXT:    andi a0, a3, 2
; RV64ZVE32F-NEXT:    beqz a0, .LBB14_2
; RV64ZVE32F-NEXT:  .LBB14_6: # %cond.store1
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT:    vse16.v v9, (a4)
; RV64ZVE32F-NEXT:    andi a0, a3, 4
; RV64ZVE32F-NEXT:    beqz a0, .LBB14_3
; RV64ZVE32F-NEXT:  .LBB14_7: # %cond.store3
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 2
; RV64ZVE32F-NEXT:    vse16.v v9, (a2)
; RV64ZVE32F-NEXT:    andi a3, a3, 8
; RV64ZVE32F-NEXT:    beqz a3, .LBB14_4
; RV64ZVE32F-NEXT:  .LBB14_8: # %cond.store5
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 3
; RV64ZVE32F-NEXT:    vse16.v v8, (a1)
; RV64ZVE32F-NEXT:    ret
  call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> %m)
  ret void
}
define void @mscatter_truemask_v4i16(<4 x i16> %val, <4 x ptr> %ptrs) {
; RV32-LABEL: mscatter_truemask_v4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_v4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10
; RV64-NEXT:    ret
;
; RV64ZVE32F-LABEL: mscatter_truemask_v4i16:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    ld a1, 0(a0)
; RV64ZVE32F-NEXT:    ld a2, 24(a0)
; RV64ZVE32F-NEXT:    ld a3, 8(a0)
; RV64ZVE32F-NEXT:    ld a0, 16(a0)
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT:    vse16.v v8, (a1)
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT:    vse16.v v9, (a3)
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 2
; RV64ZVE32F-NEXT:    vse16.v v9, (a0)
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 3
; RV64ZVE32F-NEXT:    vse16.v v8, (a2)
; RV64ZVE32F-NEXT:    ret
  call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1))
  ret void
}
873 define void @mscatter_falsemask_v4i16(<4 x i16> %val, <4 x ptr> %ptrs) {
; All-zero mask: the scatter is a no-op, so only the label is checked
; (expected body is presumably a bare ret — confirm against generated checks).
874 ; CHECK-LABEL: mscatter_falsemask_v4i16:
877 call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> zeroinitializer)
881 declare void @llvm.masked.scatter.v8i16.v8p0(<8 x i16>, <8 x ptr>, i32, <8 x i1>)
883 define void @mscatter_v8i16(<8 x i16> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; Masked v8i16 scatter. RV32/RV64 lower to a single masked indexed store.
; RV64ZVE32F (zve32f cannot hold 64-bit pointers in vector regs) loads the 8
; pointers from memory (a0) into scalars, extracts the mask to a3 via vmv.x.s,
; then branches on each mask bit (andi ... / bnez) to a per-lane cond.store
; block that extracts the lane with vslidedown and stores it with vse16.
; NOTE(review): autogenerated CHECK lines — regenerate, don't hand-edit.
884 ; RV32-LABEL: mscatter_v8i16:
886 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
887 ; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
890 ; RV64-LABEL: mscatter_v8i16:
892 ; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
893 ; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
896 ; RV64ZVE32F-LABEL: mscatter_v8i16:
897 ; RV64ZVE32F: # %bb.0:
898 ; RV64ZVE32F-NEXT: ld a1, 56(a0)
899 ; RV64ZVE32F-NEXT: ld a2, 48(a0)
900 ; RV64ZVE32F-NEXT: ld a4, 40(a0)
901 ; RV64ZVE32F-NEXT: ld a5, 32(a0)
902 ; RV64ZVE32F-NEXT: ld a6, 24(a0)
903 ; RV64ZVE32F-NEXT: ld a7, 16(a0)
904 ; RV64ZVE32F-NEXT: ld t0, 8(a0)
905 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
906 ; RV64ZVE32F-NEXT: vmv.x.s a3, v0
907 ; RV64ZVE32F-NEXT: andi t1, a3, 1
908 ; RV64ZVE32F-NEXT: bnez t1, .LBB17_9
909 ; RV64ZVE32F-NEXT: # %bb.1: # %else
910 ; RV64ZVE32F-NEXT: andi a0, a3, 2
911 ; RV64ZVE32F-NEXT: bnez a0, .LBB17_10
912 ; RV64ZVE32F-NEXT: .LBB17_2: # %else2
913 ; RV64ZVE32F-NEXT: andi a0, a3, 4
914 ; RV64ZVE32F-NEXT: bnez a0, .LBB17_11
915 ; RV64ZVE32F-NEXT: .LBB17_3: # %else4
916 ; RV64ZVE32F-NEXT: andi a0, a3, 8
917 ; RV64ZVE32F-NEXT: bnez a0, .LBB17_12
918 ; RV64ZVE32F-NEXT: .LBB17_4: # %else6
919 ; RV64ZVE32F-NEXT: andi a0, a3, 16
920 ; RV64ZVE32F-NEXT: bnez a0, .LBB17_13
921 ; RV64ZVE32F-NEXT: .LBB17_5: # %else8
922 ; RV64ZVE32F-NEXT: andi a0, a3, 32
923 ; RV64ZVE32F-NEXT: bnez a0, .LBB17_14
924 ; RV64ZVE32F-NEXT: .LBB17_6: # %else10
925 ; RV64ZVE32F-NEXT: andi a0, a3, 64
926 ; RV64ZVE32F-NEXT: bnez a0, .LBB17_15
927 ; RV64ZVE32F-NEXT: .LBB17_7: # %else12
928 ; RV64ZVE32F-NEXT: andi a0, a3, -128
929 ; RV64ZVE32F-NEXT: bnez a0, .LBB17_16
930 ; RV64ZVE32F-NEXT: .LBB17_8: # %else14
931 ; RV64ZVE32F-NEXT: ret
932 ; RV64ZVE32F-NEXT: .LBB17_9: # %cond.store
933 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
934 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
935 ; RV64ZVE32F-NEXT: vse16.v v8, (a0)
936 ; RV64ZVE32F-NEXT: andi a0, a3, 2
937 ; RV64ZVE32F-NEXT: beqz a0, .LBB17_2
938 ; RV64ZVE32F-NEXT: .LBB17_10: # %cond.store1
939 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
940 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
941 ; RV64ZVE32F-NEXT: vse16.v v9, (t0)
942 ; RV64ZVE32F-NEXT: andi a0, a3, 4
943 ; RV64ZVE32F-NEXT: beqz a0, .LBB17_3
944 ; RV64ZVE32F-NEXT: .LBB17_11: # %cond.store3
945 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
946 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
947 ; RV64ZVE32F-NEXT: vse16.v v9, (a7)
948 ; RV64ZVE32F-NEXT: andi a0, a3, 8
949 ; RV64ZVE32F-NEXT: beqz a0, .LBB17_4
950 ; RV64ZVE32F-NEXT: .LBB17_12: # %cond.store5
951 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
952 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
953 ; RV64ZVE32F-NEXT: vse16.v v9, (a6)
954 ; RV64ZVE32F-NEXT: andi a0, a3, 16
955 ; RV64ZVE32F-NEXT: beqz a0, .LBB17_5
956 ; RV64ZVE32F-NEXT: .LBB17_13: # %cond.store7
957 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
958 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
959 ; RV64ZVE32F-NEXT: vse16.v v9, (a5)
960 ; RV64ZVE32F-NEXT: andi a0, a3, 32
961 ; RV64ZVE32F-NEXT: beqz a0, .LBB17_6
962 ; RV64ZVE32F-NEXT: .LBB17_14: # %cond.store9
963 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
964 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
965 ; RV64ZVE32F-NEXT: vse16.v v9, (a4)
966 ; RV64ZVE32F-NEXT: andi a0, a3, 64
967 ; RV64ZVE32F-NEXT: beqz a0, .LBB17_7
968 ; RV64ZVE32F-NEXT: .LBB17_15: # %cond.store11
969 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
970 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 6
971 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
972 ; RV64ZVE32F-NEXT: andi a0, a3, -128
973 ; RV64ZVE32F-NEXT: beqz a0, .LBB17_8
974 ; RV64ZVE32F-NEXT: .LBB17_16: # %cond.store13
975 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
976 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
977 ; RV64ZVE32F-NEXT: vse16.v v8, (a1)
978 ; RV64ZVE32F-NEXT: ret
979 call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
983 define void @mscatter_baseidx_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
; Scatter of v8i16 through base + sign-extended i8 indices (GEP over i16, so
; each index is shifted left by 1). RV32 sign-extends indices to i32
; (vsext.vf4) and doubles them via vadd.vv; RV64 does the same at i64
; (vsext.vf8). RV64ZVE32F expands per lane: extract index with vmv.x.s,
; scale (slli 1), add base, then vslidedown + vse16 the matching value lane,
; branching on each mask bit.
; NOTE(review): autogenerated CHECK lines — regenerate, don't hand-edit.
984 ; RV32-LABEL: mscatter_baseidx_v8i8_v8i16:
986 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
987 ; RV32-NEXT: vsext.vf4 v10, v9
988 ; RV32-NEXT: vadd.vv v10, v10, v10
989 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
990 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
993 ; RV64-LABEL: mscatter_baseidx_v8i8_v8i16:
995 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
996 ; RV64-NEXT: vsext.vf8 v12, v9
997 ; RV64-NEXT: vadd.vv v12, v12, v12
998 ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma
999 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
1002 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8_v8i16:
1003 ; RV64ZVE32F: # %bb.0:
1004 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
1005 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
1006 ; RV64ZVE32F-NEXT: andi a2, a1, 1
1007 ; RV64ZVE32F-NEXT: beqz a2, .LBB18_2
1008 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
1009 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1010 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1011 ; RV64ZVE32F-NEXT: add a2, a0, a2
1012 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1013 ; RV64ZVE32F-NEXT: vse16.v v8, (a2)
1014 ; RV64ZVE32F-NEXT: .LBB18_2: # %else
1015 ; RV64ZVE32F-NEXT: andi a2, a1, 2
1016 ; RV64ZVE32F-NEXT: beqz a2, .LBB18_4
1017 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
1018 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1019 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
1020 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
1021 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1022 ; RV64ZVE32F-NEXT: add a2, a0, a2
1023 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1024 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
1025 ; RV64ZVE32F-NEXT: vse16.v v10, (a2)
1026 ; RV64ZVE32F-NEXT: .LBB18_4: # %else2
1027 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
1028 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
1029 ; RV64ZVE32F-NEXT: andi a2, a1, 4
1030 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
1031 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
1032 ; RV64ZVE32F-NEXT: bnez a2, .LBB18_12
1033 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
1034 ; RV64ZVE32F-NEXT: andi a2, a1, 8
1035 ; RV64ZVE32F-NEXT: bnez a2, .LBB18_13
1036 ; RV64ZVE32F-NEXT: .LBB18_6: # %else6
1037 ; RV64ZVE32F-NEXT: andi a2, a1, 16
1038 ; RV64ZVE32F-NEXT: bnez a2, .LBB18_14
1039 ; RV64ZVE32F-NEXT: .LBB18_7: # %else8
1040 ; RV64ZVE32F-NEXT: andi a2, a1, 32
1041 ; RV64ZVE32F-NEXT: beqz a2, .LBB18_9
1042 ; RV64ZVE32F-NEXT: .LBB18_8: # %cond.store9
1043 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1044 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
1045 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1046 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1047 ; RV64ZVE32F-NEXT: add a2, a0, a2
1048 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1049 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
1050 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
1051 ; RV64ZVE32F-NEXT: .LBB18_9: # %else10
1052 ; RV64ZVE32F-NEXT: andi a2, a1, 64
1053 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
1054 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
1055 ; RV64ZVE32F-NEXT: bnez a2, .LBB18_15
1056 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
1057 ; RV64ZVE32F-NEXT: andi a1, a1, -128
1058 ; RV64ZVE32F-NEXT: bnez a1, .LBB18_16
1059 ; RV64ZVE32F-NEXT: .LBB18_11: # %else14
1060 ; RV64ZVE32F-NEXT: ret
1061 ; RV64ZVE32F-NEXT: .LBB18_12: # %cond.store3
1062 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1063 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1064 ; RV64ZVE32F-NEXT: add a2, a0, a2
1065 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1066 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
1067 ; RV64ZVE32F-NEXT: vse16.v v11, (a2)
1068 ; RV64ZVE32F-NEXT: andi a2, a1, 8
1069 ; RV64ZVE32F-NEXT: beqz a2, .LBB18_6
1070 ; RV64ZVE32F-NEXT: .LBB18_13: # %cond.store5
1071 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1072 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
1073 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1074 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1075 ; RV64ZVE32F-NEXT: add a2, a0, a2
1076 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1077 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
1078 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
1079 ; RV64ZVE32F-NEXT: andi a2, a1, 16
1080 ; RV64ZVE32F-NEXT: beqz a2, .LBB18_7
1081 ; RV64ZVE32F-NEXT: .LBB18_14: # %cond.store7
1082 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
1083 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
1084 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1085 ; RV64ZVE32F-NEXT: add a2, a0, a2
1086 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1087 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
1088 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
1089 ; RV64ZVE32F-NEXT: andi a2, a1, 32
1090 ; RV64ZVE32F-NEXT: bnez a2, .LBB18_8
1091 ; RV64ZVE32F-NEXT: j .LBB18_9
1092 ; RV64ZVE32F-NEXT: .LBB18_15: # %cond.store11
1093 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1094 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1095 ; RV64ZVE32F-NEXT: add a2, a0, a2
1096 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1097 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
1098 ; RV64ZVE32F-NEXT: vse16.v v10, (a2)
1099 ; RV64ZVE32F-NEXT: andi a1, a1, -128
1100 ; RV64ZVE32F-NEXT: beqz a1, .LBB18_11
1101 ; RV64ZVE32F-NEXT: .LBB18_16: # %cond.store13
1102 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1103 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
1104 ; RV64ZVE32F-NEXT: vmv.x.s a1, v9
1105 ; RV64ZVE32F-NEXT: slli a1, a1, 1
1106 ; RV64ZVE32F-NEXT: add a0, a0, a1
1107 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1108 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
1109 ; RV64ZVE32F-NEXT: vse16.v v8, (a0)
1110 ; RV64ZVE32F-NEXT: ret
1111 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i8> %idxs
1112 call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
1116 define void @mscatter_baseidx_sext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
; Same as mscatter_baseidx_v8i8_v8i16 but with an explicit sext of the i8
; indices to i16 before the GEP; codegen is expected to be identical to the
; implicit-sext variant (vsext + vadd on RV32/RV64, scalar expansion on
; RV64ZVE32F).
; NOTE(review): autogenerated CHECK lines — regenerate, don't hand-edit.
1117 ; RV32-LABEL: mscatter_baseidx_sext_v8i8_v8i16:
1119 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1120 ; RV32-NEXT: vsext.vf4 v10, v9
1121 ; RV32-NEXT: vadd.vv v10, v10, v10
1122 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
1123 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
1126 ; RV64-LABEL: mscatter_baseidx_sext_v8i8_v8i16:
1128 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1129 ; RV64-NEXT: vsext.vf8 v12, v9
1130 ; RV64-NEXT: vadd.vv v12, v12, v12
1131 ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma
1132 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
1135 ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8i16:
1136 ; RV64ZVE32F: # %bb.0:
1137 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
1138 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
1139 ; RV64ZVE32F-NEXT: andi a2, a1, 1
1140 ; RV64ZVE32F-NEXT: beqz a2, .LBB19_2
1141 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
1142 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1143 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1144 ; RV64ZVE32F-NEXT: add a2, a0, a2
1145 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1146 ; RV64ZVE32F-NEXT: vse16.v v8, (a2)
1147 ; RV64ZVE32F-NEXT: .LBB19_2: # %else
1148 ; RV64ZVE32F-NEXT: andi a2, a1, 2
1149 ; RV64ZVE32F-NEXT: beqz a2, .LBB19_4
1150 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
1151 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1152 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
1153 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
1154 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1155 ; RV64ZVE32F-NEXT: add a2, a0, a2
1156 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1157 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
1158 ; RV64ZVE32F-NEXT: vse16.v v10, (a2)
1159 ; RV64ZVE32F-NEXT: .LBB19_4: # %else2
1160 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
1161 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
1162 ; RV64ZVE32F-NEXT: andi a2, a1, 4
1163 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
1164 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
1165 ; RV64ZVE32F-NEXT: bnez a2, .LBB19_12
1166 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
1167 ; RV64ZVE32F-NEXT: andi a2, a1, 8
1168 ; RV64ZVE32F-NEXT: bnez a2, .LBB19_13
1169 ; RV64ZVE32F-NEXT: .LBB19_6: # %else6
1170 ; RV64ZVE32F-NEXT: andi a2, a1, 16
1171 ; RV64ZVE32F-NEXT: bnez a2, .LBB19_14
1172 ; RV64ZVE32F-NEXT: .LBB19_7: # %else8
1173 ; RV64ZVE32F-NEXT: andi a2, a1, 32
1174 ; RV64ZVE32F-NEXT: beqz a2, .LBB19_9
1175 ; RV64ZVE32F-NEXT: .LBB19_8: # %cond.store9
1176 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1177 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
1178 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1179 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1180 ; RV64ZVE32F-NEXT: add a2, a0, a2
1181 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1182 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
1183 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
1184 ; RV64ZVE32F-NEXT: .LBB19_9: # %else10
1185 ; RV64ZVE32F-NEXT: andi a2, a1, 64
1186 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
1187 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
1188 ; RV64ZVE32F-NEXT: bnez a2, .LBB19_15
1189 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
1190 ; RV64ZVE32F-NEXT: andi a1, a1, -128
1191 ; RV64ZVE32F-NEXT: bnez a1, .LBB19_16
1192 ; RV64ZVE32F-NEXT: .LBB19_11: # %else14
1193 ; RV64ZVE32F-NEXT: ret
1194 ; RV64ZVE32F-NEXT: .LBB19_12: # %cond.store3
1195 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1196 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1197 ; RV64ZVE32F-NEXT: add a2, a0, a2
1198 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1199 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
1200 ; RV64ZVE32F-NEXT: vse16.v v11, (a2)
1201 ; RV64ZVE32F-NEXT: andi a2, a1, 8
1202 ; RV64ZVE32F-NEXT: beqz a2, .LBB19_6
1203 ; RV64ZVE32F-NEXT: .LBB19_13: # %cond.store5
1204 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1205 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
1206 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1207 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1208 ; RV64ZVE32F-NEXT: add a2, a0, a2
1209 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1210 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
1211 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
1212 ; RV64ZVE32F-NEXT: andi a2, a1, 16
1213 ; RV64ZVE32F-NEXT: beqz a2, .LBB19_7
1214 ; RV64ZVE32F-NEXT: .LBB19_14: # %cond.store7
1215 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
1216 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
1217 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1218 ; RV64ZVE32F-NEXT: add a2, a0, a2
1219 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1220 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
1221 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
1222 ; RV64ZVE32F-NEXT: andi a2, a1, 32
1223 ; RV64ZVE32F-NEXT: bnez a2, .LBB19_8
1224 ; RV64ZVE32F-NEXT: j .LBB19_9
1225 ; RV64ZVE32F-NEXT: .LBB19_15: # %cond.store11
1226 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1227 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1228 ; RV64ZVE32F-NEXT: add a2, a0, a2
1229 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1230 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
1231 ; RV64ZVE32F-NEXT: vse16.v v10, (a2)
1232 ; RV64ZVE32F-NEXT: andi a1, a1, -128
1233 ; RV64ZVE32F-NEXT: beqz a1, .LBB19_11
1234 ; RV64ZVE32F-NEXT: .LBB19_16: # %cond.store13
1235 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1236 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
1237 ; RV64ZVE32F-NEXT: vmv.x.s a1, v9
1238 ; RV64ZVE32F-NEXT: slli a1, a1, 1
1239 ; RV64ZVE32F-NEXT: add a0, a0, a1
1240 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1241 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
1242 ; RV64ZVE32F-NEXT: vse16.v v8, (a0)
1243 ; RV64ZVE32F-NEXT: ret
1244 %eidxs = sext <8 x i8> %idxs to <8 x i16>
1245 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %eidxs
1246 call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
1250 define void @mscatter_baseidx_zext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
; Zero-extended i8 indices: RV32/RV64 can keep the index type narrow —
; vwaddu.vv doubles-and-widens u8 indices to u16 and a 16-bit-indexed
; vsoxei16 is used. RV64ZVE32F's scalar expansion masks each extracted index
; with `andi a2, a2, 255` to model the zext before scaling.
; NOTE(review): autogenerated CHECK lines — regenerate, don't hand-edit.
1251 ; RV32-LABEL: mscatter_baseidx_zext_v8i8_v8i16:
1253 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
1254 ; RV32-NEXT: vwaddu.vv v10, v9, v9
1255 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
1256 ; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t
1259 ; RV64-LABEL: mscatter_baseidx_zext_v8i8_v8i16:
1261 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
1262 ; RV64-NEXT: vwaddu.vv v10, v9, v9
1263 ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma
1264 ; RV64-NEXT: vsoxei16.v v8, (a0), v10, v0.t
1267 ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8i16:
1268 ; RV64ZVE32F: # %bb.0:
1269 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
1270 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
1271 ; RV64ZVE32F-NEXT: andi a2, a1, 1
1272 ; RV64ZVE32F-NEXT: beqz a2, .LBB20_2
1273 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
1274 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1275 ; RV64ZVE32F-NEXT: andi a2, a2, 255
1276 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1277 ; RV64ZVE32F-NEXT: add a2, a0, a2
1278 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1279 ; RV64ZVE32F-NEXT: vse16.v v8, (a2)
1280 ; RV64ZVE32F-NEXT: .LBB20_2: # %else
1281 ; RV64ZVE32F-NEXT: andi a2, a1, 2
1282 ; RV64ZVE32F-NEXT: beqz a2, .LBB20_4
1283 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
1284 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1285 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
1286 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
1287 ; RV64ZVE32F-NEXT: andi a2, a2, 255
1288 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1289 ; RV64ZVE32F-NEXT: add a2, a0, a2
1290 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1291 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
1292 ; RV64ZVE32F-NEXT: vse16.v v10, (a2)
1293 ; RV64ZVE32F-NEXT: .LBB20_4: # %else2
1294 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
1295 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
1296 ; RV64ZVE32F-NEXT: andi a2, a1, 4
1297 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
1298 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
1299 ; RV64ZVE32F-NEXT: bnez a2, .LBB20_12
1300 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
1301 ; RV64ZVE32F-NEXT: andi a2, a1, 8
1302 ; RV64ZVE32F-NEXT: bnez a2, .LBB20_13
1303 ; RV64ZVE32F-NEXT: .LBB20_6: # %else6
1304 ; RV64ZVE32F-NEXT: andi a2, a1, 16
1305 ; RV64ZVE32F-NEXT: bnez a2, .LBB20_14
1306 ; RV64ZVE32F-NEXT: .LBB20_7: # %else8
1307 ; RV64ZVE32F-NEXT: andi a2, a1, 32
1308 ; RV64ZVE32F-NEXT: beqz a2, .LBB20_9
1309 ; RV64ZVE32F-NEXT: .LBB20_8: # %cond.store9
1310 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1311 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
1312 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1313 ; RV64ZVE32F-NEXT: andi a2, a2, 255
1314 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1315 ; RV64ZVE32F-NEXT: add a2, a0, a2
1316 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1317 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
1318 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
1319 ; RV64ZVE32F-NEXT: .LBB20_9: # %else10
1320 ; RV64ZVE32F-NEXT: andi a2, a1, 64
1321 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
1322 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
1323 ; RV64ZVE32F-NEXT: bnez a2, .LBB20_15
1324 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
1325 ; RV64ZVE32F-NEXT: andi a1, a1, -128
1326 ; RV64ZVE32F-NEXT: bnez a1, .LBB20_16
1327 ; RV64ZVE32F-NEXT: .LBB20_11: # %else14
1328 ; RV64ZVE32F-NEXT: ret
1329 ; RV64ZVE32F-NEXT: .LBB20_12: # %cond.store3
1330 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1331 ; RV64ZVE32F-NEXT: andi a2, a2, 255
1332 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1333 ; RV64ZVE32F-NEXT: add a2, a0, a2
1334 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1335 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
1336 ; RV64ZVE32F-NEXT: vse16.v v11, (a2)
1337 ; RV64ZVE32F-NEXT: andi a2, a1, 8
1338 ; RV64ZVE32F-NEXT: beqz a2, .LBB20_6
1339 ; RV64ZVE32F-NEXT: .LBB20_13: # %cond.store5
1340 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1341 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
1342 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1343 ; RV64ZVE32F-NEXT: andi a2, a2, 255
1344 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1345 ; RV64ZVE32F-NEXT: add a2, a0, a2
1346 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1347 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
1348 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
1349 ; RV64ZVE32F-NEXT: andi a2, a1, 16
1350 ; RV64ZVE32F-NEXT: beqz a2, .LBB20_7
1351 ; RV64ZVE32F-NEXT: .LBB20_14: # %cond.store7
1352 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
1353 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
1354 ; RV64ZVE32F-NEXT: andi a2, a2, 255
1355 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1356 ; RV64ZVE32F-NEXT: add a2, a0, a2
1357 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1358 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
1359 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
1360 ; RV64ZVE32F-NEXT: andi a2, a1, 32
1361 ; RV64ZVE32F-NEXT: bnez a2, .LBB20_8
1362 ; RV64ZVE32F-NEXT: j .LBB20_9
1363 ; RV64ZVE32F-NEXT: .LBB20_15: # %cond.store11
1364 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1365 ; RV64ZVE32F-NEXT: andi a2, a2, 255
1366 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1367 ; RV64ZVE32F-NEXT: add a2, a0, a2
1368 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1369 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
1370 ; RV64ZVE32F-NEXT: vse16.v v10, (a2)
1371 ; RV64ZVE32F-NEXT: andi a1, a1, -128
1372 ; RV64ZVE32F-NEXT: beqz a1, .LBB20_11
1373 ; RV64ZVE32F-NEXT: .LBB20_16: # %cond.store13
1374 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1375 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
1376 ; RV64ZVE32F-NEXT: vmv.x.s a1, v9
1377 ; RV64ZVE32F-NEXT: andi a1, a1, 255
1378 ; RV64ZVE32F-NEXT: slli a1, a1, 1
1379 ; RV64ZVE32F-NEXT: add a0, a0, a1
1380 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1381 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
1382 ; RV64ZVE32F-NEXT: vse16.v v8, (a0)
1383 ; RV64ZVE32F-NEXT: ret
1384 %eidxs = zext <8 x i8> %idxs to <8 x i16>
1385 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %eidxs
1386 call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
1390 define void @mscatter_baseidx_v8i16(<8 x i16> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
; Native i16 indices. RV32 widens-and-doubles in one step (vwadd.vv) for a
; 32-bit-indexed store; RV64 needs i64 indices (vsext.vf4 + vadd). RV64ZVE32F
; scalar-expands as in the i8-index variants but extracts indices at e16.
; NOTE(review): autogenerated CHECK lines — regenerate, don't hand-edit.
1391 ; RV32-LABEL: mscatter_baseidx_v8i16:
1393 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1394 ; RV32-NEXT: vwadd.vv v10, v9, v9
1395 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
1398 ; RV64-LABEL: mscatter_baseidx_v8i16:
1400 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1401 ; RV64-NEXT: vsext.vf4 v12, v9
1402 ; RV64-NEXT: vadd.vv v12, v12, v12
1403 ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma
1404 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
1407 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i16:
1408 ; RV64ZVE32F: # %bb.0:
1409 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
1410 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
1411 ; RV64ZVE32F-NEXT: andi a2, a1, 1
1412 ; RV64ZVE32F-NEXT: beqz a2, .LBB21_2
1413 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
1414 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1415 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1416 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1417 ; RV64ZVE32F-NEXT: add a2, a0, a2
1418 ; RV64ZVE32F-NEXT: vse16.v v8, (a2)
1419 ; RV64ZVE32F-NEXT: .LBB21_2: # %else
1420 ; RV64ZVE32F-NEXT: andi a2, a1, 2
1421 ; RV64ZVE32F-NEXT: beqz a2, .LBB21_4
1422 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
1423 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
1424 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
1425 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
1426 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1427 ; RV64ZVE32F-NEXT: add a2, a0, a2
1428 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1429 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
1430 ; RV64ZVE32F-NEXT: vse16.v v10, (a2)
1431 ; RV64ZVE32F-NEXT: .LBB21_4: # %else2
1432 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
1433 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
1434 ; RV64ZVE32F-NEXT: andi a2, a1, 4
1435 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
1436 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
1437 ; RV64ZVE32F-NEXT: bnez a2, .LBB21_12
1438 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
1439 ; RV64ZVE32F-NEXT: andi a2, a1, 8
1440 ; RV64ZVE32F-NEXT: bnez a2, .LBB21_13
1441 ; RV64ZVE32F-NEXT: .LBB21_6: # %else6
1442 ; RV64ZVE32F-NEXT: andi a2, a1, 16
1443 ; RV64ZVE32F-NEXT: bnez a2, .LBB21_14
1444 ; RV64ZVE32F-NEXT: .LBB21_7: # %else8
1445 ; RV64ZVE32F-NEXT: andi a2, a1, 32
1446 ; RV64ZVE32F-NEXT: beqz a2, .LBB21_9
1447 ; RV64ZVE32F-NEXT: .LBB21_8: # %cond.store9
1448 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
1449 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
1450 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1451 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1452 ; RV64ZVE32F-NEXT: add a2, a0, a2
1453 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1454 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
1455 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
1456 ; RV64ZVE32F-NEXT: .LBB21_9: # %else10
1457 ; RV64ZVE32F-NEXT: andi a2, a1, 64
1458 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
1459 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
1460 ; RV64ZVE32F-NEXT: bnez a2, .LBB21_15
1461 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
1462 ; RV64ZVE32F-NEXT: andi a1, a1, -128
1463 ; RV64ZVE32F-NEXT: bnez a1, .LBB21_16
1464 ; RV64ZVE32F-NEXT: .LBB21_11: # %else14
1465 ; RV64ZVE32F-NEXT: ret
1466 ; RV64ZVE32F-NEXT: .LBB21_12: # %cond.store3
1467 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1468 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1469 ; RV64ZVE32F-NEXT: add a2, a0, a2
1470 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1471 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
1472 ; RV64ZVE32F-NEXT: vse16.v v11, (a2)
1473 ; RV64ZVE32F-NEXT: andi a2, a1, 8
1474 ; RV64ZVE32F-NEXT: beqz a2, .LBB21_6
1475 ; RV64ZVE32F-NEXT: .LBB21_13: # %cond.store5
1476 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
1477 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
1478 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1479 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1480 ; RV64ZVE32F-NEXT: add a2, a0, a2
1481 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1482 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
1483 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
1484 ; RV64ZVE32F-NEXT: andi a2, a1, 16
1485 ; RV64ZVE32F-NEXT: beqz a2, .LBB21_7
1486 ; RV64ZVE32F-NEXT: .LBB21_14: # %cond.store7
1487 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1488 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
1489 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1490 ; RV64ZVE32F-NEXT: add a2, a0, a2
1491 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
1492 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
1493 ; RV64ZVE32F-NEXT: andi a2, a1, 32
1494 ; RV64ZVE32F-NEXT: bnez a2, .LBB21_8
1495 ; RV64ZVE32F-NEXT: j .LBB21_9
1496 ; RV64ZVE32F-NEXT: .LBB21_15: # %cond.store11
1497 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
1498 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1499 ; RV64ZVE32F-NEXT: add a2, a0, a2
1500 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1501 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
1502 ; RV64ZVE32F-NEXT: vse16.v v10, (a2)
1503 ; RV64ZVE32F-NEXT: andi a1, a1, -128
1504 ; RV64ZVE32F-NEXT: beqz a1, .LBB21_11
1505 ; RV64ZVE32F-NEXT: .LBB21_16: # %cond.store13
1506 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
1507 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
1508 ; RV64ZVE32F-NEXT: vmv.x.s a1, v9
1509 ; RV64ZVE32F-NEXT: slli a1, a1, 1
1510 ; RV64ZVE32F-NEXT: add a0, a0, a1
1511 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
1512 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
1513 ; RV64ZVE32F-NEXT: vse16.v v8, (a0)
1514 ; RV64ZVE32F-NEXT: ret
1515 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %idxs
1516 call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
1520 declare void @llvm.masked.scatter.v1i32.v1p0(<1 x i32>, <1 x ptr>, i32, <1 x i1>)
1522 define void @mscatter_v1i32(<1 x i32> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
; Single-element i32 scatter. RV64ZVE32F tests bit 0 of the mask via
; vfirst.m (a1 == 0 iff lane 0 is active) and conditionally does one vse32.
; NOTE(review): autogenerated CHECK lines — regenerate, don't hand-edit.
1523 ; RV32V-LABEL: mscatter_v1i32:
1525 ; RV32V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
1526 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
1529 ; RV64-LABEL: mscatter_v1i32:
1531 ; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
1532 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
1535 ; RV32ZVE32F-LABEL: mscatter_v1i32:
1536 ; RV32ZVE32F: # %bb.0:
1537 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1538 ; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
1539 ; RV32ZVE32F-NEXT: ret
1541 ; RV64ZVE32F-LABEL: mscatter_v1i32:
1542 ; RV64ZVE32F: # %bb.0:
1543 ; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
1544 ; RV64ZVE32F-NEXT: vfirst.m a1, v0
1545 ; RV64ZVE32F-NEXT: bnez a1, .LBB22_2
1546 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
1547 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1548 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
1549 ; RV64ZVE32F-NEXT: .LBB22_2: # %else
1550 ; RV64ZVE32F-NEXT: ret
1551 call void @llvm.masked.scatter.v1i32.v1p0(<1 x i32> %val, <1 x ptr> %ptrs, i32 4, <1 x i1> %m)
1555 declare void @llvm.masked.scatter.v2i32.v2p0(<2 x i32>, <2 x ptr>, i32, <2 x i1>)
1557 define void @mscatter_v2i32(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; Two-element i32 scatter; RV64ZVE32F receives the two pointers in a0/a1
; and branches on mask bits 1 and 2 extracted from v0 into a2.
; NOTE(review): autogenerated CHECK lines — regenerate, don't hand-edit.
1558 ; RV32V-LABEL: mscatter_v2i32:
1560 ; RV32V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
1561 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
1564 ; RV64-LABEL: mscatter_v2i32:
1566 ; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
1567 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
1570 ; RV32ZVE32F-LABEL: mscatter_v2i32:
1571 ; RV32ZVE32F: # %bb.0:
1572 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
1573 ; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
1574 ; RV32ZVE32F-NEXT: ret
1576 ; RV64ZVE32F-LABEL: mscatter_v2i32:
1577 ; RV64ZVE32F: # %bb.0:
1578 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
1579 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
1580 ; RV64ZVE32F-NEXT: andi a3, a2, 1
1581 ; RV64ZVE32F-NEXT: bnez a3, .LBB23_3
1582 ; RV64ZVE32F-NEXT: # %bb.1: # %else
1583 ; RV64ZVE32F-NEXT: andi a2, a2, 2
1584 ; RV64ZVE32F-NEXT: bnez a2, .LBB23_4
1585 ; RV64ZVE32F-NEXT: .LBB23_2: # %else2
1586 ; RV64ZVE32F-NEXT: ret
1587 ; RV64ZVE32F-NEXT: .LBB23_3: # %cond.store
1588 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1589 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
1590 ; RV64ZVE32F-NEXT: andi a2, a2, 2
1591 ; RV64ZVE32F-NEXT: beqz a2, .LBB23_2
1592 ; RV64ZVE32F-NEXT: .LBB23_4: # %cond.store1
1593 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1594 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
1595 ; RV64ZVE32F-NEXT: vse32.v v8, (a1)
1596 ; RV64ZVE32F-NEXT: ret
1597 call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> %val, <2 x ptr> %ptrs, i32 4, <2 x i1> %m)
1601 define void @mscatter_v2i64_truncstore_v2i32(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; Scatter of v2i64 truncated to v2i32. RV32V/RV64 narrow with vnsrl.wi.
; On ZVE32F targets the i64 elements can't live in vector regs, so the
; truncated i32 halves arrive in scalar regs / memory and are rebuilt into a
; vector (vlse32/vmv.v.x + vslide1down) before storing.
; NOTE(review): autogenerated CHECK lines — regenerate, don't hand-edit.
1602 ; RV32V-LABEL: mscatter_v2i64_truncstore_v2i32:
1604 ; RV32V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
1605 ; RV32V-NEXT: vnsrl.wi v8, v8, 0
1606 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
1609 ; RV64-LABEL: mscatter_v2i64_truncstore_v2i32:
1611 ; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
1612 ; RV64-NEXT: vnsrl.wi v8, v8, 0
1613 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
1616 ; RV32ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i32:
1617 ; RV32ZVE32F: # %bb.0:
1618 ; RV32ZVE32F-NEXT: lw a1, 8(a0)
1619 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
1620 ; RV32ZVE32F-NEXT: vlse32.v v9, (a0), zero
1621 ; RV32ZVE32F-NEXT: vslide1down.vx v9, v9, a1
1622 ; RV32ZVE32F-NEXT: vsoxei32.v v9, (zero), v8, v0.t
1623 ; RV32ZVE32F-NEXT: ret
1625 ; RV64ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i32:
1626 ; RV64ZVE32F: # %bb.0:
1627 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
1628 ; RV64ZVE32F-NEXT: vmv.v.x v8, a0
1629 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
1630 ; RV64ZVE32F-NEXT: vmv.x.s a0, v0
1631 ; RV64ZVE32F-NEXT: andi a4, a0, 1
1632 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
1633 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1
1634 ; RV64ZVE32F-NEXT: bnez a4, .LBB24_3
1635 ; RV64ZVE32F-NEXT: # %bb.1: # %else
1636 ; RV64ZVE32F-NEXT: andi a0, a0, 2
1637 ; RV64ZVE32F-NEXT: bnez a0, .LBB24_4
1638 ; RV64ZVE32F-NEXT: .LBB24_2: # %else2
1639 ; RV64ZVE32F-NEXT: ret
1640 ; RV64ZVE32F-NEXT: .LBB24_3: # %cond.store
1641 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1642 ; RV64ZVE32F-NEXT: vse32.v v8, (a2)
1643 ; RV64ZVE32F-NEXT: andi a0, a0, 2
1644 ; RV64ZVE32F-NEXT: beqz a0, .LBB24_2
1645 ; RV64ZVE32F-NEXT: .LBB24_4: # %cond.store1
1646 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1647 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
1648 ; RV64ZVE32F-NEXT: vse32.v v8, (a3)
1649 ; RV64ZVE32F-NEXT: ret
1650 %tval = trunc <2 x i64> %val to <2 x i32>
1651 call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> %tval, <2 x ptr> %ptrs, i32 4, <2 x i1> %m)
1655 declare void @llvm.masked.scatter.v4i32.v4p0(<4 x i32>, <4 x ptr>, i32, <4 x i1>)
1657 define void @mscatter_v4i32(<4 x i32> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
1658 ; RV32-LABEL: mscatter_v4i32:
1660 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1661 ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
1664 ; RV64-LABEL: mscatter_v4i32:
1666 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1667 ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
1670 ; RV64ZVE32F-LABEL: mscatter_v4i32:
1671 ; RV64ZVE32F: # %bb.0:
1672 ; RV64ZVE32F-NEXT: ld a1, 24(a0)
1673 ; RV64ZVE32F-NEXT: ld a2, 16(a0)
1674 ; RV64ZVE32F-NEXT: ld a4, 8(a0)
1675 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
1676 ; RV64ZVE32F-NEXT: vmv.x.s a3, v0
1677 ; RV64ZVE32F-NEXT: andi a5, a3, 1
1678 ; RV64ZVE32F-NEXT: bnez a5, .LBB25_5
1679 ; RV64ZVE32F-NEXT: # %bb.1: # %else
1680 ; RV64ZVE32F-NEXT: andi a0, a3, 2
1681 ; RV64ZVE32F-NEXT: bnez a0, .LBB25_6
1682 ; RV64ZVE32F-NEXT: .LBB25_2: # %else2
1683 ; RV64ZVE32F-NEXT: andi a0, a3, 4
1684 ; RV64ZVE32F-NEXT: bnez a0, .LBB25_7
1685 ; RV64ZVE32F-NEXT: .LBB25_3: # %else4
1686 ; RV64ZVE32F-NEXT: andi a3, a3, 8
1687 ; RV64ZVE32F-NEXT: bnez a3, .LBB25_8
1688 ; RV64ZVE32F-NEXT: .LBB25_4: # %else6
1689 ; RV64ZVE32F-NEXT: ret
1690 ; RV64ZVE32F-NEXT: .LBB25_5: # %cond.store
1691 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
1692 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1693 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
1694 ; RV64ZVE32F-NEXT: andi a0, a3, 2
1695 ; RV64ZVE32F-NEXT: beqz a0, .LBB25_2
1696 ; RV64ZVE32F-NEXT: .LBB25_6: # %cond.store1
1697 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1698 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
1699 ; RV64ZVE32F-NEXT: vse32.v v9, (a4)
1700 ; RV64ZVE32F-NEXT: andi a0, a3, 4
1701 ; RV64ZVE32F-NEXT: beqz a0, .LBB25_3
1702 ; RV64ZVE32F-NEXT: .LBB25_7: # %cond.store3
1703 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1704 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
1705 ; RV64ZVE32F-NEXT: vse32.v v9, (a2)
1706 ; RV64ZVE32F-NEXT: andi a3, a3, 8
1707 ; RV64ZVE32F-NEXT: beqz a3, .LBB25_4
1708 ; RV64ZVE32F-NEXT: .LBB25_8: # %cond.store5
1709 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1710 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
1711 ; RV64ZVE32F-NEXT: vse32.v v8, (a1)
1712 ; RV64ZVE32F-NEXT: ret
1713 call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 4, <4 x i1> %m)
1717 define void @mscatter_truemask_v4i32(<4 x i32> %val, <4 x ptr> %ptrs) {
1718 ; RV32-LABEL: mscatter_truemask_v4i32:
1720 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1721 ; RV32-NEXT: vsoxei32.v v8, (zero), v9
1724 ; RV64-LABEL: mscatter_truemask_v4i32:
1726 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1727 ; RV64-NEXT: vsoxei64.v v8, (zero), v10
1730 ; RV64ZVE32F-LABEL: mscatter_truemask_v4i32:
1731 ; RV64ZVE32F: # %bb.0:
1732 ; RV64ZVE32F-NEXT: ld a1, 0(a0)
1733 ; RV64ZVE32F-NEXT: ld a2, 24(a0)
1734 ; RV64ZVE32F-NEXT: ld a3, 8(a0)
1735 ; RV64ZVE32F-NEXT: ld a0, 16(a0)
1736 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1737 ; RV64ZVE32F-NEXT: vse32.v v8, (a1)
1738 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
1739 ; RV64ZVE32F-NEXT: vse32.v v9, (a3)
1740 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
1741 ; RV64ZVE32F-NEXT: vse32.v v9, (a0)
1742 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
1743 ; RV64ZVE32F-NEXT: vse32.v v8, (a2)
1744 ; RV64ZVE32F-NEXT: ret
1745 call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 1))
1749 define void @mscatter_falsemask_v4i32(<4 x i32> %val, <4 x ptr> %ptrs) {
1750 ; CHECK-LABEL: mscatter_falsemask_v4i32:
1753 call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 4, <4 x i1> zeroinitializer)
1757 declare void @llvm.masked.scatter.v8i32.v8p0(<8 x i32>, <8 x ptr>, i32, <8 x i1>)
1759 define void @mscatter_v8i32(<8 x i32> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
1760 ; RV32-LABEL: mscatter_v8i32:
1762 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1763 ; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
1766 ; RV64-LABEL: mscatter_v8i32:
1768 ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1769 ; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
1772 ; RV64ZVE32F-LABEL: mscatter_v8i32:
1773 ; RV64ZVE32F: # %bb.0:
1774 ; RV64ZVE32F-NEXT: ld a1, 56(a0)
1775 ; RV64ZVE32F-NEXT: ld a2, 48(a0)
1776 ; RV64ZVE32F-NEXT: ld a4, 40(a0)
1777 ; RV64ZVE32F-NEXT: ld a5, 32(a0)
1778 ; RV64ZVE32F-NEXT: ld a6, 24(a0)
1779 ; RV64ZVE32F-NEXT: ld a7, 16(a0)
1780 ; RV64ZVE32F-NEXT: ld t0, 8(a0)
1781 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
1782 ; RV64ZVE32F-NEXT: vmv.x.s a3, v0
1783 ; RV64ZVE32F-NEXT: andi t1, a3, 1
1784 ; RV64ZVE32F-NEXT: bnez t1, .LBB28_9
1785 ; RV64ZVE32F-NEXT: # %bb.1: # %else
1786 ; RV64ZVE32F-NEXT: andi a0, a3, 2
1787 ; RV64ZVE32F-NEXT: bnez a0, .LBB28_10
1788 ; RV64ZVE32F-NEXT: .LBB28_2: # %else2
1789 ; RV64ZVE32F-NEXT: andi a0, a3, 4
1790 ; RV64ZVE32F-NEXT: bnez a0, .LBB28_11
1791 ; RV64ZVE32F-NEXT: .LBB28_3: # %else4
1792 ; RV64ZVE32F-NEXT: andi a0, a3, 8
1793 ; RV64ZVE32F-NEXT: bnez a0, .LBB28_12
1794 ; RV64ZVE32F-NEXT: .LBB28_4: # %else6
1795 ; RV64ZVE32F-NEXT: andi a0, a3, 16
1796 ; RV64ZVE32F-NEXT: bnez a0, .LBB28_13
1797 ; RV64ZVE32F-NEXT: .LBB28_5: # %else8
1798 ; RV64ZVE32F-NEXT: andi a0, a3, 32
1799 ; RV64ZVE32F-NEXT: bnez a0, .LBB28_14
1800 ; RV64ZVE32F-NEXT: .LBB28_6: # %else10
1801 ; RV64ZVE32F-NEXT: andi a0, a3, 64
1802 ; RV64ZVE32F-NEXT: bnez a0, .LBB28_15
1803 ; RV64ZVE32F-NEXT: .LBB28_7: # %else12
1804 ; RV64ZVE32F-NEXT: andi a0, a3, -128
1805 ; RV64ZVE32F-NEXT: bnez a0, .LBB28_16
1806 ; RV64ZVE32F-NEXT: .LBB28_8: # %else14
1807 ; RV64ZVE32F-NEXT: ret
1808 ; RV64ZVE32F-NEXT: .LBB28_9: # %cond.store
1809 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
1810 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1811 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
1812 ; RV64ZVE32F-NEXT: andi a0, a3, 2
1813 ; RV64ZVE32F-NEXT: beqz a0, .LBB28_2
1814 ; RV64ZVE32F-NEXT: .LBB28_10: # %cond.store1
1815 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1816 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
1817 ; RV64ZVE32F-NEXT: vse32.v v10, (t0)
1818 ; RV64ZVE32F-NEXT: andi a0, a3, 4
1819 ; RV64ZVE32F-NEXT: beqz a0, .LBB28_3
1820 ; RV64ZVE32F-NEXT: .LBB28_11: # %cond.store3
1821 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1822 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2
1823 ; RV64ZVE32F-NEXT: vse32.v v10, (a7)
1824 ; RV64ZVE32F-NEXT: andi a0, a3, 8
1825 ; RV64ZVE32F-NEXT: beqz a0, .LBB28_4
1826 ; RV64ZVE32F-NEXT: .LBB28_12: # %cond.store5
1827 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1828 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
1829 ; RV64ZVE32F-NEXT: vse32.v v10, (a6)
1830 ; RV64ZVE32F-NEXT: andi a0, a3, 16
1831 ; RV64ZVE32F-NEXT: beqz a0, .LBB28_5
1832 ; RV64ZVE32F-NEXT: .LBB28_13: # %cond.store7
1833 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
1834 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
1835 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1836 ; RV64ZVE32F-NEXT: vse32.v v10, (a5)
1837 ; RV64ZVE32F-NEXT: andi a0, a3, 32
1838 ; RV64ZVE32F-NEXT: beqz a0, .LBB28_6
1839 ; RV64ZVE32F-NEXT: .LBB28_14: # %cond.store9
1840 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
1841 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5
1842 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1843 ; RV64ZVE32F-NEXT: vse32.v v10, (a4)
1844 ; RV64ZVE32F-NEXT: andi a0, a3, 64
1845 ; RV64ZVE32F-NEXT: beqz a0, .LBB28_7
1846 ; RV64ZVE32F-NEXT: .LBB28_15: # %cond.store11
1847 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
1848 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
1849 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1850 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
1851 ; RV64ZVE32F-NEXT: andi a0, a3, -128
1852 ; RV64ZVE32F-NEXT: beqz a0, .LBB28_8
1853 ; RV64ZVE32F-NEXT: .LBB28_16: # %cond.store13
1854 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
1855 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
1856 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1857 ; RV64ZVE32F-NEXT: vse32.v v8, (a1)
1858 ; RV64ZVE32F-NEXT: ret
1859 call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
1863 define void @mscatter_baseidx_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
1864 ; RV32-LABEL: mscatter_baseidx_v8i8_v8i32:
1866 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1867 ; RV32-NEXT: vsext.vf4 v12, v10
1868 ; RV32-NEXT: vsll.vi v10, v12, 2
1869 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
1872 ; RV64-LABEL: mscatter_baseidx_v8i8_v8i32:
1874 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1875 ; RV64-NEXT: vsext.vf8 v12, v10
1876 ; RV64-NEXT: vsll.vi v12, v12, 2
1877 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
1878 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
1881 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8_v8i32:
1882 ; RV64ZVE32F: # %bb.0:
1883 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
1884 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
1885 ; RV64ZVE32F-NEXT: andi a2, a1, 1
1886 ; RV64ZVE32F-NEXT: beqz a2, .LBB29_2
1887 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
1888 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
1889 ; RV64ZVE32F-NEXT: slli a2, a2, 2
1890 ; RV64ZVE32F-NEXT: add a2, a0, a2
1891 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1892 ; RV64ZVE32F-NEXT: vse32.v v8, (a2)
1893 ; RV64ZVE32F-NEXT: .LBB29_2: # %else
1894 ; RV64ZVE32F-NEXT: andi a2, a1, 2
1895 ; RV64ZVE32F-NEXT: beqz a2, .LBB29_4
1896 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
1897 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1898 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
1899 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
1900 ; RV64ZVE32F-NEXT: slli a2, a2, 2
1901 ; RV64ZVE32F-NEXT: add a2, a0, a2
1902 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
1903 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
1904 ; RV64ZVE32F-NEXT: vse32.v v11, (a2)
1905 ; RV64ZVE32F-NEXT: .LBB29_4: # %else2
1906 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
1907 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
1908 ; RV64ZVE32F-NEXT: andi a2, a1, 4
1909 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
1910 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
1911 ; RV64ZVE32F-NEXT: bnez a2, .LBB29_12
1912 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
1913 ; RV64ZVE32F-NEXT: andi a2, a1, 8
1914 ; RV64ZVE32F-NEXT: bnez a2, .LBB29_13
1915 ; RV64ZVE32F-NEXT: .LBB29_6: # %else6
1916 ; RV64ZVE32F-NEXT: andi a2, a1, 16
1917 ; RV64ZVE32F-NEXT: bnez a2, .LBB29_14
1918 ; RV64ZVE32F-NEXT: .LBB29_7: # %else8
1919 ; RV64ZVE32F-NEXT: andi a2, a1, 32
1920 ; RV64ZVE32F-NEXT: beqz a2, .LBB29_9
1921 ; RV64ZVE32F-NEXT: .LBB29_8: # %cond.store9
1922 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1923 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
1924 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
1925 ; RV64ZVE32F-NEXT: slli a2, a2, 2
1926 ; RV64ZVE32F-NEXT: add a2, a0, a2
1927 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
1928 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
1929 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1930 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
1931 ; RV64ZVE32F-NEXT: .LBB29_9: # %else10
1932 ; RV64ZVE32F-NEXT: andi a2, a1, 64
1933 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
1934 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
1935 ; RV64ZVE32F-NEXT: bnez a2, .LBB29_15
1936 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
1937 ; RV64ZVE32F-NEXT: andi a1, a1, -128
1938 ; RV64ZVE32F-NEXT: bnez a1, .LBB29_16
1939 ; RV64ZVE32F-NEXT: .LBB29_11: # %else14
1940 ; RV64ZVE32F-NEXT: ret
1941 ; RV64ZVE32F-NEXT: .LBB29_12: # %cond.store3
1942 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
1943 ; RV64ZVE32F-NEXT: slli a2, a2, 2
1944 ; RV64ZVE32F-NEXT: add a2, a0, a2
1945 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
1946 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
1947 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1948 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
1949 ; RV64ZVE32F-NEXT: andi a2, a1, 8
1950 ; RV64ZVE32F-NEXT: beqz a2, .LBB29_6
1951 ; RV64ZVE32F-NEXT: .LBB29_13: # %cond.store5
1952 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1953 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
1954 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
1955 ; RV64ZVE32F-NEXT: slli a2, a2, 2
1956 ; RV64ZVE32F-NEXT: add a2, a0, a2
1957 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
1958 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
1959 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
1960 ; RV64ZVE32F-NEXT: andi a2, a1, 16
1961 ; RV64ZVE32F-NEXT: beqz a2, .LBB29_7
1962 ; RV64ZVE32F-NEXT: .LBB29_14: # %cond.store7
1963 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
1964 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
1965 ; RV64ZVE32F-NEXT: slli a2, a2, 2
1966 ; RV64ZVE32F-NEXT: add a2, a0, a2
1967 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
1968 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
1969 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1970 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
1971 ; RV64ZVE32F-NEXT: andi a2, a1, 32
1972 ; RV64ZVE32F-NEXT: bnez a2, .LBB29_8
1973 ; RV64ZVE32F-NEXT: j .LBB29_9
1974 ; RV64ZVE32F-NEXT: .LBB29_15: # %cond.store11
1975 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
1976 ; RV64ZVE32F-NEXT: slli a2, a2, 2
1977 ; RV64ZVE32F-NEXT: add a2, a0, a2
1978 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
1979 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
1980 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1981 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
1982 ; RV64ZVE32F-NEXT: andi a1, a1, -128
1983 ; RV64ZVE32F-NEXT: beqz a1, .LBB29_11
1984 ; RV64ZVE32F-NEXT: .LBB29_16: # %cond.store13
1985 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1986 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
1987 ; RV64ZVE32F-NEXT: vmv.x.s a1, v10
1988 ; RV64ZVE32F-NEXT: slli a1, a1, 2
1989 ; RV64ZVE32F-NEXT: add a0, a0, a1
1990 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
1991 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
1992 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
1993 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
1994 ; RV64ZVE32F-NEXT: ret
1995 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i8> %idxs
1996 call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
2000 define void @mscatter_baseidx_sext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
2001 ; RV32-LABEL: mscatter_baseidx_sext_v8i8_v8i32:
2003 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
2004 ; RV32-NEXT: vsext.vf4 v12, v10
2005 ; RV32-NEXT: vsll.vi v10, v12, 2
2006 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
2009 ; RV64-LABEL: mscatter_baseidx_sext_v8i8_v8i32:
2011 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
2012 ; RV64-NEXT: vsext.vf8 v12, v10
2013 ; RV64-NEXT: vsll.vi v12, v12, 2
2014 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2015 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
2018 ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8i32:
2019 ; RV64ZVE32F: # %bb.0:
2020 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
2021 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
2022 ; RV64ZVE32F-NEXT: andi a2, a1, 1
2023 ; RV64ZVE32F-NEXT: beqz a2, .LBB30_2
2024 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
2025 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2026 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2027 ; RV64ZVE32F-NEXT: add a2, a0, a2
2028 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2029 ; RV64ZVE32F-NEXT: vse32.v v8, (a2)
2030 ; RV64ZVE32F-NEXT: .LBB30_2: # %else
2031 ; RV64ZVE32F-NEXT: andi a2, a1, 2
2032 ; RV64ZVE32F-NEXT: beqz a2, .LBB30_4
2033 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
2034 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2035 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
2036 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
2037 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2038 ; RV64ZVE32F-NEXT: add a2, a0, a2
2039 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2040 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
2041 ; RV64ZVE32F-NEXT: vse32.v v11, (a2)
2042 ; RV64ZVE32F-NEXT: .LBB30_4: # %else2
2043 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
2044 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
2045 ; RV64ZVE32F-NEXT: andi a2, a1, 4
2046 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
2047 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
2048 ; RV64ZVE32F-NEXT: bnez a2, .LBB30_12
2049 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
2050 ; RV64ZVE32F-NEXT: andi a2, a1, 8
2051 ; RV64ZVE32F-NEXT: bnez a2, .LBB30_13
2052 ; RV64ZVE32F-NEXT: .LBB30_6: # %else6
2053 ; RV64ZVE32F-NEXT: andi a2, a1, 16
2054 ; RV64ZVE32F-NEXT: bnez a2, .LBB30_14
2055 ; RV64ZVE32F-NEXT: .LBB30_7: # %else8
2056 ; RV64ZVE32F-NEXT: andi a2, a1, 32
2057 ; RV64ZVE32F-NEXT: beqz a2, .LBB30_9
2058 ; RV64ZVE32F-NEXT: .LBB30_8: # %cond.store9
2059 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2060 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
2061 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2062 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2063 ; RV64ZVE32F-NEXT: add a2, a0, a2
2064 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2065 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
2066 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2067 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2068 ; RV64ZVE32F-NEXT: .LBB30_9: # %else10
2069 ; RV64ZVE32F-NEXT: andi a2, a1, 64
2070 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
2071 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
2072 ; RV64ZVE32F-NEXT: bnez a2, .LBB30_15
2073 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
2074 ; RV64ZVE32F-NEXT: andi a1, a1, -128
2075 ; RV64ZVE32F-NEXT: bnez a1, .LBB30_16
2076 ; RV64ZVE32F-NEXT: .LBB30_11: # %else14
2077 ; RV64ZVE32F-NEXT: ret
2078 ; RV64ZVE32F-NEXT: .LBB30_12: # %cond.store3
2079 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2080 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2081 ; RV64ZVE32F-NEXT: add a2, a0, a2
2082 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2083 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
2084 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2085 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2086 ; RV64ZVE32F-NEXT: andi a2, a1, 8
2087 ; RV64ZVE32F-NEXT: beqz a2, .LBB30_6
2088 ; RV64ZVE32F-NEXT: .LBB30_13: # %cond.store5
2089 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2090 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
2091 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2092 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2093 ; RV64ZVE32F-NEXT: add a2, a0, a2
2094 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2095 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
2096 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
2097 ; RV64ZVE32F-NEXT: andi a2, a1, 16
2098 ; RV64ZVE32F-NEXT: beqz a2, .LBB30_7
2099 ; RV64ZVE32F-NEXT: .LBB30_14: # %cond.store7
2100 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
2101 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
2102 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2103 ; RV64ZVE32F-NEXT: add a2, a0, a2
2104 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2105 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
2106 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2107 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2108 ; RV64ZVE32F-NEXT: andi a2, a1, 32
2109 ; RV64ZVE32F-NEXT: bnez a2, .LBB30_8
2110 ; RV64ZVE32F-NEXT: j .LBB30_9
2111 ; RV64ZVE32F-NEXT: .LBB30_15: # %cond.store11
2112 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2113 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2114 ; RV64ZVE32F-NEXT: add a2, a0, a2
2115 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2116 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
2117 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2118 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2119 ; RV64ZVE32F-NEXT: andi a1, a1, -128
2120 ; RV64ZVE32F-NEXT: beqz a1, .LBB30_11
2121 ; RV64ZVE32F-NEXT: .LBB30_16: # %cond.store13
2122 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2123 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
2124 ; RV64ZVE32F-NEXT: vmv.x.s a1, v10
2125 ; RV64ZVE32F-NEXT: slli a1, a1, 2
2126 ; RV64ZVE32F-NEXT: add a0, a0, a1
2127 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2128 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
2129 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2130 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
2131 ; RV64ZVE32F-NEXT: ret
2132 %eidxs = sext <8 x i8> %idxs to <8 x i32>
2133 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
2134 call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
2138 define void @mscatter_baseidx_zext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
2139 ; RV32-LABEL: mscatter_baseidx_zext_v8i8_v8i32:
2141 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
2142 ; RV32-NEXT: vzext.vf2 v11, v10
2143 ; RV32-NEXT: vsll.vi v10, v11, 2
2144 ; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2145 ; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t
2148 ; RV64-LABEL: mscatter_baseidx_zext_v8i8_v8i32:
2150 ; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
2151 ; RV64-NEXT: vzext.vf2 v11, v10
2152 ; RV64-NEXT: vsll.vi v10, v11, 2
2153 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2154 ; RV64-NEXT: vsoxei16.v v8, (a0), v10, v0.t
2157 ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8i32:
2158 ; RV64ZVE32F: # %bb.0:
2159 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
2160 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
2161 ; RV64ZVE32F-NEXT: andi a2, a1, 1
2162 ; RV64ZVE32F-NEXT: beqz a2, .LBB31_2
2163 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
2164 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2165 ; RV64ZVE32F-NEXT: andi a2, a2, 255
2166 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2167 ; RV64ZVE32F-NEXT: add a2, a0, a2
2168 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2169 ; RV64ZVE32F-NEXT: vse32.v v8, (a2)
2170 ; RV64ZVE32F-NEXT: .LBB31_2: # %else
2171 ; RV64ZVE32F-NEXT: andi a2, a1, 2
2172 ; RV64ZVE32F-NEXT: beqz a2, .LBB31_4
2173 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
2174 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2175 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
2176 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
2177 ; RV64ZVE32F-NEXT: andi a2, a2, 255
2178 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2179 ; RV64ZVE32F-NEXT: add a2, a0, a2
2180 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2181 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
2182 ; RV64ZVE32F-NEXT: vse32.v v11, (a2)
2183 ; RV64ZVE32F-NEXT: .LBB31_4: # %else2
2184 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
2185 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
2186 ; RV64ZVE32F-NEXT: andi a2, a1, 4
2187 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
2188 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
2189 ; RV64ZVE32F-NEXT: bnez a2, .LBB31_12
2190 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
2191 ; RV64ZVE32F-NEXT: andi a2, a1, 8
2192 ; RV64ZVE32F-NEXT: bnez a2, .LBB31_13
2193 ; RV64ZVE32F-NEXT: .LBB31_6: # %else6
2194 ; RV64ZVE32F-NEXT: andi a2, a1, 16
2195 ; RV64ZVE32F-NEXT: bnez a2, .LBB31_14
2196 ; RV64ZVE32F-NEXT: .LBB31_7: # %else8
2197 ; RV64ZVE32F-NEXT: andi a2, a1, 32
2198 ; RV64ZVE32F-NEXT: beqz a2, .LBB31_9
2199 ; RV64ZVE32F-NEXT: .LBB31_8: # %cond.store9
2200 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2201 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
2202 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2203 ; RV64ZVE32F-NEXT: andi a2, a2, 255
2204 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2205 ; RV64ZVE32F-NEXT: add a2, a0, a2
2206 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2207 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
2208 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2209 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2210 ; RV64ZVE32F-NEXT: .LBB31_9: # %else10
2211 ; RV64ZVE32F-NEXT: andi a2, a1, 64
2212 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
2213 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
2214 ; RV64ZVE32F-NEXT: bnez a2, .LBB31_15
2215 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
2216 ; RV64ZVE32F-NEXT: andi a1, a1, -128
2217 ; RV64ZVE32F-NEXT: bnez a1, .LBB31_16
2218 ; RV64ZVE32F-NEXT: .LBB31_11: # %else14
2219 ; RV64ZVE32F-NEXT: ret
2220 ; RV64ZVE32F-NEXT: .LBB31_12: # %cond.store3
2221 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2222 ; RV64ZVE32F-NEXT: andi a2, a2, 255
2223 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2224 ; RV64ZVE32F-NEXT: add a2, a0, a2
2225 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2226 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
2227 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2228 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2229 ; RV64ZVE32F-NEXT: andi a2, a1, 8
2230 ; RV64ZVE32F-NEXT: beqz a2, .LBB31_6
2231 ; RV64ZVE32F-NEXT: .LBB31_13: # %cond.store5
2232 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2233 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
2234 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2235 ; RV64ZVE32F-NEXT: andi a2, a2, 255
2236 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2237 ; RV64ZVE32F-NEXT: add a2, a0, a2
2238 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2239 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
2240 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
2241 ; RV64ZVE32F-NEXT: andi a2, a1, 16
2242 ; RV64ZVE32F-NEXT: beqz a2, .LBB31_7
2243 ; RV64ZVE32F-NEXT: .LBB31_14: # %cond.store7
2244 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
2245 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
2246 ; RV64ZVE32F-NEXT: andi a2, a2, 255
2247 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2248 ; RV64ZVE32F-NEXT: add a2, a0, a2
2249 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2250 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
2251 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2252 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2253 ; RV64ZVE32F-NEXT: andi a2, a1, 32
2254 ; RV64ZVE32F-NEXT: bnez a2, .LBB31_8
2255 ; RV64ZVE32F-NEXT: j .LBB31_9
2256 ; RV64ZVE32F-NEXT: .LBB31_15: # %cond.store11
2257 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2258 ; RV64ZVE32F-NEXT: andi a2, a2, 255
2259 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2260 ; RV64ZVE32F-NEXT: add a2, a0, a2
2261 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2262 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
2263 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2264 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2265 ; RV64ZVE32F-NEXT: andi a1, a1, -128
2266 ; RV64ZVE32F-NEXT: beqz a1, .LBB31_11
2267 ; RV64ZVE32F-NEXT: .LBB31_16: # %cond.store13
2268 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2269 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
2270 ; RV64ZVE32F-NEXT: vmv.x.s a1, v10
2271 ; RV64ZVE32F-NEXT: andi a1, a1, 255
2272 ; RV64ZVE32F-NEXT: slli a1, a1, 2
2273 ; RV64ZVE32F-NEXT: add a0, a0, a1
2274 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2275 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
2276 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2277 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
2278 ; RV64ZVE32F-NEXT: ret
2279 %eidxs = zext <8 x i8> %idxs to <8 x i32>
2280 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
2281 call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
2285 define void @mscatter_baseidx_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
2286 ; RV32-LABEL: mscatter_baseidx_v8i16_v8i32:
2288 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
2289 ; RV32-NEXT: vsext.vf2 v12, v10
2290 ; RV32-NEXT: vsll.vi v10, v12, 2
2291 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
2294 ; RV64-LABEL: mscatter_baseidx_v8i16_v8i32:
2296 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
2297 ; RV64-NEXT: vsext.vf4 v12, v10
2298 ; RV64-NEXT: vsll.vi v12, v12, 2
2299 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2300 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
2303 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i16_v8i32:
2304 ; RV64ZVE32F: # %bb.0:
2305 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
2306 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
2307 ; RV64ZVE32F-NEXT: andi a2, a1, 1
2308 ; RV64ZVE32F-NEXT: beqz a2, .LBB32_2
2309 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
2310 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
2311 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2312 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2313 ; RV64ZVE32F-NEXT: add a2, a0, a2
2314 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2315 ; RV64ZVE32F-NEXT: vse32.v v8, (a2)
2316 ; RV64ZVE32F-NEXT: .LBB32_2: # %else
2317 ; RV64ZVE32F-NEXT: andi a2, a1, 2
2318 ; RV64ZVE32F-NEXT: beqz a2, .LBB32_4
2319 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
2320 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
2321 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
2322 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
2323 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2324 ; RV64ZVE32F-NEXT: add a2, a0, a2
2325 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2326 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
2327 ; RV64ZVE32F-NEXT: vse32.v v11, (a2)
2328 ; RV64ZVE32F-NEXT: .LBB32_4: # %else2
2329 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
2330 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
2331 ; RV64ZVE32F-NEXT: andi a2, a1, 4
2332 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
2333 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
2334 ; RV64ZVE32F-NEXT: bnez a2, .LBB32_12
2335 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
2336 ; RV64ZVE32F-NEXT: andi a2, a1, 8
2337 ; RV64ZVE32F-NEXT: bnez a2, .LBB32_13
2338 ; RV64ZVE32F-NEXT: .LBB32_6: # %else6
2339 ; RV64ZVE32F-NEXT: andi a2, a1, 16
2340 ; RV64ZVE32F-NEXT: bnez a2, .LBB32_14
2341 ; RV64ZVE32F-NEXT: .LBB32_7: # %else8
2342 ; RV64ZVE32F-NEXT: andi a2, a1, 32
2343 ; RV64ZVE32F-NEXT: beqz a2, .LBB32_9
2344 ; RV64ZVE32F-NEXT: .LBB32_8: # %cond.store9
2345 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
2346 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
2347 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2348 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2349 ; RV64ZVE32F-NEXT: add a2, a0, a2
2350 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2351 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
2352 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2353 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2354 ; RV64ZVE32F-NEXT: .LBB32_9: # %else10
2355 ; RV64ZVE32F-NEXT: andi a2, a1, 64
2356 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
2357 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
2358 ; RV64ZVE32F-NEXT: bnez a2, .LBB32_15
2359 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
2360 ; RV64ZVE32F-NEXT: andi a1, a1, -128
2361 ; RV64ZVE32F-NEXT: bnez a1, .LBB32_16
2362 ; RV64ZVE32F-NEXT: .LBB32_11: # %else14
2363 ; RV64ZVE32F-NEXT: ret
2364 ; RV64ZVE32F-NEXT: .LBB32_12: # %cond.store3
2365 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2366 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2367 ; RV64ZVE32F-NEXT: add a2, a0, a2
2368 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2369 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
2370 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2371 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2372 ; RV64ZVE32F-NEXT: andi a2, a1, 8
2373 ; RV64ZVE32F-NEXT: beqz a2, .LBB32_6
2374 ; RV64ZVE32F-NEXT: .LBB32_13: # %cond.store5
2375 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
2376 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
2377 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2378 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2379 ; RV64ZVE32F-NEXT: add a2, a0, a2
2380 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2381 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
2382 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
2383 ; RV64ZVE32F-NEXT: andi a2, a1, 16
2384 ; RV64ZVE32F-NEXT: beqz a2, .LBB32_7
2385 ; RV64ZVE32F-NEXT: .LBB32_14: # %cond.store7
2386 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
2387 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
2388 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2389 ; RV64ZVE32F-NEXT: add a2, a0, a2
2390 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2391 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
2392 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2393 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2394 ; RV64ZVE32F-NEXT: andi a2, a1, 32
2395 ; RV64ZVE32F-NEXT: bnez a2, .LBB32_8
2396 ; RV64ZVE32F-NEXT: j .LBB32_9
2397 ; RV64ZVE32F-NEXT: .LBB32_15: # %cond.store11
2398 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2399 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2400 ; RV64ZVE32F-NEXT: add a2, a0, a2
2401 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2402 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
2403 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2404 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2405 ; RV64ZVE32F-NEXT: andi a1, a1, -128
2406 ; RV64ZVE32F-NEXT: beqz a1, .LBB32_11
2407 ; RV64ZVE32F-NEXT: .LBB32_16: # %cond.store13
2408 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
2409 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
2410 ; RV64ZVE32F-NEXT: vmv.x.s a1, v10
2411 ; RV64ZVE32F-NEXT: slli a1, a1, 2
2412 ; RV64ZVE32F-NEXT: add a0, a0, a1
2413 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2414 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
2415 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2416 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
2417 ; RV64ZVE32F-NEXT: ret
2418 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i16> %idxs
2419 call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
2423 define void @mscatter_baseidx_sext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
2424 ; RV32-LABEL: mscatter_baseidx_sext_v8i16_v8i32:
2426 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
2427 ; RV32-NEXT: vsext.vf2 v12, v10
2428 ; RV32-NEXT: vsll.vi v10, v12, 2
2429 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
2432 ; RV64-LABEL: mscatter_baseidx_sext_v8i16_v8i32:
2434 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
2435 ; RV64-NEXT: vsext.vf4 v12, v10
2436 ; RV64-NEXT: vsll.vi v12, v12, 2
2437 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2438 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
2441 ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i16_v8i32:
2442 ; RV64ZVE32F: # %bb.0:
2443 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
2444 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
2445 ; RV64ZVE32F-NEXT: andi a2, a1, 1
2446 ; RV64ZVE32F-NEXT: beqz a2, .LBB33_2
2447 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
2448 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
2449 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2450 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2451 ; RV64ZVE32F-NEXT: add a2, a0, a2
2452 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2453 ; RV64ZVE32F-NEXT: vse32.v v8, (a2)
2454 ; RV64ZVE32F-NEXT: .LBB33_2: # %else
2455 ; RV64ZVE32F-NEXT: andi a2, a1, 2
2456 ; RV64ZVE32F-NEXT: beqz a2, .LBB33_4
2457 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
2458 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
2459 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
2460 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
2461 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2462 ; RV64ZVE32F-NEXT: add a2, a0, a2
2463 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2464 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
2465 ; RV64ZVE32F-NEXT: vse32.v v11, (a2)
2466 ; RV64ZVE32F-NEXT: .LBB33_4: # %else2
2467 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
2468 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
2469 ; RV64ZVE32F-NEXT: andi a2, a1, 4
2470 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
2471 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
2472 ; RV64ZVE32F-NEXT: bnez a2, .LBB33_12
2473 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
2474 ; RV64ZVE32F-NEXT: andi a2, a1, 8
2475 ; RV64ZVE32F-NEXT: bnez a2, .LBB33_13
2476 ; RV64ZVE32F-NEXT: .LBB33_6: # %else6
2477 ; RV64ZVE32F-NEXT: andi a2, a1, 16
2478 ; RV64ZVE32F-NEXT: bnez a2, .LBB33_14
2479 ; RV64ZVE32F-NEXT: .LBB33_7: # %else8
2480 ; RV64ZVE32F-NEXT: andi a2, a1, 32
2481 ; RV64ZVE32F-NEXT: beqz a2, .LBB33_9
2482 ; RV64ZVE32F-NEXT: .LBB33_8: # %cond.store9
2483 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
2484 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
2485 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2486 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2487 ; RV64ZVE32F-NEXT: add a2, a0, a2
2488 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2489 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
2490 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2491 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2492 ; RV64ZVE32F-NEXT: .LBB33_9: # %else10
2493 ; RV64ZVE32F-NEXT: andi a2, a1, 64
2494 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
2495 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
2496 ; RV64ZVE32F-NEXT: bnez a2, .LBB33_15
2497 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
2498 ; RV64ZVE32F-NEXT: andi a1, a1, -128
2499 ; RV64ZVE32F-NEXT: bnez a1, .LBB33_16
2500 ; RV64ZVE32F-NEXT: .LBB33_11: # %else14
2501 ; RV64ZVE32F-NEXT: ret
2502 ; RV64ZVE32F-NEXT: .LBB33_12: # %cond.store3
2503 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2504 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2505 ; RV64ZVE32F-NEXT: add a2, a0, a2
2506 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2507 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
2508 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2509 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2510 ; RV64ZVE32F-NEXT: andi a2, a1, 8
2511 ; RV64ZVE32F-NEXT: beqz a2, .LBB33_6
2512 ; RV64ZVE32F-NEXT: .LBB33_13: # %cond.store5
2513 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
2514 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
2515 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2516 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2517 ; RV64ZVE32F-NEXT: add a2, a0, a2
2518 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2519 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
2520 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
2521 ; RV64ZVE32F-NEXT: andi a2, a1, 16
2522 ; RV64ZVE32F-NEXT: beqz a2, .LBB33_7
2523 ; RV64ZVE32F-NEXT: .LBB33_14: # %cond.store7
2524 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
2525 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
2526 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2527 ; RV64ZVE32F-NEXT: add a2, a0, a2
2528 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2529 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
2530 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2531 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2532 ; RV64ZVE32F-NEXT: andi a2, a1, 32
2533 ; RV64ZVE32F-NEXT: bnez a2, .LBB33_8
2534 ; RV64ZVE32F-NEXT: j .LBB33_9
2535 ; RV64ZVE32F-NEXT: .LBB33_15: # %cond.store11
2536 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2537 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2538 ; RV64ZVE32F-NEXT: add a2, a0, a2
2539 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2540 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
2541 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2542 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2543 ; RV64ZVE32F-NEXT: andi a1, a1, -128
2544 ; RV64ZVE32F-NEXT: beqz a1, .LBB33_11
2545 ; RV64ZVE32F-NEXT: .LBB33_16: # %cond.store13
2546 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
2547 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
2548 ; RV64ZVE32F-NEXT: vmv.x.s a1, v10
2549 ; RV64ZVE32F-NEXT: slli a1, a1, 2
2550 ; RV64ZVE32F-NEXT: add a0, a0, a1
2551 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2552 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
2553 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2554 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
2555 ; RV64ZVE32F-NEXT: ret
2556 %eidxs = sext <8 x i16> %idxs to <8 x i32>
2557 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
2558 call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
; 8-element masked scatter of i32 values to base + (zext i16 index)*4.
; Because zext i16 fits in 32 bits, RV64 with full V can also use a 32-bit
; indexed store (vzext.vf2 + vsoxei32) instead of widening to e64. The
; RV64 zve32f-only lowering scalarizes and masks each extracted index with
; 0xffff (lui 16 / addiw -1) to implement the zero-extension in a GPR.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py — do not
; edit them by hand; rerun the script after any codegen change.
2562 define void @mscatter_baseidx_zext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
2563 ; RV32-LABEL: mscatter_baseidx_zext_v8i16_v8i32:
2565 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
2566 ; RV32-NEXT: vzext.vf2 v12, v10
2567 ; RV32-NEXT: vsll.vi v10, v12, 2
2568 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
2571 ; RV64-LABEL: mscatter_baseidx_zext_v8i16_v8i32:
2573 ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
2574 ; RV64-NEXT: vzext.vf2 v12, v10
2575 ; RV64-NEXT: vsll.vi v10, v12, 2
2576 ; RV64-NEXT: vsoxei32.v v8, (a0), v10, v0.t
2579 ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8i32:
2580 ; RV64ZVE32F: # %bb.0:
2581 ; RV64ZVE32F-NEXT: lui a1, 16
2582 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
2583 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
2584 ; RV64ZVE32F-NEXT: andi a3, a2, 1
2585 ; RV64ZVE32F-NEXT: addiw a1, a1, -1
2586 ; RV64ZVE32F-NEXT: beqz a3, .LBB34_2
2587 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
2588 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
2589 ; RV64ZVE32F-NEXT: vmv.x.s a3, v10
2590 ; RV64ZVE32F-NEXT: and a3, a3, a1
2591 ; RV64ZVE32F-NEXT: slli a3, a3, 2
2592 ; RV64ZVE32F-NEXT: add a3, a0, a3
2593 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2594 ; RV64ZVE32F-NEXT: vse32.v v8, (a3)
2595 ; RV64ZVE32F-NEXT: .LBB34_2: # %else
2596 ; RV64ZVE32F-NEXT: andi a3, a2, 2
2597 ; RV64ZVE32F-NEXT: beqz a3, .LBB34_4
2598 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
2599 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
2600 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
2601 ; RV64ZVE32F-NEXT: vmv.x.s a3, v11
2602 ; RV64ZVE32F-NEXT: and a3, a3, a1
2603 ; RV64ZVE32F-NEXT: slli a3, a3, 2
2604 ; RV64ZVE32F-NEXT: add a3, a0, a3
2605 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2606 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
2607 ; RV64ZVE32F-NEXT: vse32.v v11, (a3)
2608 ; RV64ZVE32F-NEXT: .LBB34_4: # %else2
2609 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
2610 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
2611 ; RV64ZVE32F-NEXT: andi a3, a2, 4
2612 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
2613 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
2614 ; RV64ZVE32F-NEXT: bnez a3, .LBB34_12
2615 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
2616 ; RV64ZVE32F-NEXT: andi a3, a2, 8
2617 ; RV64ZVE32F-NEXT: bnez a3, .LBB34_13
2618 ; RV64ZVE32F-NEXT: .LBB34_6: # %else6
2619 ; RV64ZVE32F-NEXT: andi a3, a2, 16
2620 ; RV64ZVE32F-NEXT: bnez a3, .LBB34_14
2621 ; RV64ZVE32F-NEXT: .LBB34_7: # %else8
2622 ; RV64ZVE32F-NEXT: andi a3, a2, 32
2623 ; RV64ZVE32F-NEXT: beqz a3, .LBB34_9
2624 ; RV64ZVE32F-NEXT: .LBB34_8: # %cond.store9
2625 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
2626 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
2627 ; RV64ZVE32F-NEXT: vmv.x.s a3, v10
2628 ; RV64ZVE32F-NEXT: and a3, a3, a1
2629 ; RV64ZVE32F-NEXT: slli a3, a3, 2
2630 ; RV64ZVE32F-NEXT: add a3, a0, a3
2631 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2632 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
2633 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2634 ; RV64ZVE32F-NEXT: vse32.v v12, (a3)
2635 ; RV64ZVE32F-NEXT: .LBB34_9: # %else10
2636 ; RV64ZVE32F-NEXT: andi a3, a2, 64
2637 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
2638 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
2639 ; RV64ZVE32F-NEXT: bnez a3, .LBB34_15
2640 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
2641 ; RV64ZVE32F-NEXT: andi a2, a2, -128
2642 ; RV64ZVE32F-NEXT: bnez a2, .LBB34_16
2643 ; RV64ZVE32F-NEXT: .LBB34_11: # %else14
2644 ; RV64ZVE32F-NEXT: ret
2645 ; RV64ZVE32F-NEXT: .LBB34_12: # %cond.store3
2646 ; RV64ZVE32F-NEXT: vmv.x.s a3, v10
2647 ; RV64ZVE32F-NEXT: and a3, a3, a1
2648 ; RV64ZVE32F-NEXT: slli a3, a3, 2
2649 ; RV64ZVE32F-NEXT: add a3, a0, a3
2650 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2651 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
2652 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2653 ; RV64ZVE32F-NEXT: vse32.v v12, (a3)
2654 ; RV64ZVE32F-NEXT: andi a3, a2, 8
2655 ; RV64ZVE32F-NEXT: beqz a3, .LBB34_6
2656 ; RV64ZVE32F-NEXT: .LBB34_13: # %cond.store5
2657 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
2658 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
2659 ; RV64ZVE32F-NEXT: vmv.x.s a3, v10
2660 ; RV64ZVE32F-NEXT: and a3, a3, a1
2661 ; RV64ZVE32F-NEXT: slli a3, a3, 2
2662 ; RV64ZVE32F-NEXT: add a3, a0, a3
2663 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2664 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
2665 ; RV64ZVE32F-NEXT: vse32.v v10, (a3)
2666 ; RV64ZVE32F-NEXT: andi a3, a2, 16
2667 ; RV64ZVE32F-NEXT: beqz a3, .LBB34_7
2668 ; RV64ZVE32F-NEXT: .LBB34_14: # %cond.store7
2669 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
2670 ; RV64ZVE32F-NEXT: vmv.x.s a3, v11
2671 ; RV64ZVE32F-NEXT: and a3, a3, a1
2672 ; RV64ZVE32F-NEXT: slli a3, a3, 2
2673 ; RV64ZVE32F-NEXT: add a3, a0, a3
2674 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2675 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
2676 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2677 ; RV64ZVE32F-NEXT: vse32.v v12, (a3)
2678 ; RV64ZVE32F-NEXT: andi a3, a2, 32
2679 ; RV64ZVE32F-NEXT: bnez a3, .LBB34_8
2680 ; RV64ZVE32F-NEXT: j .LBB34_9
2681 ; RV64ZVE32F-NEXT: .LBB34_15: # %cond.store11
2682 ; RV64ZVE32F-NEXT: vmv.x.s a3, v10
2683 ; RV64ZVE32F-NEXT: and a3, a3, a1
2684 ; RV64ZVE32F-NEXT: slli a3, a3, 2
2685 ; RV64ZVE32F-NEXT: add a3, a0, a3
2686 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2687 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
2688 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2689 ; RV64ZVE32F-NEXT: vse32.v v12, (a3)
2690 ; RV64ZVE32F-NEXT: andi a2, a2, -128
2691 ; RV64ZVE32F-NEXT: beqz a2, .LBB34_11
2692 ; RV64ZVE32F-NEXT: .LBB34_16: # %cond.store13
2693 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
2694 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
2695 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2696 ; RV64ZVE32F-NEXT: and a1, a2, a1
2697 ; RV64ZVE32F-NEXT: slli a1, a1, 2
2698 ; RV64ZVE32F-NEXT: add a0, a0, a1
2699 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2700 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
2701 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2702 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
2703 ; RV64ZVE32F-NEXT: ret
2704 %eidxs = zext <8 x i16> %idxs to <8 x i32>
2705 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
2706 call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
; 8-element masked scatter of i32 values to base + (i32 index)*4 (no index
; extension in the IR). RV32 shifts the indices in place and uses vsoxei32;
; RV64 must sign-extend the i32 indices to 64-bit addresses (vsext.vf2 +
; vsoxei64). The RV64 zve32f-only lowering scalarizes per mask bit.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py — do not
; edit them by hand; rerun the script after any codegen change.
2710 define void @mscatter_baseidx_v8i32(<8 x i32> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m) {
2711 ; RV32-LABEL: mscatter_baseidx_v8i32:
2713 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
2714 ; RV32-NEXT: vsll.vi v10, v10, 2
2715 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
2718 ; RV64-LABEL: mscatter_baseidx_v8i32:
2720 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
2721 ; RV64-NEXT: vsext.vf2 v12, v10
2722 ; RV64-NEXT: vsll.vi v12, v12, 2
2723 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2724 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
2727 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i32:
2728 ; RV64ZVE32F: # %bb.0:
2729 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
2730 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
2731 ; RV64ZVE32F-NEXT: andi a2, a1, 1
2732 ; RV64ZVE32F-NEXT: beqz a2, .LBB35_2
2733 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
2734 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2735 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2736 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2737 ; RV64ZVE32F-NEXT: add a2, a0, a2
2738 ; RV64ZVE32F-NEXT: vse32.v v8, (a2)
2739 ; RV64ZVE32F-NEXT: .LBB35_2: # %else
2740 ; RV64ZVE32F-NEXT: andi a2, a1, 2
2741 ; RV64ZVE32F-NEXT: beqz a2, .LBB35_4
2742 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
2743 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2744 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 1
2745 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
2746 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2747 ; RV64ZVE32F-NEXT: add a2, a0, a2
2748 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1
2749 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2750 ; RV64ZVE32F-NEXT: .LBB35_4: # %else2
2751 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
2752 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 4
2753 ; RV64ZVE32F-NEXT: andi a2, a1, 4
2754 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
2755 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
2756 ; RV64ZVE32F-NEXT: bnez a2, .LBB35_12
2757 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
2758 ; RV64ZVE32F-NEXT: andi a2, a1, 8
2759 ; RV64ZVE32F-NEXT: bnez a2, .LBB35_13
2760 ; RV64ZVE32F-NEXT: .LBB35_6: # %else6
2761 ; RV64ZVE32F-NEXT: andi a2, a1, 16
2762 ; RV64ZVE32F-NEXT: bnez a2, .LBB35_14
2763 ; RV64ZVE32F-NEXT: .LBB35_7: # %else8
2764 ; RV64ZVE32F-NEXT: andi a2, a1, 32
2765 ; RV64ZVE32F-NEXT: beqz a2, .LBB35_9
2766 ; RV64ZVE32F-NEXT: .LBB35_8: # %cond.store9
2767 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2768 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v12, 1
2769 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2770 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2771 ; RV64ZVE32F-NEXT: add a2, a0, a2
2772 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2773 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5
2774 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2775 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
2776 ; RV64ZVE32F-NEXT: .LBB35_9: # %else10
2777 ; RV64ZVE32F-NEXT: andi a2, a1, 64
2778 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
2779 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v12, 2
2780 ; RV64ZVE32F-NEXT: bnez a2, .LBB35_15
2781 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
2782 ; RV64ZVE32F-NEXT: andi a1, a1, -128
2783 ; RV64ZVE32F-NEXT: bnez a1, .LBB35_16
2784 ; RV64ZVE32F-NEXT: .LBB35_11: # %else14
2785 ; RV64ZVE32F-NEXT: ret
2786 ; RV64ZVE32F-NEXT: .LBB35_12: # %cond.store3
2787 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2788 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2789 ; RV64ZVE32F-NEXT: add a2, a0, a2
2790 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
2791 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2792 ; RV64ZVE32F-NEXT: vse32.v v11, (a2)
2793 ; RV64ZVE32F-NEXT: andi a2, a1, 8
2794 ; RV64ZVE32F-NEXT: beqz a2, .LBB35_6
2795 ; RV64ZVE32F-NEXT: .LBB35_13: # %cond.store5
2796 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2797 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
2798 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2799 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2800 ; RV64ZVE32F-NEXT: add a2, a0, a2
2801 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
2802 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
2803 ; RV64ZVE32F-NEXT: andi a2, a1, 16
2804 ; RV64ZVE32F-NEXT: beqz a2, .LBB35_7
2805 ; RV64ZVE32F-NEXT: .LBB35_14: # %cond.store7
2806 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2807 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
2808 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2809 ; RV64ZVE32F-NEXT: add a2, a0, a2
2810 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
2811 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2812 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
2813 ; RV64ZVE32F-NEXT: andi a2, a1, 32
2814 ; RV64ZVE32F-NEXT: bnez a2, .LBB35_8
2815 ; RV64ZVE32F-NEXT: j .LBB35_9
2816 ; RV64ZVE32F-NEXT: .LBB35_15: # %cond.store11
2817 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
2818 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2819 ; RV64ZVE32F-NEXT: add a2, a0, a2
2820 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2821 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
2822 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2823 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
2824 ; RV64ZVE32F-NEXT: andi a1, a1, -128
2825 ; RV64ZVE32F-NEXT: beqz a1, .LBB35_11
2826 ; RV64ZVE32F-NEXT: .LBB35_16: # %cond.store13
2827 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2828 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
2829 ; RV64ZVE32F-NEXT: vmv.x.s a1, v10
2830 ; RV64ZVE32F-NEXT: slli a1, a1, 2
2831 ; RV64ZVE32F-NEXT: add a0, a0, a1
2832 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
2833 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
2834 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2835 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
2836 ; RV64ZVE32F-NEXT: ret
2837 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %idxs
2838 call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
2842 declare void @llvm.masked.scatter.v1i64.v1p0(<1 x i64>, <1 x ptr>, i32, <1 x i1>)
; Single-element masked scatter of an i64 through an explicit pointer vector.
; Full-V targets use a masked indexed store. zve32f targets cannot hold i64
; elements in vectors, so the value arrives in GPR pairs (RV32) or a GPR
; (RV64); the mask is tested with vfirst.m and the store is done with sw/sd.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py — do not
; edit them by hand; rerun the script after any codegen change.
2844 define void @mscatter_v1i64(<1 x i64> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
2845 ; RV32V-LABEL: mscatter_v1i64:
2847 ; RV32V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
2848 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
2851 ; RV64-LABEL: mscatter_v1i64:
2853 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
2854 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
2857 ; RV32ZVE32F-LABEL: mscatter_v1i64:
2858 ; RV32ZVE32F: # %bb.0:
2859 ; RV32ZVE32F-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
2860 ; RV32ZVE32F-NEXT: vfirst.m a2, v0
2861 ; RV32ZVE32F-NEXT: bnez a2, .LBB36_2
2862 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.store
2863 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2864 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8
2865 ; RV32ZVE32F-NEXT: sw a1, 4(a2)
2866 ; RV32ZVE32F-NEXT: sw a0, 0(a2)
2867 ; RV32ZVE32F-NEXT: .LBB36_2: # %else
2868 ; RV32ZVE32F-NEXT: ret
2870 ; RV64ZVE32F-LABEL: mscatter_v1i64:
2871 ; RV64ZVE32F: # %bb.0:
2872 ; RV64ZVE32F-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
2873 ; RV64ZVE32F-NEXT: vfirst.m a2, v0
2874 ; RV64ZVE32F-NEXT: bnez a2, .LBB36_2
2875 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
2876 ; RV64ZVE32F-NEXT: sd a0, 0(a1)
2877 ; RV64ZVE32F-NEXT: .LBB36_2: # %else
2878 ; RV64ZVE32F-NEXT: ret
2879 call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> %val, <1 x ptr> %ptrs, i32 8, <1 x i1> %m)
2883 declare void @llvm.masked.scatter.v2i64.v2p0(<2 x i64>, <2 x ptr>, i32, <2 x i1>)
; 2-element masked scatter of i64 through a pointer vector. Full-V targets
; use one masked indexed store. zve32f targets scalarize: extract the mask
; byte with vmv.x.s, branch on bits 0/1, and store each element via sw pairs
; (RV32, value halves from the stack/arg regs) or sd (RV64, values in a0/a1,
; pointers in a2/a3).
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py — do not
; edit them by hand; rerun the script after any codegen change.
2885 define void @mscatter_v2i64(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
2886 ; RV32V-LABEL: mscatter_v2i64:
2888 ; RV32V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
2889 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
2892 ; RV64-LABEL: mscatter_v2i64:
2894 ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
2895 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
2898 ; RV32ZVE32F-LABEL: mscatter_v2i64:
2899 ; RV32ZVE32F: # %bb.0:
2900 ; RV32ZVE32F-NEXT: lw a2, 12(a0)
2901 ; RV32ZVE32F-NEXT: lw a1, 8(a0)
2902 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
2903 ; RV32ZVE32F-NEXT: vmv.x.s a3, v0
2904 ; RV32ZVE32F-NEXT: andi a4, a3, 1
2905 ; RV32ZVE32F-NEXT: bnez a4, .LBB37_3
2906 ; RV32ZVE32F-NEXT: # %bb.1: # %else
2907 ; RV32ZVE32F-NEXT: andi a3, a3, 2
2908 ; RV32ZVE32F-NEXT: bnez a3, .LBB37_4
2909 ; RV32ZVE32F-NEXT: .LBB37_2: # %else2
2910 ; RV32ZVE32F-NEXT: ret
2911 ; RV32ZVE32F-NEXT: .LBB37_3: # %cond.store
2912 ; RV32ZVE32F-NEXT: lw a4, 4(a0)
2913 ; RV32ZVE32F-NEXT: lw a0, 0(a0)
2914 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
2915 ; RV32ZVE32F-NEXT: vmv.x.s a5, v8
2916 ; RV32ZVE32F-NEXT: sw a4, 4(a5)
2917 ; RV32ZVE32F-NEXT: sw a0, 0(a5)
2918 ; RV32ZVE32F-NEXT: andi a3, a3, 2
2919 ; RV32ZVE32F-NEXT: beqz a3, .LBB37_2
2920 ; RV32ZVE32F-NEXT: .LBB37_4: # %cond.store1
2921 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2922 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 1
2923 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
2924 ; RV32ZVE32F-NEXT: sw a2, 4(a0)
2925 ; RV32ZVE32F-NEXT: sw a1, 0(a0)
2926 ; RV32ZVE32F-NEXT: ret
2928 ; RV64ZVE32F-LABEL: mscatter_v2i64:
2929 ; RV64ZVE32F: # %bb.0:
2930 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
2931 ; RV64ZVE32F-NEXT: vmv.x.s a4, v0
2932 ; RV64ZVE32F-NEXT: andi a5, a4, 1
2933 ; RV64ZVE32F-NEXT: bnez a5, .LBB37_3
2934 ; RV64ZVE32F-NEXT: # %bb.1: # %else
2935 ; RV64ZVE32F-NEXT: andi a4, a4, 2
2936 ; RV64ZVE32F-NEXT: bnez a4, .LBB37_4
2937 ; RV64ZVE32F-NEXT: .LBB37_2: # %else2
2938 ; RV64ZVE32F-NEXT: ret
2939 ; RV64ZVE32F-NEXT: .LBB37_3: # %cond.store
2940 ; RV64ZVE32F-NEXT: sd a0, 0(a2)
2941 ; RV64ZVE32F-NEXT: andi a4, a4, 2
2942 ; RV64ZVE32F-NEXT: beqz a4, .LBB37_2
2943 ; RV64ZVE32F-NEXT: .LBB37_4: # %cond.store1
2944 ; RV64ZVE32F-NEXT: sd a1, 0(a3)
2945 ; RV64ZVE32F-NEXT: ret
2946 call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> %val, <2 x ptr> %ptrs, i32 8, <2 x i1> %m)
2950 declare void @llvm.masked.scatter.v4i64.v4p0(<4 x i64>, <4 x ptr>, i32, <4 x i1>)
; 4-element masked scatter of i64 through a pointer vector. Full-V targets
; emit one masked indexed store (m2 group). zve32f targets scalarize into a
; branch ladder over the four mask bits; RV32 extracts each pointer from v8
; with vslidedown and stores the value halves with sw, RV64 reads values and
; pointers from memory (passed indirectly) and stores with sd.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py — do not
; edit them by hand; rerun the script after any codegen change.
2952 define void @mscatter_v4i64(<4 x i64> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
2953 ; RV32V-LABEL: mscatter_v4i64:
2955 ; RV32V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
2956 ; RV32V-NEXT: vsoxei32.v v8, (zero), v10, v0.t
2959 ; RV64-LABEL: mscatter_v4i64:
2961 ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
2962 ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
2965 ; RV32ZVE32F-LABEL: mscatter_v4i64:
2966 ; RV32ZVE32F: # %bb.0:
2967 ; RV32ZVE32F-NEXT: lw a1, 28(a0)
2968 ; RV32ZVE32F-NEXT: lw a2, 24(a0)
2969 ; RV32ZVE32F-NEXT: lw a3, 20(a0)
2970 ; RV32ZVE32F-NEXT: lw a4, 16(a0)
2971 ; RV32ZVE32F-NEXT: lw a7, 12(a0)
2972 ; RV32ZVE32F-NEXT: lw a6, 8(a0)
2973 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
2974 ; RV32ZVE32F-NEXT: vmv.x.s a5, v0
2975 ; RV32ZVE32F-NEXT: andi t0, a5, 1
2976 ; RV32ZVE32F-NEXT: bnez t0, .LBB38_5
2977 ; RV32ZVE32F-NEXT: # %bb.1: # %else
2978 ; RV32ZVE32F-NEXT: andi a0, a5, 2
2979 ; RV32ZVE32F-NEXT: bnez a0, .LBB38_6
2980 ; RV32ZVE32F-NEXT: .LBB38_2: # %else2
2981 ; RV32ZVE32F-NEXT: andi a0, a5, 4
2982 ; RV32ZVE32F-NEXT: bnez a0, .LBB38_7
2983 ; RV32ZVE32F-NEXT: .LBB38_3: # %else4
2984 ; RV32ZVE32F-NEXT: andi a5, a5, 8
2985 ; RV32ZVE32F-NEXT: bnez a5, .LBB38_8
2986 ; RV32ZVE32F-NEXT: .LBB38_4: # %else6
2987 ; RV32ZVE32F-NEXT: ret
2988 ; RV32ZVE32F-NEXT: .LBB38_5: # %cond.store
2989 ; RV32ZVE32F-NEXT: lw t0, 4(a0)
2990 ; RV32ZVE32F-NEXT: lw a0, 0(a0)
2991 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
2992 ; RV32ZVE32F-NEXT: vmv.x.s t1, v8
2993 ; RV32ZVE32F-NEXT: sw t0, 4(t1)
2994 ; RV32ZVE32F-NEXT: sw a0, 0(t1)
2995 ; RV32ZVE32F-NEXT: andi a0, a5, 2
2996 ; RV32ZVE32F-NEXT: beqz a0, .LBB38_2
2997 ; RV32ZVE32F-NEXT: .LBB38_6: # %cond.store1
2998 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2999 ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1
3000 ; RV32ZVE32F-NEXT: vmv.x.s a0, v9
3001 ; RV32ZVE32F-NEXT: sw a7, 4(a0)
3002 ; RV32ZVE32F-NEXT: sw a6, 0(a0)
3003 ; RV32ZVE32F-NEXT: andi a0, a5, 4
3004 ; RV32ZVE32F-NEXT: beqz a0, .LBB38_3
3005 ; RV32ZVE32F-NEXT: .LBB38_7: # %cond.store3
3006 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3007 ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2
3008 ; RV32ZVE32F-NEXT: vmv.x.s a0, v9
3009 ; RV32ZVE32F-NEXT: sw a4, 0(a0)
3010 ; RV32ZVE32F-NEXT: sw a3, 4(a0)
3011 ; RV32ZVE32F-NEXT: andi a5, a5, 8
3012 ; RV32ZVE32F-NEXT: beqz a5, .LBB38_4
3013 ; RV32ZVE32F-NEXT: .LBB38_8: # %cond.store5
3014 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3015 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3
3016 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
3017 ; RV32ZVE32F-NEXT: sw a2, 0(a0)
3018 ; RV32ZVE32F-NEXT: sw a1, 4(a0)
3019 ; RV32ZVE32F-NEXT: ret
3021 ; RV64ZVE32F-LABEL: mscatter_v4i64:
3022 ; RV64ZVE32F: # %bb.0:
3023 ; RV64ZVE32F-NEXT: ld a2, 24(a1)
3024 ; RV64ZVE32F-NEXT: ld a4, 16(a1)
3025 ; RV64ZVE32F-NEXT: ld a7, 8(a1)
3026 ; RV64ZVE32F-NEXT: ld a3, 24(a0)
3027 ; RV64ZVE32F-NEXT: ld a5, 16(a0)
3028 ; RV64ZVE32F-NEXT: ld t0, 8(a0)
3029 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
3030 ; RV64ZVE32F-NEXT: vmv.x.s a6, v0
3031 ; RV64ZVE32F-NEXT: andi t1, a6, 1
3032 ; RV64ZVE32F-NEXT: bnez t1, .LBB38_5
3033 ; RV64ZVE32F-NEXT: # %bb.1: # %else
3034 ; RV64ZVE32F-NEXT: andi a0, a6, 2
3035 ; RV64ZVE32F-NEXT: bnez a0, .LBB38_6
3036 ; RV64ZVE32F-NEXT: .LBB38_2: # %else2
3037 ; RV64ZVE32F-NEXT: andi a0, a6, 4
3038 ; RV64ZVE32F-NEXT: bnez a0, .LBB38_7
3039 ; RV64ZVE32F-NEXT: .LBB38_3: # %else4
3040 ; RV64ZVE32F-NEXT: andi a0, a6, 8
3041 ; RV64ZVE32F-NEXT: bnez a0, .LBB38_8
3042 ; RV64ZVE32F-NEXT: .LBB38_4: # %else6
3043 ; RV64ZVE32F-NEXT: ret
3044 ; RV64ZVE32F-NEXT: .LBB38_5: # %cond.store
3045 ; RV64ZVE32F-NEXT: ld a1, 0(a1)
3046 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
3047 ; RV64ZVE32F-NEXT: sd a0, 0(a1)
3048 ; RV64ZVE32F-NEXT: andi a0, a6, 2
3049 ; RV64ZVE32F-NEXT: beqz a0, .LBB38_2
3050 ; RV64ZVE32F-NEXT: .LBB38_6: # %cond.store1
3051 ; RV64ZVE32F-NEXT: sd t0, 0(a7)
3052 ; RV64ZVE32F-NEXT: andi a0, a6, 4
3053 ; RV64ZVE32F-NEXT: beqz a0, .LBB38_3
3054 ; RV64ZVE32F-NEXT: .LBB38_7: # %cond.store3
3055 ; RV64ZVE32F-NEXT: sd a5, 0(a4)
3056 ; RV64ZVE32F-NEXT: andi a0, a6, 8
3057 ; RV64ZVE32F-NEXT: beqz a0, .LBB38_4
3058 ; RV64ZVE32F-NEXT: .LBB38_8: # %cond.store5
3059 ; RV64ZVE32F-NEXT: sd a3, 0(a2)
3060 ; RV64ZVE32F-NEXT: ret
3061 call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> %val, <4 x ptr> %ptrs, i32 8, <4 x i1> %m)
; All-ones mask: the scatter becomes unconditional. Full-V targets drop the
; v0.t mask operand from the indexed store; zve32f targets emit straight-line
; stores for all four elements with no branches.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py — do not
; edit them by hand; rerun the script after any codegen change.
3065 define void @mscatter_truemask_v4i64(<4 x i64> %val, <4 x ptr> %ptrs) {
3066 ; RV32V-LABEL: mscatter_truemask_v4i64:
3068 ; RV32V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
3069 ; RV32V-NEXT: vsoxei32.v v8, (zero), v10
3072 ; RV64-LABEL: mscatter_truemask_v4i64:
3074 ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
3075 ; RV64-NEXT: vsoxei64.v v8, (zero), v10
3078 ; RV32ZVE32F-LABEL: mscatter_truemask_v4i64:
3079 ; RV32ZVE32F: # %bb.0:
3080 ; RV32ZVE32F-NEXT: lw a1, 28(a0)
3081 ; RV32ZVE32F-NEXT: lw a2, 24(a0)
3082 ; RV32ZVE32F-NEXT: lw a3, 20(a0)
3083 ; RV32ZVE32F-NEXT: lw a4, 16(a0)
3084 ; RV32ZVE32F-NEXT: lw a5, 12(a0)
3085 ; RV32ZVE32F-NEXT: lw a6, 0(a0)
3086 ; RV32ZVE32F-NEXT: lw a7, 4(a0)
3087 ; RV32ZVE32F-NEXT: lw a0, 8(a0)
3088 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3089 ; RV32ZVE32F-NEXT: vmv.x.s t0, v8
3090 ; RV32ZVE32F-NEXT: sw a6, 0(t0)
3091 ; RV32ZVE32F-NEXT: sw a7, 4(t0)
3092 ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1
3093 ; RV32ZVE32F-NEXT: vmv.x.s a6, v9
3094 ; RV32ZVE32F-NEXT: sw a0, 0(a6)
3095 ; RV32ZVE32F-NEXT: sw a5, 4(a6)
3096 ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2
3097 ; RV32ZVE32F-NEXT: vmv.x.s a0, v9
3098 ; RV32ZVE32F-NEXT: sw a4, 0(a0)
3099 ; RV32ZVE32F-NEXT: sw a3, 4(a0)
3100 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3
3101 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
3102 ; RV32ZVE32F-NEXT: sw a2, 0(a0)
3103 ; RV32ZVE32F-NEXT: sw a1, 4(a0)
3104 ; RV32ZVE32F-NEXT: ret
3106 ; RV64ZVE32F-LABEL: mscatter_truemask_v4i64:
3107 ; RV64ZVE32F: # %bb.0:
3108 ; RV64ZVE32F-NEXT: ld a2, 24(a1)
3109 ; RV64ZVE32F-NEXT: ld a3, 16(a1)
3110 ; RV64ZVE32F-NEXT: ld a4, 8(a1)
3111 ; RV64ZVE32F-NEXT: ld a1, 0(a1)
3112 ; RV64ZVE32F-NEXT: ld a5, 0(a0)
3113 ; RV64ZVE32F-NEXT: ld a6, 8(a0)
3114 ; RV64ZVE32F-NEXT: ld a7, 16(a0)
3115 ; RV64ZVE32F-NEXT: ld a0, 24(a0)
3116 ; RV64ZVE32F-NEXT: sd a5, 0(a1)
3117 ; RV64ZVE32F-NEXT: sd a6, 0(a4)
3118 ; RV64ZVE32F-NEXT: sd a7, 0(a3)
3119 ; RV64ZVE32F-NEXT: sd a0, 0(a2)
3120 ; RV64ZVE32F-NEXT: ret
3121 call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> %val, <4 x ptr> %ptrs, i32 8, <4 x i1> splat (i1 1))
; All-zeros mask: the scatter is a no-op and folds away on every target,
; hence the single shared CHECK-LABEL with no instruction checks.
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py — do not
; edit them by hand; rerun the script after any codegen change.
3125 define void @mscatter_falsemask_v4i64(<4 x i64> %val, <4 x ptr> %ptrs) {
3126 ; CHECK-LABEL: mscatter_falsemask_v4i64:
3129 call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> %val, <4 x ptr> %ptrs, i32 8, <4 x i1> zeroinitializer)
3133 declare void @llvm.masked.scatter.v8i64.v8p0(<8 x i64>, <8 x ptr>, i32, <8 x i1>)
; Masked scatter of 8 x i64 through a vector of pointers with a runtime mask.
; Full-V targets emit a single masked indexed store (vsoxei32/vsoxei64).
; ZVE32F targets have no 64-bit vector elements, so the scatter is scalarized:
; the mask is moved to a scalar (vmv.x.s from v0), each bit is tested with
; andi, and each active lane does plain scalar stores (sw pairs on RV32,
; sd on RV64).
define void @mscatter_v8i64(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; RV32V-LABEL: mscatter_v8i64:
; RV32V-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV32V-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV64-LABEL: mscatter_v8i64:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV32ZVE32F-LABEL: mscatter_v8i64:
; RV32ZVE32F:       # %bb.0:
; RV32ZVE32F-NEXT:    addi sp, sp, -16
; RV32ZVE32F-NEXT:    .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT:    sw s1, 8(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT:    sw s2, 4(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT:    .cfi_offset s0, -4
; RV32ZVE32F-NEXT:    .cfi_offset s1, -8
; RV32ZVE32F-NEXT:    .cfi_offset s2, -12
; RV32ZVE32F-NEXT:    lw a1, 60(a0)
; RV32ZVE32F-NEXT:    lw a2, 56(a0)
; RV32ZVE32F-NEXT:    lw a3, 52(a0)
; RV32ZVE32F-NEXT:    lw a4, 48(a0)
; RV32ZVE32F-NEXT:    lw a5, 44(a0)
; RV32ZVE32F-NEXT:    lw a7, 40(a0)
; RV32ZVE32F-NEXT:    lw t0, 36(a0)
; RV32ZVE32F-NEXT:    lw t1, 32(a0)
; RV32ZVE32F-NEXT:    lw t2, 28(a0)
; RV32ZVE32F-NEXT:    lw t3, 24(a0)
; RV32ZVE32F-NEXT:    lw t4, 20(a0)
; RV32ZVE32F-NEXT:    lw t5, 16(a0)
; RV32ZVE32F-NEXT:    lw s0, 12(a0)
; RV32ZVE32F-NEXT:    lw t6, 8(a0)
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV32ZVE32F-NEXT:    vmv.x.s a6, v0
; RV32ZVE32F-NEXT:    andi s1, a6, 1
; RV32ZVE32F-NEXT:    bnez s1, .LBB41_10
; RV32ZVE32F-NEXT:  # %bb.1: # %else
; RV32ZVE32F-NEXT:    andi a0, a6, 2
; RV32ZVE32F-NEXT:    bnez a0, .LBB41_11
; RV32ZVE32F-NEXT:  .LBB41_2: # %else2
; RV32ZVE32F-NEXT:    andi a0, a6, 4
; RV32ZVE32F-NEXT:    bnez a0, .LBB41_12
; RV32ZVE32F-NEXT:  .LBB41_3: # %else4
; RV32ZVE32F-NEXT:    andi a0, a6, 8
; RV32ZVE32F-NEXT:    bnez a0, .LBB41_13
; RV32ZVE32F-NEXT:  .LBB41_4: # %else6
; RV32ZVE32F-NEXT:    andi a0, a6, 16
; RV32ZVE32F-NEXT:    bnez a0, .LBB41_14
; RV32ZVE32F-NEXT:  .LBB41_5: # %else8
; RV32ZVE32F-NEXT:    andi a0, a6, 32
; RV32ZVE32F-NEXT:    bnez a0, .LBB41_15
; RV32ZVE32F-NEXT:  .LBB41_6: # %else10
; RV32ZVE32F-NEXT:    andi a0, a6, 64
; RV32ZVE32F-NEXT:    bnez a0, .LBB41_16
; RV32ZVE32F-NEXT:  .LBB41_7: # %else12
; RV32ZVE32F-NEXT:    andi a0, a6, -128
; RV32ZVE32F-NEXT:    beqz a0, .LBB41_9
; RV32ZVE32F-NEXT:  .LBB41_8: # %cond.store13
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT:    vmv.x.s a0, v8
; RV32ZVE32F-NEXT:    sw a2, 0(a0)
; RV32ZVE32F-NEXT:    sw a1, 4(a0)
; RV32ZVE32F-NEXT:  .LBB41_9: # %else14
; RV32ZVE32F-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT:    lw s2, 4(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT:    addi sp, sp, 16
; RV32ZVE32F-NEXT:    ret
; RV32ZVE32F-NEXT:  .LBB41_10: # %cond.store
; RV32ZVE32F-NEXT:    lw s1, 4(a0)
; RV32ZVE32F-NEXT:    lw a0, 0(a0)
; RV32ZVE32F-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV32ZVE32F-NEXT:    vmv.x.s s2, v8
; RV32ZVE32F-NEXT:    sw s1, 4(s2)
; RV32ZVE32F-NEXT:    sw a0, 0(s2)
; RV32ZVE32F-NEXT:    andi a0, a6, 2
; RV32ZVE32F-NEXT:    beqz a0, .LBB41_2
; RV32ZVE32F-NEXT:  .LBB41_11: # %cond.store1
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    sw s0, 4(a0)
; RV32ZVE32F-NEXT:    sw t6, 0(a0)
; RV32ZVE32F-NEXT:    andi a0, a6, 4
; RV32ZVE32F-NEXT:    beqz a0, .LBB41_3
; RV32ZVE32F-NEXT:  .LBB41_12: # %cond.store3
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    sw t5, 0(a0)
; RV32ZVE32F-NEXT:    sw t4, 4(a0)
; RV32ZVE32F-NEXT:    andi a0, a6, 8
; RV32ZVE32F-NEXT:    beqz a0, .LBB41_4
; RV32ZVE32F-NEXT:  .LBB41_13: # %cond.store5
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    sw t3, 0(a0)
; RV32ZVE32F-NEXT:    sw t2, 4(a0)
; RV32ZVE32F-NEXT:    andi a0, a6, 16
; RV32ZVE32F-NEXT:    beqz a0, .LBB41_5
; RV32ZVE32F-NEXT:  .LBB41_14: # %cond.store7
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    sw t1, 0(a0)
; RV32ZVE32F-NEXT:    sw t0, 4(a0)
; RV32ZVE32F-NEXT:    andi a0, a6, 32
; RV32ZVE32F-NEXT:    beqz a0, .LBB41_6
; RV32ZVE32F-NEXT:  .LBB41_15: # %cond.store9
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    sw a7, 0(a0)
; RV32ZVE32F-NEXT:    sw a5, 4(a0)
; RV32ZVE32F-NEXT:    andi a0, a6, 64
; RV32ZVE32F-NEXT:    beqz a0, .LBB41_7
; RV32ZVE32F-NEXT:  .LBB41_16: # %cond.store11
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    sw a4, 0(a0)
; RV32ZVE32F-NEXT:    sw a3, 4(a0)
; RV32ZVE32F-NEXT:    andi a0, a6, -128
; RV32ZVE32F-NEXT:    bnez a0, .LBB41_8
; RV32ZVE32F-NEXT:    j .LBB41_9
; RV64ZVE32F-LABEL: mscatter_v8i64:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    addi sp, sp, -32
; RV64ZVE32F-NEXT:    .cfi_def_cfa_offset 32
; RV64ZVE32F-NEXT:    sd s0, 24(sp) # 8-byte Folded Spill
; RV64ZVE32F-NEXT:    sd s1, 16(sp) # 8-byte Folded Spill
; RV64ZVE32F-NEXT:    sd s2, 8(sp) # 8-byte Folded Spill
; RV64ZVE32F-NEXT:    .cfi_offset s0, -8
; RV64ZVE32F-NEXT:    .cfi_offset s1, -16
; RV64ZVE32F-NEXT:    .cfi_offset s2, -24
; RV64ZVE32F-NEXT:    ld a2, 56(a1)
; RV64ZVE32F-NEXT:    ld a4, 48(a1)
; RV64ZVE32F-NEXT:    ld a6, 40(a1)
; RV64ZVE32F-NEXT:    ld t1, 32(a1)
; RV64ZVE32F-NEXT:    ld t3, 24(a1)
; RV64ZVE32F-NEXT:    ld t5, 16(a1)
; RV64ZVE32F-NEXT:    ld s0, 8(a1)
; RV64ZVE32F-NEXT:    ld a3, 56(a0)
; RV64ZVE32F-NEXT:    ld a5, 48(a0)
; RV64ZVE32F-NEXT:    ld t0, 40(a0)
; RV64ZVE32F-NEXT:    ld t2, 32(a0)
; RV64ZVE32F-NEXT:    ld t4, 24(a0)
; RV64ZVE32F-NEXT:    ld t6, 16(a0)
; RV64ZVE32F-NEXT:    ld s1, 8(a0)
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a7, v0
; RV64ZVE32F-NEXT:    andi s2, a7, 1
; RV64ZVE32F-NEXT:    bnez s2, .LBB41_10
; RV64ZVE32F-NEXT:  # %bb.1: # %else
; RV64ZVE32F-NEXT:    andi a0, a7, 2
; RV64ZVE32F-NEXT:    bnez a0, .LBB41_11
; RV64ZVE32F-NEXT:  .LBB41_2: # %else2
; RV64ZVE32F-NEXT:    andi a0, a7, 4
; RV64ZVE32F-NEXT:    bnez a0, .LBB41_12
; RV64ZVE32F-NEXT:  .LBB41_3: # %else4
; RV64ZVE32F-NEXT:    andi a0, a7, 8
; RV64ZVE32F-NEXT:    bnez a0, .LBB41_13
; RV64ZVE32F-NEXT:  .LBB41_4: # %else6
; RV64ZVE32F-NEXT:    andi a0, a7, 16
; RV64ZVE32F-NEXT:    bnez a0, .LBB41_14
; RV64ZVE32F-NEXT:  .LBB41_5: # %else8
; RV64ZVE32F-NEXT:    andi a0, a7, 32
; RV64ZVE32F-NEXT:    bnez a0, .LBB41_15
; RV64ZVE32F-NEXT:  .LBB41_6: # %else10
; RV64ZVE32F-NEXT:    andi a0, a7, 64
; RV64ZVE32F-NEXT:    bnez a0, .LBB41_16
; RV64ZVE32F-NEXT:  .LBB41_7: # %else12
; RV64ZVE32F-NEXT:    andi a0, a7, -128
; RV64ZVE32F-NEXT:    beqz a0, .LBB41_9
; RV64ZVE32F-NEXT:  .LBB41_8: # %cond.store13
; RV64ZVE32F-NEXT:    sd a3, 0(a2)
; RV64ZVE32F-NEXT:  .LBB41_9: # %else14
; RV64ZVE32F-NEXT:    ld s0, 24(sp) # 8-byte Folded Reload
; RV64ZVE32F-NEXT:    ld s1, 16(sp) # 8-byte Folded Reload
; RV64ZVE32F-NEXT:    ld s2, 8(sp) # 8-byte Folded Reload
; RV64ZVE32F-NEXT:    addi sp, sp, 32
; RV64ZVE32F-NEXT:    ret
; RV64ZVE32F-NEXT:  .LBB41_10: # %cond.store
; RV64ZVE32F-NEXT:    ld a1, 0(a1)
; RV64ZVE32F-NEXT:    ld a0, 0(a0)
; RV64ZVE32F-NEXT:    sd a0, 0(a1)
; RV64ZVE32F-NEXT:    andi a0, a7, 2
; RV64ZVE32F-NEXT:    beqz a0, .LBB41_2
; RV64ZVE32F-NEXT:  .LBB41_11: # %cond.store1
; RV64ZVE32F-NEXT:    sd s1, 0(s0)
; RV64ZVE32F-NEXT:    andi a0, a7, 4
; RV64ZVE32F-NEXT:    beqz a0, .LBB41_3
; RV64ZVE32F-NEXT:  .LBB41_12: # %cond.store3
; RV64ZVE32F-NEXT:    sd t6, 0(t5)
; RV64ZVE32F-NEXT:    andi a0, a7, 8
; RV64ZVE32F-NEXT:    beqz a0, .LBB41_4
; RV64ZVE32F-NEXT:  .LBB41_13: # %cond.store5
; RV64ZVE32F-NEXT:    sd t4, 0(t3)
; RV64ZVE32F-NEXT:    andi a0, a7, 16
; RV64ZVE32F-NEXT:    beqz a0, .LBB41_5
; RV64ZVE32F-NEXT:  .LBB41_14: # %cond.store7
; RV64ZVE32F-NEXT:    sd t2, 0(t1)
; RV64ZVE32F-NEXT:    andi a0, a7, 32
; RV64ZVE32F-NEXT:    beqz a0, .LBB41_6
; RV64ZVE32F-NEXT:  .LBB41_15: # %cond.store9
; RV64ZVE32F-NEXT:    sd t0, 0(a6)
; RV64ZVE32F-NEXT:    andi a0, a7, 64
; RV64ZVE32F-NEXT:    beqz a0, .LBB41_7
; RV64ZVE32F-NEXT:  .LBB41_16: # %cond.store11
; RV64ZVE32F-NEXT:    sd a5, 0(a4)
; RV64ZVE32F-NEXT:    andi a0, a7, -128
; RV64ZVE32F-NEXT:    bnez a0, .LBB41_8
; RV64ZVE32F-NEXT:    j .LBB41_9
  ; 8-byte-aligned scatter of %val through %ptrs under runtime mask %m.
  call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
; Scatter of 8 x i64 addressed as base + sign-extended i8 index (GEP over i64,
; so each index is shifted left by 3). V targets widen the indices
; (vsext.vf4/vsext.vf8), scale with vsll.vi, and issue one masked vsoxei.
; RV32ZVE32F computes the 32-bit addresses in vector registers (vadd.vx of the
; scaled indices onto the base) and then scalarizes the stores per mask bit;
; RV64ZVE32F extracts each index with vslidedown/vmv.x.s and forms addresses
; with scalar slli/add.
define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
; RV32V-LABEL: mscatter_baseidx_v8i8_v8i64:
; RV32V-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT:    vsext.vf4 v14, v12
; RV32V-NEXT:    vsll.vi v12, v14, 3
; RV32V-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
; RV32V-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV64-LABEL: mscatter_baseidx_v8i8_v8i64:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v12, v16, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV32ZVE32F-LABEL: mscatter_baseidx_v8i8_v8i64:
; RV32ZVE32F:       # %bb.0:
; RV32ZVE32F-NEXT:    addi sp, sp, -16
; RV32ZVE32F-NEXT:    .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT:    sw s1, 8(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT:    sw s2, 4(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT:    .cfi_offset s0, -4
; RV32ZVE32F-NEXT:    .cfi_offset s1, -8
; RV32ZVE32F-NEXT:    .cfi_offset s2, -12
; RV32ZVE32F-NEXT:    lw a2, 60(a0)
; RV32ZVE32F-NEXT:    lw a3, 56(a0)
; RV32ZVE32F-NEXT:    lw a4, 52(a0)
; RV32ZVE32F-NEXT:    lw a5, 48(a0)
; RV32ZVE32F-NEXT:    lw a6, 44(a0)
; RV32ZVE32F-NEXT:    lw a7, 40(a0)
; RV32ZVE32F-NEXT:    lw t1, 36(a0)
; RV32ZVE32F-NEXT:    lw t2, 32(a0)
; RV32ZVE32F-NEXT:    lw t3, 28(a0)
; RV32ZVE32F-NEXT:    lw t4, 24(a0)
; RV32ZVE32F-NEXT:    lw t5, 20(a0)
; RV32ZVE32F-NEXT:    lw t6, 16(a0)
; RV32ZVE32F-NEXT:    lw s1, 12(a0)
; RV32ZVE32F-NEXT:    lw s0, 8(a0)
; RV32ZVE32F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vsext.vf4 v10, v8
; RV32ZVE32F-NEXT:    vsll.vi v8, v10, 3
; RV32ZVE32F-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT:    vmv.x.s t0, v0
; RV32ZVE32F-NEXT:    andi s2, t0, 1
; RV32ZVE32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT:    bnez s2, .LBB42_10
; RV32ZVE32F-NEXT:  # %bb.1: # %else
; RV32ZVE32F-NEXT:    andi a0, t0, 2
; RV32ZVE32F-NEXT:    bnez a0, .LBB42_11
; RV32ZVE32F-NEXT:  .LBB42_2: # %else2
; RV32ZVE32F-NEXT:    andi a0, t0, 4
; RV32ZVE32F-NEXT:    bnez a0, .LBB42_12
; RV32ZVE32F-NEXT:  .LBB42_3: # %else4
; RV32ZVE32F-NEXT:    andi a0, t0, 8
; RV32ZVE32F-NEXT:    bnez a0, .LBB42_13
; RV32ZVE32F-NEXT:  .LBB42_4: # %else6
; RV32ZVE32F-NEXT:    andi a0, t0, 16
; RV32ZVE32F-NEXT:    bnez a0, .LBB42_14
; RV32ZVE32F-NEXT:  .LBB42_5: # %else8
; RV32ZVE32F-NEXT:    andi a0, t0, 32
; RV32ZVE32F-NEXT:    bnez a0, .LBB42_15
; RV32ZVE32F-NEXT:  .LBB42_6: # %else10
; RV32ZVE32F-NEXT:    andi a0, t0, 64
; RV32ZVE32F-NEXT:    bnez a0, .LBB42_16
; RV32ZVE32F-NEXT:  .LBB42_7: # %else12
; RV32ZVE32F-NEXT:    andi a0, t0, -128
; RV32ZVE32F-NEXT:    beqz a0, .LBB42_9
; RV32ZVE32F-NEXT:  .LBB42_8: # %cond.store13
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT:    vmv.x.s a0, v8
; RV32ZVE32F-NEXT:    sw a3, 0(a0)
; RV32ZVE32F-NEXT:    sw a2, 4(a0)
; RV32ZVE32F-NEXT:  .LBB42_9: # %else14
; RV32ZVE32F-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT:    lw s2, 4(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT:    addi sp, sp, 16
; RV32ZVE32F-NEXT:    ret
; RV32ZVE32F-NEXT:  .LBB42_10: # %cond.store
; RV32ZVE32F-NEXT:    lw a1, 4(a0)
; RV32ZVE32F-NEXT:    lw a0, 0(a0)
; RV32ZVE32F-NEXT:    vmv.x.s s2, v8
; RV32ZVE32F-NEXT:    sw a1, 4(s2)
; RV32ZVE32F-NEXT:    sw a0, 0(s2)
; RV32ZVE32F-NEXT:    andi a0, t0, 2
; RV32ZVE32F-NEXT:    beqz a0, .LBB42_2
; RV32ZVE32F-NEXT:  .LBB42_11: # %cond.store1
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    sw s1, 4(a0)
; RV32ZVE32F-NEXT:    sw s0, 0(a0)
; RV32ZVE32F-NEXT:    andi a0, t0, 4
; RV32ZVE32F-NEXT:    beqz a0, .LBB42_3
; RV32ZVE32F-NEXT:  .LBB42_12: # %cond.store3
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    sw t6, 0(a0)
; RV32ZVE32F-NEXT:    sw t5, 4(a0)
; RV32ZVE32F-NEXT:    andi a0, t0, 8
; RV32ZVE32F-NEXT:    beqz a0, .LBB42_4
; RV32ZVE32F-NEXT:  .LBB42_13: # %cond.store5
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    sw t4, 0(a0)
; RV32ZVE32F-NEXT:    sw t3, 4(a0)
; RV32ZVE32F-NEXT:    andi a0, t0, 16
; RV32ZVE32F-NEXT:    beqz a0, .LBB42_5
; RV32ZVE32F-NEXT:  .LBB42_14: # %cond.store7
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    sw t2, 0(a0)
; RV32ZVE32F-NEXT:    sw t1, 4(a0)
; RV32ZVE32F-NEXT:    andi a0, t0, 32
; RV32ZVE32F-NEXT:    beqz a0, .LBB42_6
; RV32ZVE32F-NEXT:  .LBB42_15: # %cond.store9
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    sw a7, 0(a0)
; RV32ZVE32F-NEXT:    sw a6, 4(a0)
; RV32ZVE32F-NEXT:    andi a0, t0, 64
; RV32ZVE32F-NEXT:    beqz a0, .LBB42_7
; RV32ZVE32F-NEXT:  .LBB42_16: # %cond.store11
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    sw a5, 0(a0)
; RV32ZVE32F-NEXT:    sw a4, 4(a0)
; RV32ZVE32F-NEXT:    andi a0, t0, -128
; RV32ZVE32F-NEXT:    bnez a0, .LBB42_8
; RV32ZVE32F-NEXT:    j .LBB42_9
; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8_v8i64:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    ld a2, 56(a0)
; RV64ZVE32F-NEXT:    ld a3, 48(a0)
; RV64ZVE32F-NEXT:    ld a5, 40(a0)
; RV64ZVE32F-NEXT:    ld a6, 32(a0)
; RV64ZVE32F-NEXT:    ld a7, 24(a0)
; RV64ZVE32F-NEXT:    ld t0, 16(a0)
; RV64ZVE32F-NEXT:    ld t1, 8(a0)
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a4, v0
; RV64ZVE32F-NEXT:    andi t2, a4, 1
; RV64ZVE32F-NEXT:    beqz t2, .LBB42_2
; RV64ZVE32F-NEXT:  # %bb.1: # %cond.store
; RV64ZVE32F-NEXT:    ld a0, 0(a0)
; RV64ZVE32F-NEXT:    vmv.x.s t2, v8
; RV64ZVE32F-NEXT:    slli t2, t2, 3
; RV64ZVE32F-NEXT:    add t2, a1, t2
; RV64ZVE32F-NEXT:    sd a0, 0(t2)
; RV64ZVE32F-NEXT:  .LBB42_2: # %else
; RV64ZVE32F-NEXT:    andi a0, a4, 2
; RV64ZVE32F-NEXT:    beqz a0, .LBB42_4
; RV64ZVE32F-NEXT:  # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT:    vmv.x.s a0, v9
; RV64ZVE32F-NEXT:    slli a0, a0, 3
; RV64ZVE32F-NEXT:    add a0, a1, a0
; RV64ZVE32F-NEXT:    sd t1, 0(a0)
; RV64ZVE32F-NEXT:  .LBB42_4: # %else2
; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT:    andi a0, a4, 4
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT:    bnez a0, .LBB42_12
; RV64ZVE32F-NEXT:  # %bb.5: # %else4
; RV64ZVE32F-NEXT:    andi a0, a4, 8
; RV64ZVE32F-NEXT:    bnez a0, .LBB42_13
; RV64ZVE32F-NEXT:  .LBB42_6: # %else6
; RV64ZVE32F-NEXT:    andi a0, a4, 16
; RV64ZVE32F-NEXT:    bnez a0, .LBB42_14
; RV64ZVE32F-NEXT:  .LBB42_7: # %else8
; RV64ZVE32F-NEXT:    andi a0, a4, 32
; RV64ZVE32F-NEXT:    beqz a0, .LBB42_9
; RV64ZVE32F-NEXT:  .LBB42_8: # %cond.store9
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT:    vmv.x.s a0, v8
; RV64ZVE32F-NEXT:    slli a0, a0, 3
; RV64ZVE32F-NEXT:    add a0, a1, a0
; RV64ZVE32F-NEXT:    sd a5, 0(a0)
; RV64ZVE32F-NEXT:  .LBB42_9: # %else10
; RV64ZVE32F-NEXT:    andi a0, a4, 64
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT:    bnez a0, .LBB42_15
; RV64ZVE32F-NEXT:  # %bb.10: # %else12
; RV64ZVE32F-NEXT:    andi a0, a4, -128
; RV64ZVE32F-NEXT:    bnez a0, .LBB42_16
; RV64ZVE32F-NEXT:  .LBB42_11: # %else14
; RV64ZVE32F-NEXT:    ret
; RV64ZVE32F-NEXT:  .LBB42_12: # %cond.store3
; RV64ZVE32F-NEXT:    vmv.x.s a0, v8
; RV64ZVE32F-NEXT:    slli a0, a0, 3
; RV64ZVE32F-NEXT:    add a0, a1, a0
; RV64ZVE32F-NEXT:    sd t0, 0(a0)
; RV64ZVE32F-NEXT:    andi a0, a4, 8
; RV64ZVE32F-NEXT:    beqz a0, .LBB42_6
; RV64ZVE32F-NEXT:  .LBB42_13: # %cond.store5
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT:    vmv.x.s a0, v8
; RV64ZVE32F-NEXT:    slli a0, a0, 3
; RV64ZVE32F-NEXT:    add a0, a1, a0
; RV64ZVE32F-NEXT:    sd a7, 0(a0)
; RV64ZVE32F-NEXT:    andi a0, a4, 16
; RV64ZVE32F-NEXT:    beqz a0, .LBB42_7
; RV64ZVE32F-NEXT:  .LBB42_14: # %cond.store7
; RV64ZVE32F-NEXT:    vmv.x.s a0, v9
; RV64ZVE32F-NEXT:    slli a0, a0, 3
; RV64ZVE32F-NEXT:    add a0, a1, a0
; RV64ZVE32F-NEXT:    sd a6, 0(a0)
; RV64ZVE32F-NEXT:    andi a0, a4, 32
; RV64ZVE32F-NEXT:    bnez a0, .LBB42_8
; RV64ZVE32F-NEXT:    j .LBB42_9
; RV64ZVE32F-NEXT:  .LBB42_15: # %cond.store11
; RV64ZVE32F-NEXT:    vmv.x.s a0, v8
; RV64ZVE32F-NEXT:    slli a0, a0, 3
; RV64ZVE32F-NEXT:    add a0, a1, a0
; RV64ZVE32F-NEXT:    sd a3, 0(a0)
; RV64ZVE32F-NEXT:    andi a0, a4, -128
; RV64ZVE32F-NEXT:    beqz a0, .LBB42_11
; RV64ZVE32F-NEXT:  .LBB42_16: # %cond.store13
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT:    vmv.x.s a0, v8
; RV64ZVE32F-NEXT:    slli a0, a0, 3
; RV64ZVE32F-NEXT:    add a0, a1, a0
; RV64ZVE32F-NEXT:    sd a2, 0(a0)
; RV64ZVE32F-NEXT:    ret
  ; GEP over i64 elements: pointer = base + sext(idx) * 8.
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i8> %idxs
  call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
; Same pattern as mscatter_baseidx_v8i8_v8i64 but with the i8 -> i64 sign
; extension spelled out in the IR. Codegen should match the implicit-sext
; variant exactly: vsext + vsll + masked vsoxei on V targets, scalarized
; per-mask-bit stores on the ZVE32F targets.
define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
; RV32V-LABEL: mscatter_baseidx_sext_v8i8_v8i64:
; RV32V-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT:    vsext.vf4 v14, v12
; RV32V-NEXT:    vsll.vi v12, v14, 3
; RV32V-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
; RV32V-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV64-LABEL: mscatter_baseidx_sext_v8i8_v8i64:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v12, v16, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV32ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8i64:
; RV32ZVE32F:       # %bb.0:
; RV32ZVE32F-NEXT:    addi sp, sp, -16
; RV32ZVE32F-NEXT:    .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT:    sw s1, 8(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT:    sw s2, 4(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT:    .cfi_offset s0, -4
; RV32ZVE32F-NEXT:    .cfi_offset s1, -8
; RV32ZVE32F-NEXT:    .cfi_offset s2, -12
; RV32ZVE32F-NEXT:    lw a2, 60(a0)
; RV32ZVE32F-NEXT:    lw a3, 56(a0)
; RV32ZVE32F-NEXT:    lw a4, 52(a0)
; RV32ZVE32F-NEXT:    lw a5, 48(a0)
; RV32ZVE32F-NEXT:    lw a6, 44(a0)
; RV32ZVE32F-NEXT:    lw a7, 40(a0)
; RV32ZVE32F-NEXT:    lw t1, 36(a0)
; RV32ZVE32F-NEXT:    lw t2, 32(a0)
; RV32ZVE32F-NEXT:    lw t3, 28(a0)
; RV32ZVE32F-NEXT:    lw t4, 24(a0)
; RV32ZVE32F-NEXT:    lw t5, 20(a0)
; RV32ZVE32F-NEXT:    lw t6, 16(a0)
; RV32ZVE32F-NEXT:    lw s1, 12(a0)
; RV32ZVE32F-NEXT:    lw s0, 8(a0)
; RV32ZVE32F-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vsext.vf4 v10, v8
; RV32ZVE32F-NEXT:    vsll.vi v8, v10, 3
; RV32ZVE32F-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
; RV32ZVE32F-NEXT:    vmv.x.s t0, v0
; RV32ZVE32F-NEXT:    andi s2, t0, 1
; RV32ZVE32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT:    bnez s2, .LBB43_10
; RV32ZVE32F-NEXT:  # %bb.1: # %else
; RV32ZVE32F-NEXT:    andi a0, t0, 2
; RV32ZVE32F-NEXT:    bnez a0, .LBB43_11
; RV32ZVE32F-NEXT:  .LBB43_2: # %else2
; RV32ZVE32F-NEXT:    andi a0, t0, 4
; RV32ZVE32F-NEXT:    bnez a0, .LBB43_12
; RV32ZVE32F-NEXT:  .LBB43_3: # %else4
; RV32ZVE32F-NEXT:    andi a0, t0, 8
; RV32ZVE32F-NEXT:    bnez a0, .LBB43_13
; RV32ZVE32F-NEXT:  .LBB43_4: # %else6
; RV32ZVE32F-NEXT:    andi a0, t0, 16
; RV32ZVE32F-NEXT:    bnez a0, .LBB43_14
; RV32ZVE32F-NEXT:  .LBB43_5: # %else8
; RV32ZVE32F-NEXT:    andi a0, t0, 32
; RV32ZVE32F-NEXT:    bnez a0, .LBB43_15
; RV32ZVE32F-NEXT:  .LBB43_6: # %else10
; RV32ZVE32F-NEXT:    andi a0, t0, 64
; RV32ZVE32F-NEXT:    bnez a0, .LBB43_16
; RV32ZVE32F-NEXT:  .LBB43_7: # %else12
; RV32ZVE32F-NEXT:    andi a0, t0, -128
; RV32ZVE32F-NEXT:    beqz a0, .LBB43_9
; RV32ZVE32F-NEXT:  .LBB43_8: # %cond.store13
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT:    vmv.x.s a0, v8
; RV32ZVE32F-NEXT:    sw a3, 0(a0)
; RV32ZVE32F-NEXT:    sw a2, 4(a0)
; RV32ZVE32F-NEXT:  .LBB43_9: # %else14
; RV32ZVE32F-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT:    lw s2, 4(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT:    addi sp, sp, 16
; RV32ZVE32F-NEXT:    ret
; RV32ZVE32F-NEXT:  .LBB43_10: # %cond.store
; RV32ZVE32F-NEXT:    lw a1, 4(a0)
; RV32ZVE32F-NEXT:    lw a0, 0(a0)
; RV32ZVE32F-NEXT:    vmv.x.s s2, v8
; RV32ZVE32F-NEXT:    sw a1, 4(s2)
; RV32ZVE32F-NEXT:    sw a0, 0(s2)
; RV32ZVE32F-NEXT:    andi a0, t0, 2
; RV32ZVE32F-NEXT:    beqz a0, .LBB43_2
; RV32ZVE32F-NEXT:  .LBB43_11: # %cond.store1
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    sw s1, 4(a0)
; RV32ZVE32F-NEXT:    sw s0, 0(a0)
; RV32ZVE32F-NEXT:    andi a0, t0, 4
; RV32ZVE32F-NEXT:    beqz a0, .LBB43_3
; RV32ZVE32F-NEXT:  .LBB43_12: # %cond.store3
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    sw t6, 0(a0)
; RV32ZVE32F-NEXT:    sw t5, 4(a0)
; RV32ZVE32F-NEXT:    andi a0, t0, 8
; RV32ZVE32F-NEXT:    beqz a0, .LBB43_4
; RV32ZVE32F-NEXT:  .LBB43_13: # %cond.store5
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    sw t4, 0(a0)
; RV32ZVE32F-NEXT:    sw t3, 4(a0)
; RV32ZVE32F-NEXT:    andi a0, t0, 16
; RV32ZVE32F-NEXT:    beqz a0, .LBB43_5
; RV32ZVE32F-NEXT:  .LBB43_14: # %cond.store7
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    sw t2, 0(a0)
; RV32ZVE32F-NEXT:    sw t1, 4(a0)
; RV32ZVE32F-NEXT:    andi a0, t0, 32
; RV32ZVE32F-NEXT:    beqz a0, .LBB43_6
; RV32ZVE32F-NEXT:  .LBB43_15: # %cond.store9
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    sw a7, 0(a0)
; RV32ZVE32F-NEXT:    sw a6, 4(a0)
; RV32ZVE32F-NEXT:    andi a0, t0, 64
; RV32ZVE32F-NEXT:    beqz a0, .LBB43_7
; RV32ZVE32F-NEXT:  .LBB43_16: # %cond.store11
; RV32ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT:    vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT:    vmv.x.s a0, v10
; RV32ZVE32F-NEXT:    sw a5, 0(a0)
; RV32ZVE32F-NEXT:    sw a4, 4(a0)
; RV32ZVE32F-NEXT:    andi a0, t0, -128
; RV32ZVE32F-NEXT:    bnez a0, .LBB43_8
; RV32ZVE32F-NEXT:    j .LBB43_9
; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8i64:
; RV64ZVE32F:       # %bb.0:
; RV64ZVE32F-NEXT:    ld a2, 56(a0)
; RV64ZVE32F-NEXT:    ld a3, 48(a0)
; RV64ZVE32F-NEXT:    ld a5, 40(a0)
; RV64ZVE32F-NEXT:    ld a6, 32(a0)
; RV64ZVE32F-NEXT:    ld a7, 24(a0)
; RV64ZVE32F-NEXT:    ld t0, 16(a0)
; RV64ZVE32F-NEXT:    ld t1, 8(a0)
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT:    vmv.x.s a4, v0
; RV64ZVE32F-NEXT:    andi t2, a4, 1
; RV64ZVE32F-NEXT:    beqz t2, .LBB43_2
; RV64ZVE32F-NEXT:  # %bb.1: # %cond.store
; RV64ZVE32F-NEXT:    ld a0, 0(a0)
; RV64ZVE32F-NEXT:    vmv.x.s t2, v8
; RV64ZVE32F-NEXT:    slli t2, t2, 3
; RV64ZVE32F-NEXT:    add t2, a1, t2
; RV64ZVE32F-NEXT:    sd a0, 0(t2)
; RV64ZVE32F-NEXT:  .LBB43_2: # %else
; RV64ZVE32F-NEXT:    andi a0, a4, 2
; RV64ZVE32F-NEXT:    beqz a0, .LBB43_4
; RV64ZVE32F-NEXT:  # %bb.3: # %cond.store1
; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT:    vmv.x.s a0, v9
; RV64ZVE32F-NEXT:    slli a0, a0, 3
; RV64ZVE32F-NEXT:    add a0, a1, a0
; RV64ZVE32F-NEXT:    sd t1, 0(a0)
; RV64ZVE32F-NEXT:  .LBB43_4: # %else2
; RV64ZVE32F-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT:    andi a0, a4, 4
; RV64ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT:    bnez a0, .LBB43_12
; RV64ZVE32F-NEXT:  # %bb.5: # %else4
; RV64ZVE32F-NEXT:    andi a0, a4, 8
; RV64ZVE32F-NEXT:    bnez a0, .LBB43_13
; RV64ZVE32F-NEXT:  .LBB43_6: # %else6
; RV64ZVE32F-NEXT:    andi a0, a4, 16
; RV64ZVE32F-NEXT:    bnez a0, .LBB43_14
; RV64ZVE32F-NEXT:  .LBB43_7: # %else8
; RV64ZVE32F-NEXT:    andi a0, a4, 32
; RV64ZVE32F-NEXT:    beqz a0, .LBB43_9
; RV64ZVE32F-NEXT:  .LBB43_8: # %cond.store9
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT:    vmv.x.s a0, v8
; RV64ZVE32F-NEXT:    slli a0, a0, 3
; RV64ZVE32F-NEXT:    add a0, a1, a0
; RV64ZVE32F-NEXT:    sd a5, 0(a0)
; RV64ZVE32F-NEXT:  .LBB43_9: # %else10
; RV64ZVE32F-NEXT:    andi a0, a4, 64
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT:    bnez a0, .LBB43_15
; RV64ZVE32F-NEXT:  # %bb.10: # %else12
; RV64ZVE32F-NEXT:    andi a0, a4, -128
; RV64ZVE32F-NEXT:    bnez a0, .LBB43_16
; RV64ZVE32F-NEXT:  .LBB43_11: # %else14
; RV64ZVE32F-NEXT:    ret
; RV64ZVE32F-NEXT:  .LBB43_12: # %cond.store3
; RV64ZVE32F-NEXT:    vmv.x.s a0, v8
; RV64ZVE32F-NEXT:    slli a0, a0, 3
; RV64ZVE32F-NEXT:    add a0, a1, a0
; RV64ZVE32F-NEXT:    sd t0, 0(a0)
; RV64ZVE32F-NEXT:    andi a0, a4, 8
; RV64ZVE32F-NEXT:    beqz a0, .LBB43_6
; RV64ZVE32F-NEXT:  .LBB43_13: # %cond.store5
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT:    vmv.x.s a0, v8
; RV64ZVE32F-NEXT:    slli a0, a0, 3
; RV64ZVE32F-NEXT:    add a0, a1, a0
; RV64ZVE32F-NEXT:    sd a7, 0(a0)
; RV64ZVE32F-NEXT:    andi a0, a4, 16
; RV64ZVE32F-NEXT:    beqz a0, .LBB43_7
; RV64ZVE32F-NEXT:  .LBB43_14: # %cond.store7
; RV64ZVE32F-NEXT:    vmv.x.s a0, v9
; RV64ZVE32F-NEXT:    slli a0, a0, 3
; RV64ZVE32F-NEXT:    add a0, a1, a0
; RV64ZVE32F-NEXT:    sd a6, 0(a0)
; RV64ZVE32F-NEXT:    andi a0, a4, 32
; RV64ZVE32F-NEXT:    bnez a0, .LBB43_8
; RV64ZVE32F-NEXT:    j .LBB43_9
; RV64ZVE32F-NEXT:  .LBB43_15: # %cond.store11
; RV64ZVE32F-NEXT:    vmv.x.s a0, v8
; RV64ZVE32F-NEXT:    slli a0, a0, 3
; RV64ZVE32F-NEXT:    add a0, a1, a0
; RV64ZVE32F-NEXT:    sd a3, 0(a0)
; RV64ZVE32F-NEXT:    andi a0, a4, -128
; RV64ZVE32F-NEXT:    beqz a0, .LBB43_11
; RV64ZVE32F-NEXT:  .LBB43_16: # %cond.store13
; RV64ZVE32F-NEXT:    vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT:    vmv.x.s a0, v8
; RV64ZVE32F-NEXT:    slli a0, a0, 3
; RV64ZVE32F-NEXT:    add a0, a1, a0
; RV64ZVE32F-NEXT:    sd a2, 0(a0)
; RV64ZVE32F-NEXT:    ret
  ; Explicit sign extension of the indices before the GEP.
  %eidxs = sext <8 x i8> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
  call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
3849 define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
3850 ; RV32V-LABEL: mscatter_baseidx_zext_v8i8_v8i64:
3852 ; RV32V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
3853 ; RV32V-NEXT: vzext.vf2 v13, v12
3854 ; RV32V-NEXT: vsll.vi v12, v13, 3
3855 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
3856 ; RV32V-NEXT: vsoxei16.v v8, (a0), v12, v0.t
3859 ; RV64-LABEL: mscatter_baseidx_zext_v8i8_v8i64:
3861 ; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
3862 ; RV64-NEXT: vzext.vf2 v13, v12
3863 ; RV64-NEXT: vsll.vi v12, v13, 3
3864 ; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, ma
3865 ; RV64-NEXT: vsoxei16.v v8, (a0), v12, v0.t
3868 ; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8i64:
3869 ; RV32ZVE32F: # %bb.0:
3870 ; RV32ZVE32F-NEXT: addi sp, sp, -16
3871 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
3872 ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
3873 ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
3874 ; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
3875 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
3876 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
3877 ; RV32ZVE32F-NEXT: .cfi_offset s2, -12
3878 ; RV32ZVE32F-NEXT: lw a2, 60(a0)
3879 ; RV32ZVE32F-NEXT: lw a3, 56(a0)
3880 ; RV32ZVE32F-NEXT: lw a4, 52(a0)
3881 ; RV32ZVE32F-NEXT: lw a5, 48(a0)
3882 ; RV32ZVE32F-NEXT: lw a6, 44(a0)
3883 ; RV32ZVE32F-NEXT: lw a7, 40(a0)
3884 ; RV32ZVE32F-NEXT: lw t1, 36(a0)
3885 ; RV32ZVE32F-NEXT: lw t2, 32(a0)
3886 ; RV32ZVE32F-NEXT: lw t3, 28(a0)
3887 ; RV32ZVE32F-NEXT: lw t4, 24(a0)
3888 ; RV32ZVE32F-NEXT: lw t5, 20(a0)
3889 ; RV32ZVE32F-NEXT: lw t6, 16(a0)
3890 ; RV32ZVE32F-NEXT: lw s1, 12(a0)
3891 ; RV32ZVE32F-NEXT: lw s0, 8(a0)
3892 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
3893 ; RV32ZVE32F-NEXT: vzext.vf4 v10, v8
3894 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
3895 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
3896 ; RV32ZVE32F-NEXT: vmv.x.s t0, v0
3897 ; RV32ZVE32F-NEXT: andi s2, t0, 1
3898 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
3899 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
3900 ; RV32ZVE32F-NEXT: bnez s2, .LBB44_10
3901 ; RV32ZVE32F-NEXT: # %bb.1: # %else
3902 ; RV32ZVE32F-NEXT: andi a0, t0, 2
3903 ; RV32ZVE32F-NEXT: bnez a0, .LBB44_11
3904 ; RV32ZVE32F-NEXT: .LBB44_2: # %else2
3905 ; RV32ZVE32F-NEXT: andi a0, t0, 4
3906 ; RV32ZVE32F-NEXT: bnez a0, .LBB44_12
3907 ; RV32ZVE32F-NEXT: .LBB44_3: # %else4
3908 ; RV32ZVE32F-NEXT: andi a0, t0, 8
3909 ; RV32ZVE32F-NEXT: bnez a0, .LBB44_13
3910 ; RV32ZVE32F-NEXT: .LBB44_4: # %else6
3911 ; RV32ZVE32F-NEXT: andi a0, t0, 16
3912 ; RV32ZVE32F-NEXT: bnez a0, .LBB44_14
3913 ; RV32ZVE32F-NEXT: .LBB44_5: # %else8
3914 ; RV32ZVE32F-NEXT: andi a0, t0, 32
3915 ; RV32ZVE32F-NEXT: bnez a0, .LBB44_15
3916 ; RV32ZVE32F-NEXT: .LBB44_6: # %else10
3917 ; RV32ZVE32F-NEXT: andi a0, t0, 64
3918 ; RV32ZVE32F-NEXT: bnez a0, .LBB44_16
3919 ; RV32ZVE32F-NEXT: .LBB44_7: # %else12
3920 ; RV32ZVE32F-NEXT: andi a0, t0, -128
3921 ; RV32ZVE32F-NEXT: beqz a0, .LBB44_9
3922 ; RV32ZVE32F-NEXT: .LBB44_8: # %cond.store13
3923 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
3924 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
3925 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
3926 ; RV32ZVE32F-NEXT: sw a3, 0(a0)
3927 ; RV32ZVE32F-NEXT: sw a2, 4(a0)
3928 ; RV32ZVE32F-NEXT: .LBB44_9: # %else14
3929 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
3930 ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
3931 ; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
3932 ; RV32ZVE32F-NEXT: addi sp, sp, 16
3933 ; RV32ZVE32F-NEXT: ret
3934 ; RV32ZVE32F-NEXT: .LBB44_10: # %cond.store
3935 ; RV32ZVE32F-NEXT: lw a1, 4(a0)
3936 ; RV32ZVE32F-NEXT: lw a0, 0(a0)
3937 ; RV32ZVE32F-NEXT: vmv.x.s s2, v8
3938 ; RV32ZVE32F-NEXT: sw a1, 4(s2)
3939 ; RV32ZVE32F-NEXT: sw a0, 0(s2)
3940 ; RV32ZVE32F-NEXT: andi a0, t0, 2
3941 ; RV32ZVE32F-NEXT: beqz a0, .LBB44_2
3942 ; RV32ZVE32F-NEXT: .LBB44_11: # %cond.store1
3943 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3944 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
3945 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
3946 ; RV32ZVE32F-NEXT: sw s1, 4(a0)
3947 ; RV32ZVE32F-NEXT: sw s0, 0(a0)
3948 ; RV32ZVE32F-NEXT: andi a0, t0, 4
3949 ; RV32ZVE32F-NEXT: beqz a0, .LBB44_3
3950 ; RV32ZVE32F-NEXT: .LBB44_12: # %cond.store3
3951 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3952 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
3953 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
3954 ; RV32ZVE32F-NEXT: sw t6, 0(a0)
3955 ; RV32ZVE32F-NEXT: sw t5, 4(a0)
3956 ; RV32ZVE32F-NEXT: andi a0, t0, 8
3957 ; RV32ZVE32F-NEXT: beqz a0, .LBB44_4
3958 ; RV32ZVE32F-NEXT: .LBB44_13: # %cond.store5
3959 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3960 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
3961 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
3962 ; RV32ZVE32F-NEXT: sw t4, 0(a0)
3963 ; RV32ZVE32F-NEXT: sw t3, 4(a0)
3964 ; RV32ZVE32F-NEXT: andi a0, t0, 16
3965 ; RV32ZVE32F-NEXT: beqz a0, .LBB44_5
3966 ; RV32ZVE32F-NEXT: .LBB44_14: # %cond.store7
3967 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
3968 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
3969 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
3970 ; RV32ZVE32F-NEXT: sw t2, 0(a0)
3971 ; RV32ZVE32F-NEXT: sw t1, 4(a0)
3972 ; RV32ZVE32F-NEXT: andi a0, t0, 32
3973 ; RV32ZVE32F-NEXT: beqz a0, .LBB44_6
3974 ; RV32ZVE32F-NEXT: .LBB44_15: # %cond.store9
3975 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
3976 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
3977 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
3978 ; RV32ZVE32F-NEXT: sw a7, 0(a0)
3979 ; RV32ZVE32F-NEXT: sw a6, 4(a0)
3980 ; RV32ZVE32F-NEXT: andi a0, t0, 64
3981 ; RV32ZVE32F-NEXT: beqz a0, .LBB44_7
3982 ; RV32ZVE32F-NEXT: .LBB44_16: # %cond.store11
3983 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
3984 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
3985 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
3986 ; RV32ZVE32F-NEXT: sw a5, 0(a0)
3987 ; RV32ZVE32F-NEXT: sw a4, 4(a0)
3988 ; RV32ZVE32F-NEXT: andi a0, t0, -128
3989 ; RV32ZVE32F-NEXT: bnez a0, .LBB44_8
3990 ; RV32ZVE32F-NEXT: j .LBB44_9
3992 ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8i64:
3993 ; RV64ZVE32F: # %bb.0:
3994 ; RV64ZVE32F-NEXT: ld a2, 56(a0)
3995 ; RV64ZVE32F-NEXT: ld a3, 48(a0)
3996 ; RV64ZVE32F-NEXT: ld a5, 40(a0)
3997 ; RV64ZVE32F-NEXT: ld a6, 32(a0)
3998 ; RV64ZVE32F-NEXT: ld a7, 24(a0)
3999 ; RV64ZVE32F-NEXT: ld t0, 16(a0)
4000 ; RV64ZVE32F-NEXT: ld t1, 8(a0)
4001 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
4002 ; RV64ZVE32F-NEXT: vmv.x.s a4, v0
4003 ; RV64ZVE32F-NEXT: andi t2, a4, 1
4004 ; RV64ZVE32F-NEXT: beqz t2, .LBB44_2
4005 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
4006 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
4007 ; RV64ZVE32F-NEXT: vmv.x.s t2, v8
4008 ; RV64ZVE32F-NEXT: andi t2, t2, 255
4009 ; RV64ZVE32F-NEXT: slli t2, t2, 3
4010 ; RV64ZVE32F-NEXT: add t2, a1, t2
4011 ; RV64ZVE32F-NEXT: sd a0, 0(t2)
4012 ; RV64ZVE32F-NEXT: .LBB44_2: # %else
4013 ; RV64ZVE32F-NEXT: andi a0, a4, 2
4014 ; RV64ZVE32F-NEXT: beqz a0, .LBB44_4
4015 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
4016 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
4017 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
4018 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9
4019 ; RV64ZVE32F-NEXT: andi a0, a0, 255
4020 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4021 ; RV64ZVE32F-NEXT: add a0, a1, a0
4022 ; RV64ZVE32F-NEXT: sd t1, 0(a0)
4023 ; RV64ZVE32F-NEXT: .LBB44_4: # %else2
4024 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
4025 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
4026 ; RV64ZVE32F-NEXT: andi a0, a4, 4
4027 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
4028 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
4029 ; RV64ZVE32F-NEXT: bnez a0, .LBB44_12
4030 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
4031 ; RV64ZVE32F-NEXT: andi a0, a4, 8
4032 ; RV64ZVE32F-NEXT: bnez a0, .LBB44_13
4033 ; RV64ZVE32F-NEXT: .LBB44_6: # %else6
4034 ; RV64ZVE32F-NEXT: andi a0, a4, 16
4035 ; RV64ZVE32F-NEXT: bnez a0, .LBB44_14
4036 ; RV64ZVE32F-NEXT: .LBB44_7: # %else8
4037 ; RV64ZVE32F-NEXT: andi a0, a4, 32
4038 ; RV64ZVE32F-NEXT: beqz a0, .LBB44_9
4039 ; RV64ZVE32F-NEXT: .LBB44_8: # %cond.store9
4040 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
4041 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4042 ; RV64ZVE32F-NEXT: andi a0, a0, 255
4043 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4044 ; RV64ZVE32F-NEXT: add a0, a1, a0
4045 ; RV64ZVE32F-NEXT: sd a5, 0(a0)
4046 ; RV64ZVE32F-NEXT: .LBB44_9: # %else10
4047 ; RV64ZVE32F-NEXT: andi a0, a4, 64
4048 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
4049 ; RV64ZVE32F-NEXT: bnez a0, .LBB44_15
4050 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
4051 ; RV64ZVE32F-NEXT: andi a0, a4, -128
4052 ; RV64ZVE32F-NEXT: bnez a0, .LBB44_16
4053 ; RV64ZVE32F-NEXT: .LBB44_11: # %else14
4054 ; RV64ZVE32F-NEXT: ret
4055 ; RV64ZVE32F-NEXT: .LBB44_12: # %cond.store3
4056 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4057 ; RV64ZVE32F-NEXT: andi a0, a0, 255
4058 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4059 ; RV64ZVE32F-NEXT: add a0, a1, a0
4060 ; RV64ZVE32F-NEXT: sd t0, 0(a0)
4061 ; RV64ZVE32F-NEXT: andi a0, a4, 8
4062 ; RV64ZVE32F-NEXT: beqz a0, .LBB44_6
4063 ; RV64ZVE32F-NEXT: .LBB44_13: # %cond.store5
4064 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
4065 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4066 ; RV64ZVE32F-NEXT: andi a0, a0, 255
4067 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4068 ; RV64ZVE32F-NEXT: add a0, a1, a0
4069 ; RV64ZVE32F-NEXT: sd a7, 0(a0)
4070 ; RV64ZVE32F-NEXT: andi a0, a4, 16
4071 ; RV64ZVE32F-NEXT: beqz a0, .LBB44_7
4072 ; RV64ZVE32F-NEXT: .LBB44_14: # %cond.store7
4073 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9
4074 ; RV64ZVE32F-NEXT: andi a0, a0, 255
4075 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4076 ; RV64ZVE32F-NEXT: add a0, a1, a0
4077 ; RV64ZVE32F-NEXT: sd a6, 0(a0)
4078 ; RV64ZVE32F-NEXT: andi a0, a4, 32
4079 ; RV64ZVE32F-NEXT: bnez a0, .LBB44_8
4080 ; RV64ZVE32F-NEXT: j .LBB44_9
4081 ; RV64ZVE32F-NEXT: .LBB44_15: # %cond.store11
4082 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4083 ; RV64ZVE32F-NEXT: andi a0, a0, 255
4084 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4085 ; RV64ZVE32F-NEXT: add a0, a1, a0
4086 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
4087 ; RV64ZVE32F-NEXT: andi a0, a4, -128
4088 ; RV64ZVE32F-NEXT: beqz a0, .LBB44_11
4089 ; RV64ZVE32F-NEXT: .LBB44_16: # %cond.store13
4090 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
4091 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4092 ; RV64ZVE32F-NEXT: andi a0, a0, 255
4093 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4094 ; RV64ZVE32F-NEXT: add a0, a1, a0
4095 ; RV64ZVE32F-NEXT: sd a2, 0(a0)
4096 ; RV64ZVE32F-NEXT: ret
4097 %eidxs = zext <8 x i8> %idxs to <8 x i64>
4098 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
4099 call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
4103 define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
4104 ; RV32V-LABEL: mscatter_baseidx_v8i16_v8i64:
4106 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
4107 ; RV32V-NEXT: vsext.vf2 v14, v12
4108 ; RV32V-NEXT: vsll.vi v12, v14, 3
4109 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
4110 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
4113 ; RV64-LABEL: mscatter_baseidx_v8i16_v8i64:
4115 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
4116 ; RV64-NEXT: vsext.vf4 v16, v12
4117 ; RV64-NEXT: vsll.vi v12, v16, 3
4118 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
4121 ; RV32ZVE32F-LABEL: mscatter_baseidx_v8i16_v8i64:
4122 ; RV32ZVE32F: # %bb.0:
4123 ; RV32ZVE32F-NEXT: addi sp, sp, -16
4124 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
4125 ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
4126 ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
4127 ; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
4128 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
4129 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
4130 ; RV32ZVE32F-NEXT: .cfi_offset s2, -12
4131 ; RV32ZVE32F-NEXT: lw a2, 60(a0)
4132 ; RV32ZVE32F-NEXT: lw a3, 56(a0)
4133 ; RV32ZVE32F-NEXT: lw a4, 52(a0)
4134 ; RV32ZVE32F-NEXT: lw a5, 48(a0)
4135 ; RV32ZVE32F-NEXT: lw a6, 44(a0)
4136 ; RV32ZVE32F-NEXT: lw a7, 40(a0)
4137 ; RV32ZVE32F-NEXT: lw t1, 36(a0)
4138 ; RV32ZVE32F-NEXT: lw t2, 32(a0)
4139 ; RV32ZVE32F-NEXT: lw t3, 28(a0)
4140 ; RV32ZVE32F-NEXT: lw t4, 24(a0)
4141 ; RV32ZVE32F-NEXT: lw t5, 20(a0)
4142 ; RV32ZVE32F-NEXT: lw t6, 16(a0)
4143 ; RV32ZVE32F-NEXT: lw s1, 12(a0)
4144 ; RV32ZVE32F-NEXT: lw s0, 8(a0)
4145 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
4146 ; RV32ZVE32F-NEXT: vsext.vf2 v10, v8
4147 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
4148 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
4149 ; RV32ZVE32F-NEXT: vmv.x.s t0, v0
4150 ; RV32ZVE32F-NEXT: andi s2, t0, 1
4151 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4152 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
4153 ; RV32ZVE32F-NEXT: bnez s2, .LBB45_10
4154 ; RV32ZVE32F-NEXT: # %bb.1: # %else
4155 ; RV32ZVE32F-NEXT: andi a0, t0, 2
4156 ; RV32ZVE32F-NEXT: bnez a0, .LBB45_11
4157 ; RV32ZVE32F-NEXT: .LBB45_2: # %else2
4158 ; RV32ZVE32F-NEXT: andi a0, t0, 4
4159 ; RV32ZVE32F-NEXT: bnez a0, .LBB45_12
4160 ; RV32ZVE32F-NEXT: .LBB45_3: # %else4
4161 ; RV32ZVE32F-NEXT: andi a0, t0, 8
4162 ; RV32ZVE32F-NEXT: bnez a0, .LBB45_13
4163 ; RV32ZVE32F-NEXT: .LBB45_4: # %else6
4164 ; RV32ZVE32F-NEXT: andi a0, t0, 16
4165 ; RV32ZVE32F-NEXT: bnez a0, .LBB45_14
4166 ; RV32ZVE32F-NEXT: .LBB45_5: # %else8
4167 ; RV32ZVE32F-NEXT: andi a0, t0, 32
4168 ; RV32ZVE32F-NEXT: bnez a0, .LBB45_15
4169 ; RV32ZVE32F-NEXT: .LBB45_6: # %else10
4170 ; RV32ZVE32F-NEXT: andi a0, t0, 64
4171 ; RV32ZVE32F-NEXT: bnez a0, .LBB45_16
4172 ; RV32ZVE32F-NEXT: .LBB45_7: # %else12
4173 ; RV32ZVE32F-NEXT: andi a0, t0, -128
4174 ; RV32ZVE32F-NEXT: beqz a0, .LBB45_9
4175 ; RV32ZVE32F-NEXT: .LBB45_8: # %cond.store13
4176 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4177 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
4178 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
4179 ; RV32ZVE32F-NEXT: sw a3, 0(a0)
4180 ; RV32ZVE32F-NEXT: sw a2, 4(a0)
4181 ; RV32ZVE32F-NEXT: .LBB45_9: # %else14
4182 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
4183 ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
4184 ; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
4185 ; RV32ZVE32F-NEXT: addi sp, sp, 16
4186 ; RV32ZVE32F-NEXT: ret
4187 ; RV32ZVE32F-NEXT: .LBB45_10: # %cond.store
4188 ; RV32ZVE32F-NEXT: lw a1, 4(a0)
4189 ; RV32ZVE32F-NEXT: lw a0, 0(a0)
4190 ; RV32ZVE32F-NEXT: vmv.x.s s2, v8
4191 ; RV32ZVE32F-NEXT: sw a1, 4(s2)
4192 ; RV32ZVE32F-NEXT: sw a0, 0(s2)
4193 ; RV32ZVE32F-NEXT: andi a0, t0, 2
4194 ; RV32ZVE32F-NEXT: beqz a0, .LBB45_2
4195 ; RV32ZVE32F-NEXT: .LBB45_11: # %cond.store1
4196 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4197 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
4198 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4199 ; RV32ZVE32F-NEXT: sw s1, 4(a0)
4200 ; RV32ZVE32F-NEXT: sw s0, 0(a0)
4201 ; RV32ZVE32F-NEXT: andi a0, t0, 4
4202 ; RV32ZVE32F-NEXT: beqz a0, .LBB45_3
4203 ; RV32ZVE32F-NEXT: .LBB45_12: # %cond.store3
4204 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4205 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
4206 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4207 ; RV32ZVE32F-NEXT: sw t6, 0(a0)
4208 ; RV32ZVE32F-NEXT: sw t5, 4(a0)
4209 ; RV32ZVE32F-NEXT: andi a0, t0, 8
4210 ; RV32ZVE32F-NEXT: beqz a0, .LBB45_4
4211 ; RV32ZVE32F-NEXT: .LBB45_13: # %cond.store5
4212 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4213 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
4214 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4215 ; RV32ZVE32F-NEXT: sw t4, 0(a0)
4216 ; RV32ZVE32F-NEXT: sw t3, 4(a0)
4217 ; RV32ZVE32F-NEXT: andi a0, t0, 16
4218 ; RV32ZVE32F-NEXT: beqz a0, .LBB45_5
4219 ; RV32ZVE32F-NEXT: .LBB45_14: # %cond.store7
4220 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4221 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
4222 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4223 ; RV32ZVE32F-NEXT: sw t2, 0(a0)
4224 ; RV32ZVE32F-NEXT: sw t1, 4(a0)
4225 ; RV32ZVE32F-NEXT: andi a0, t0, 32
4226 ; RV32ZVE32F-NEXT: beqz a0, .LBB45_6
4227 ; RV32ZVE32F-NEXT: .LBB45_15: # %cond.store9
4228 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4229 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
4230 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4231 ; RV32ZVE32F-NEXT: sw a7, 0(a0)
4232 ; RV32ZVE32F-NEXT: sw a6, 4(a0)
4233 ; RV32ZVE32F-NEXT: andi a0, t0, 64
4234 ; RV32ZVE32F-NEXT: beqz a0, .LBB45_7
4235 ; RV32ZVE32F-NEXT: .LBB45_16: # %cond.store11
4236 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4237 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
4238 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4239 ; RV32ZVE32F-NEXT: sw a5, 0(a0)
4240 ; RV32ZVE32F-NEXT: sw a4, 4(a0)
4241 ; RV32ZVE32F-NEXT: andi a0, t0, -128
4242 ; RV32ZVE32F-NEXT: bnez a0, .LBB45_8
4243 ; RV32ZVE32F-NEXT: j .LBB45_9
4245 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i16_v8i64:
4246 ; RV64ZVE32F: # %bb.0:
4247 ; RV64ZVE32F-NEXT: ld a2, 56(a0)
4248 ; RV64ZVE32F-NEXT: ld a3, 48(a0)
4249 ; RV64ZVE32F-NEXT: ld a5, 40(a0)
4250 ; RV64ZVE32F-NEXT: ld a6, 32(a0)
4251 ; RV64ZVE32F-NEXT: ld a7, 24(a0)
4252 ; RV64ZVE32F-NEXT: ld t0, 16(a0)
4253 ; RV64ZVE32F-NEXT: ld t1, 8(a0)
4254 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
4255 ; RV64ZVE32F-NEXT: vmv.x.s a4, v0
4256 ; RV64ZVE32F-NEXT: andi t2, a4, 1
4257 ; RV64ZVE32F-NEXT: beqz t2, .LBB45_2
4258 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
4259 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
4260 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
4261 ; RV64ZVE32F-NEXT: vmv.x.s t2, v8
4262 ; RV64ZVE32F-NEXT: slli t2, t2, 3
4263 ; RV64ZVE32F-NEXT: add t2, a1, t2
4264 ; RV64ZVE32F-NEXT: sd a0, 0(t2)
4265 ; RV64ZVE32F-NEXT: .LBB45_2: # %else
4266 ; RV64ZVE32F-NEXT: andi a0, a4, 2
4267 ; RV64ZVE32F-NEXT: beqz a0, .LBB45_4
4268 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
4269 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
4270 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
4271 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9
4272 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4273 ; RV64ZVE32F-NEXT: add a0, a1, a0
4274 ; RV64ZVE32F-NEXT: sd t1, 0(a0)
4275 ; RV64ZVE32F-NEXT: .LBB45_4: # %else2
4276 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
4277 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
4278 ; RV64ZVE32F-NEXT: andi a0, a4, 4
4279 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
4280 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
4281 ; RV64ZVE32F-NEXT: bnez a0, .LBB45_12
4282 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
4283 ; RV64ZVE32F-NEXT: andi a0, a4, 8
4284 ; RV64ZVE32F-NEXT: bnez a0, .LBB45_13
4285 ; RV64ZVE32F-NEXT: .LBB45_6: # %else6
4286 ; RV64ZVE32F-NEXT: andi a0, a4, 16
4287 ; RV64ZVE32F-NEXT: bnez a0, .LBB45_14
4288 ; RV64ZVE32F-NEXT: .LBB45_7: # %else8
4289 ; RV64ZVE32F-NEXT: andi a0, a4, 32
4290 ; RV64ZVE32F-NEXT: beqz a0, .LBB45_9
4291 ; RV64ZVE32F-NEXT: .LBB45_8: # %cond.store9
4292 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
4293 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4294 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4295 ; RV64ZVE32F-NEXT: add a0, a1, a0
4296 ; RV64ZVE32F-NEXT: sd a5, 0(a0)
4297 ; RV64ZVE32F-NEXT: .LBB45_9: # %else10
4298 ; RV64ZVE32F-NEXT: andi a0, a4, 64
4299 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
4300 ; RV64ZVE32F-NEXT: bnez a0, .LBB45_15
4301 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
4302 ; RV64ZVE32F-NEXT: andi a0, a4, -128
4303 ; RV64ZVE32F-NEXT: bnez a0, .LBB45_16
4304 ; RV64ZVE32F-NEXT: .LBB45_11: # %else14
4305 ; RV64ZVE32F-NEXT: ret
4306 ; RV64ZVE32F-NEXT: .LBB45_12: # %cond.store3
4307 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4308 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4309 ; RV64ZVE32F-NEXT: add a0, a1, a0
4310 ; RV64ZVE32F-NEXT: sd t0, 0(a0)
4311 ; RV64ZVE32F-NEXT: andi a0, a4, 8
4312 ; RV64ZVE32F-NEXT: beqz a0, .LBB45_6
4313 ; RV64ZVE32F-NEXT: .LBB45_13: # %cond.store5
4314 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
4315 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4316 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4317 ; RV64ZVE32F-NEXT: add a0, a1, a0
4318 ; RV64ZVE32F-NEXT: sd a7, 0(a0)
4319 ; RV64ZVE32F-NEXT: andi a0, a4, 16
4320 ; RV64ZVE32F-NEXT: beqz a0, .LBB45_7
4321 ; RV64ZVE32F-NEXT: .LBB45_14: # %cond.store7
4322 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9
4323 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4324 ; RV64ZVE32F-NEXT: add a0, a1, a0
4325 ; RV64ZVE32F-NEXT: sd a6, 0(a0)
4326 ; RV64ZVE32F-NEXT: andi a0, a4, 32
4327 ; RV64ZVE32F-NEXT: bnez a0, .LBB45_8
4328 ; RV64ZVE32F-NEXT: j .LBB45_9
4329 ; RV64ZVE32F-NEXT: .LBB45_15: # %cond.store11
4330 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4331 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4332 ; RV64ZVE32F-NEXT: add a0, a1, a0
4333 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
4334 ; RV64ZVE32F-NEXT: andi a0, a4, -128
4335 ; RV64ZVE32F-NEXT: beqz a0, .LBB45_11
4336 ; RV64ZVE32F-NEXT: .LBB45_16: # %cond.store13
4337 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
4338 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4339 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4340 ; RV64ZVE32F-NEXT: add a0, a1, a0
4341 ; RV64ZVE32F-NEXT: sd a2, 0(a0)
4342 ; RV64ZVE32F-NEXT: ret
4343 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i16> %idxs
4344 call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
4348 define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
4349 ; RV32V-LABEL: mscatter_baseidx_sext_v8i16_v8i64:
4351 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
4352 ; RV32V-NEXT: vsext.vf2 v14, v12
4353 ; RV32V-NEXT: vsll.vi v12, v14, 3
4354 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
4355 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
4358 ; RV64-LABEL: mscatter_baseidx_sext_v8i16_v8i64:
4360 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
4361 ; RV64-NEXT: vsext.vf4 v16, v12
4362 ; RV64-NEXT: vsll.vi v12, v16, 3
4363 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
4366 ; RV32ZVE32F-LABEL: mscatter_baseidx_sext_v8i16_v8i64:
4367 ; RV32ZVE32F: # %bb.0:
4368 ; RV32ZVE32F-NEXT: addi sp, sp, -16
4369 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
4370 ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
4371 ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
4372 ; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
4373 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
4374 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
4375 ; RV32ZVE32F-NEXT: .cfi_offset s2, -12
4376 ; RV32ZVE32F-NEXT: lw a2, 60(a0)
4377 ; RV32ZVE32F-NEXT: lw a3, 56(a0)
4378 ; RV32ZVE32F-NEXT: lw a4, 52(a0)
4379 ; RV32ZVE32F-NEXT: lw a5, 48(a0)
4380 ; RV32ZVE32F-NEXT: lw a6, 44(a0)
4381 ; RV32ZVE32F-NEXT: lw a7, 40(a0)
4382 ; RV32ZVE32F-NEXT: lw t1, 36(a0)
4383 ; RV32ZVE32F-NEXT: lw t2, 32(a0)
4384 ; RV32ZVE32F-NEXT: lw t3, 28(a0)
4385 ; RV32ZVE32F-NEXT: lw t4, 24(a0)
4386 ; RV32ZVE32F-NEXT: lw t5, 20(a0)
4387 ; RV32ZVE32F-NEXT: lw t6, 16(a0)
4388 ; RV32ZVE32F-NEXT: lw s1, 12(a0)
4389 ; RV32ZVE32F-NEXT: lw s0, 8(a0)
4390 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
4391 ; RV32ZVE32F-NEXT: vsext.vf2 v10, v8
4392 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
4393 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
4394 ; RV32ZVE32F-NEXT: vmv.x.s t0, v0
4395 ; RV32ZVE32F-NEXT: andi s2, t0, 1
4396 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4397 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
4398 ; RV32ZVE32F-NEXT: bnez s2, .LBB46_10
4399 ; RV32ZVE32F-NEXT: # %bb.1: # %else
4400 ; RV32ZVE32F-NEXT: andi a0, t0, 2
4401 ; RV32ZVE32F-NEXT: bnez a0, .LBB46_11
4402 ; RV32ZVE32F-NEXT: .LBB46_2: # %else2
4403 ; RV32ZVE32F-NEXT: andi a0, t0, 4
4404 ; RV32ZVE32F-NEXT: bnez a0, .LBB46_12
4405 ; RV32ZVE32F-NEXT: .LBB46_3: # %else4
4406 ; RV32ZVE32F-NEXT: andi a0, t0, 8
4407 ; RV32ZVE32F-NEXT: bnez a0, .LBB46_13
4408 ; RV32ZVE32F-NEXT: .LBB46_4: # %else6
4409 ; RV32ZVE32F-NEXT: andi a0, t0, 16
4410 ; RV32ZVE32F-NEXT: bnez a0, .LBB46_14
4411 ; RV32ZVE32F-NEXT: .LBB46_5: # %else8
4412 ; RV32ZVE32F-NEXT: andi a0, t0, 32
4413 ; RV32ZVE32F-NEXT: bnez a0, .LBB46_15
4414 ; RV32ZVE32F-NEXT: .LBB46_6: # %else10
4415 ; RV32ZVE32F-NEXT: andi a0, t0, 64
4416 ; RV32ZVE32F-NEXT: bnez a0, .LBB46_16
4417 ; RV32ZVE32F-NEXT: .LBB46_7: # %else12
4418 ; RV32ZVE32F-NEXT: andi a0, t0, -128
4419 ; RV32ZVE32F-NEXT: beqz a0, .LBB46_9
4420 ; RV32ZVE32F-NEXT: .LBB46_8: # %cond.store13
4421 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4422 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
4423 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
4424 ; RV32ZVE32F-NEXT: sw a3, 0(a0)
4425 ; RV32ZVE32F-NEXT: sw a2, 4(a0)
4426 ; RV32ZVE32F-NEXT: .LBB46_9: # %else14
4427 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
4428 ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
4429 ; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
4430 ; RV32ZVE32F-NEXT: addi sp, sp, 16
4431 ; RV32ZVE32F-NEXT: ret
4432 ; RV32ZVE32F-NEXT: .LBB46_10: # %cond.store
4433 ; RV32ZVE32F-NEXT: lw a1, 4(a0)
4434 ; RV32ZVE32F-NEXT: lw a0, 0(a0)
4435 ; RV32ZVE32F-NEXT: vmv.x.s s2, v8
4436 ; RV32ZVE32F-NEXT: sw a1, 4(s2)
4437 ; RV32ZVE32F-NEXT: sw a0, 0(s2)
4438 ; RV32ZVE32F-NEXT: andi a0, t0, 2
4439 ; RV32ZVE32F-NEXT: beqz a0, .LBB46_2
4440 ; RV32ZVE32F-NEXT: .LBB46_11: # %cond.store1
4441 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4442 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
4443 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4444 ; RV32ZVE32F-NEXT: sw s1, 4(a0)
4445 ; RV32ZVE32F-NEXT: sw s0, 0(a0)
4446 ; RV32ZVE32F-NEXT: andi a0, t0, 4
4447 ; RV32ZVE32F-NEXT: beqz a0, .LBB46_3
4448 ; RV32ZVE32F-NEXT: .LBB46_12: # %cond.store3
4449 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4450 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
4451 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4452 ; RV32ZVE32F-NEXT: sw t6, 0(a0)
4453 ; RV32ZVE32F-NEXT: sw t5, 4(a0)
4454 ; RV32ZVE32F-NEXT: andi a0, t0, 8
4455 ; RV32ZVE32F-NEXT: beqz a0, .LBB46_4
4456 ; RV32ZVE32F-NEXT: .LBB46_13: # %cond.store5
4457 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4458 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
4459 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4460 ; RV32ZVE32F-NEXT: sw t4, 0(a0)
4461 ; RV32ZVE32F-NEXT: sw t3, 4(a0)
4462 ; RV32ZVE32F-NEXT: andi a0, t0, 16
4463 ; RV32ZVE32F-NEXT: beqz a0, .LBB46_5
4464 ; RV32ZVE32F-NEXT: .LBB46_14: # %cond.store7
4465 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4466 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
4467 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4468 ; RV32ZVE32F-NEXT: sw t2, 0(a0)
4469 ; RV32ZVE32F-NEXT: sw t1, 4(a0)
4470 ; RV32ZVE32F-NEXT: andi a0, t0, 32
4471 ; RV32ZVE32F-NEXT: beqz a0, .LBB46_6
4472 ; RV32ZVE32F-NEXT: .LBB46_15: # %cond.store9
4473 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4474 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
4475 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4476 ; RV32ZVE32F-NEXT: sw a7, 0(a0)
4477 ; RV32ZVE32F-NEXT: sw a6, 4(a0)
4478 ; RV32ZVE32F-NEXT: andi a0, t0, 64
4479 ; RV32ZVE32F-NEXT: beqz a0, .LBB46_7
4480 ; RV32ZVE32F-NEXT: .LBB46_16: # %cond.store11
4481 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4482 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
4483 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4484 ; RV32ZVE32F-NEXT: sw a5, 0(a0)
4485 ; RV32ZVE32F-NEXT: sw a4, 4(a0)
4486 ; RV32ZVE32F-NEXT: andi a0, t0, -128
4487 ; RV32ZVE32F-NEXT: bnez a0, .LBB46_8
4488 ; RV32ZVE32F-NEXT: j .LBB46_9
4490 ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i16_v8i64:
4491 ; RV64ZVE32F: # %bb.0:
4492 ; RV64ZVE32F-NEXT: ld a2, 56(a0)
4493 ; RV64ZVE32F-NEXT: ld a3, 48(a0)
4494 ; RV64ZVE32F-NEXT: ld a5, 40(a0)
4495 ; RV64ZVE32F-NEXT: ld a6, 32(a0)
4496 ; RV64ZVE32F-NEXT: ld a7, 24(a0)
4497 ; RV64ZVE32F-NEXT: ld t0, 16(a0)
4498 ; RV64ZVE32F-NEXT: ld t1, 8(a0)
4499 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
4500 ; RV64ZVE32F-NEXT: vmv.x.s a4, v0
4501 ; RV64ZVE32F-NEXT: andi t2, a4, 1
4502 ; RV64ZVE32F-NEXT: beqz t2, .LBB46_2
4503 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
4504 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
4505 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
4506 ; RV64ZVE32F-NEXT: vmv.x.s t2, v8
4507 ; RV64ZVE32F-NEXT: slli t2, t2, 3
4508 ; RV64ZVE32F-NEXT: add t2, a1, t2
4509 ; RV64ZVE32F-NEXT: sd a0, 0(t2)
4510 ; RV64ZVE32F-NEXT: .LBB46_2: # %else
4511 ; RV64ZVE32F-NEXT: andi a0, a4, 2
4512 ; RV64ZVE32F-NEXT: beqz a0, .LBB46_4
4513 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
4514 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
4515 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
4516 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9
4517 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4518 ; RV64ZVE32F-NEXT: add a0, a1, a0
4519 ; RV64ZVE32F-NEXT: sd t1, 0(a0)
4520 ; RV64ZVE32F-NEXT: .LBB46_4: # %else2
4521 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
4522 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
4523 ; RV64ZVE32F-NEXT: andi a0, a4, 4
4524 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
4525 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
4526 ; RV64ZVE32F-NEXT: bnez a0, .LBB46_12
4527 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
4528 ; RV64ZVE32F-NEXT: andi a0, a4, 8
4529 ; RV64ZVE32F-NEXT: bnez a0, .LBB46_13
4530 ; RV64ZVE32F-NEXT: .LBB46_6: # %else6
4531 ; RV64ZVE32F-NEXT: andi a0, a4, 16
4532 ; RV64ZVE32F-NEXT: bnez a0, .LBB46_14
4533 ; RV64ZVE32F-NEXT: .LBB46_7: # %else8
4534 ; RV64ZVE32F-NEXT: andi a0, a4, 32
4535 ; RV64ZVE32F-NEXT: beqz a0, .LBB46_9
4536 ; RV64ZVE32F-NEXT: .LBB46_8: # %cond.store9
4537 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
4538 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4539 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4540 ; RV64ZVE32F-NEXT: add a0, a1, a0
4541 ; RV64ZVE32F-NEXT: sd a5, 0(a0)
4542 ; RV64ZVE32F-NEXT: .LBB46_9: # %else10
4543 ; RV64ZVE32F-NEXT: andi a0, a4, 64
4544 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
4545 ; RV64ZVE32F-NEXT: bnez a0, .LBB46_15
4546 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
4547 ; RV64ZVE32F-NEXT: andi a0, a4, -128
4548 ; RV64ZVE32F-NEXT: bnez a0, .LBB46_16
4549 ; RV64ZVE32F-NEXT: .LBB46_11: # %else14
4550 ; RV64ZVE32F-NEXT: ret
4551 ; RV64ZVE32F-NEXT: .LBB46_12: # %cond.store3
4552 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4553 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4554 ; RV64ZVE32F-NEXT: add a0, a1, a0
4555 ; RV64ZVE32F-NEXT: sd t0, 0(a0)
4556 ; RV64ZVE32F-NEXT: andi a0, a4, 8
4557 ; RV64ZVE32F-NEXT: beqz a0, .LBB46_6
4558 ; RV64ZVE32F-NEXT: .LBB46_13: # %cond.store5
4559 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
4560 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4561 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4562 ; RV64ZVE32F-NEXT: add a0, a1, a0
4563 ; RV64ZVE32F-NEXT: sd a7, 0(a0)
4564 ; RV64ZVE32F-NEXT: andi a0, a4, 16
4565 ; RV64ZVE32F-NEXT: beqz a0, .LBB46_7
4566 ; RV64ZVE32F-NEXT: .LBB46_14: # %cond.store7
4567 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9
4568 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4569 ; RV64ZVE32F-NEXT: add a0, a1, a0
4570 ; RV64ZVE32F-NEXT: sd a6, 0(a0)
4571 ; RV64ZVE32F-NEXT: andi a0, a4, 32
4572 ; RV64ZVE32F-NEXT: bnez a0, .LBB46_8
4573 ; RV64ZVE32F-NEXT: j .LBB46_9
4574 ; RV64ZVE32F-NEXT: .LBB46_15: # %cond.store11
4575 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4576 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4577 ; RV64ZVE32F-NEXT: add a0, a1, a0
4578 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
4579 ; RV64ZVE32F-NEXT: andi a0, a4, -128
4580 ; RV64ZVE32F-NEXT: beqz a0, .LBB46_11
4581 ; RV64ZVE32F-NEXT: .LBB46_16: # %cond.store13
4582 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
4583 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4584 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4585 ; RV64ZVE32F-NEXT: add a0, a1, a0
4586 ; RV64ZVE32F-NEXT: sd a2, 0(a0)
4587 ; RV64ZVE32F-NEXT: ret
4588 %eidxs = sext <8 x i16> %idxs to <8 x i64>
4589 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
4590 call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
4594 define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
4595 ; RV32V-LABEL: mscatter_baseidx_zext_v8i16_v8i64:
4597 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
4598 ; RV32V-NEXT: vzext.vf2 v14, v12
4599 ; RV32V-NEXT: vsll.vi v12, v14, 3
4600 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
4601 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
4604 ; RV64-LABEL: mscatter_baseidx_zext_v8i16_v8i64:
4606 ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
4607 ; RV64-NEXT: vzext.vf2 v14, v12
4608 ; RV64-NEXT: vsll.vi v12, v14, 3
4609 ; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, ma
4610 ; RV64-NEXT: vsoxei32.v v8, (a0), v12, v0.t
4613 ; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8i64:
4614 ; RV32ZVE32F: # %bb.0:
4615 ; RV32ZVE32F-NEXT: addi sp, sp, -16
4616 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
4617 ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
4618 ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
4619 ; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
4620 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
4621 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
4622 ; RV32ZVE32F-NEXT: .cfi_offset s2, -12
4623 ; RV32ZVE32F-NEXT: lw a2, 60(a0)
4624 ; RV32ZVE32F-NEXT: lw a3, 56(a0)
4625 ; RV32ZVE32F-NEXT: lw a4, 52(a0)
4626 ; RV32ZVE32F-NEXT: lw a5, 48(a0)
4627 ; RV32ZVE32F-NEXT: lw a6, 44(a0)
4628 ; RV32ZVE32F-NEXT: lw a7, 40(a0)
4629 ; RV32ZVE32F-NEXT: lw t1, 36(a0)
4630 ; RV32ZVE32F-NEXT: lw t2, 32(a0)
4631 ; RV32ZVE32F-NEXT: lw t3, 28(a0)
4632 ; RV32ZVE32F-NEXT: lw t4, 24(a0)
4633 ; RV32ZVE32F-NEXT: lw t5, 20(a0)
4634 ; RV32ZVE32F-NEXT: lw t6, 16(a0)
4635 ; RV32ZVE32F-NEXT: lw s1, 12(a0)
4636 ; RV32ZVE32F-NEXT: lw s0, 8(a0)
4637 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
4638 ; RV32ZVE32F-NEXT: vzext.vf2 v10, v8
4639 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
4640 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
4641 ; RV32ZVE32F-NEXT: vmv.x.s t0, v0
4642 ; RV32ZVE32F-NEXT: andi s2, t0, 1
4643 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4644 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
4645 ; RV32ZVE32F-NEXT: bnez s2, .LBB47_10
4646 ; RV32ZVE32F-NEXT: # %bb.1: # %else
4647 ; RV32ZVE32F-NEXT: andi a0, t0, 2
4648 ; RV32ZVE32F-NEXT: bnez a0, .LBB47_11
4649 ; RV32ZVE32F-NEXT: .LBB47_2: # %else2
4650 ; RV32ZVE32F-NEXT: andi a0, t0, 4
4651 ; RV32ZVE32F-NEXT: bnez a0, .LBB47_12
4652 ; RV32ZVE32F-NEXT: .LBB47_3: # %else4
4653 ; RV32ZVE32F-NEXT: andi a0, t0, 8
4654 ; RV32ZVE32F-NEXT: bnez a0, .LBB47_13
4655 ; RV32ZVE32F-NEXT: .LBB47_4: # %else6
4656 ; RV32ZVE32F-NEXT: andi a0, t0, 16
4657 ; RV32ZVE32F-NEXT: bnez a0, .LBB47_14
4658 ; RV32ZVE32F-NEXT: .LBB47_5: # %else8
4659 ; RV32ZVE32F-NEXT: andi a0, t0, 32
4660 ; RV32ZVE32F-NEXT: bnez a0, .LBB47_15
4661 ; RV32ZVE32F-NEXT: .LBB47_6: # %else10
4662 ; RV32ZVE32F-NEXT: andi a0, t0, 64
4663 ; RV32ZVE32F-NEXT: bnez a0, .LBB47_16
4664 ; RV32ZVE32F-NEXT: .LBB47_7: # %else12
4665 ; RV32ZVE32F-NEXT: andi a0, t0, -128
4666 ; RV32ZVE32F-NEXT: beqz a0, .LBB47_9
4667 ; RV32ZVE32F-NEXT: .LBB47_8: # %cond.store13
4668 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4669 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
4670 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
4671 ; RV32ZVE32F-NEXT: sw a3, 0(a0)
4672 ; RV32ZVE32F-NEXT: sw a2, 4(a0)
4673 ; RV32ZVE32F-NEXT: .LBB47_9: # %else14
4674 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
4675 ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
4676 ; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
4677 ; RV32ZVE32F-NEXT: addi sp, sp, 16
4678 ; RV32ZVE32F-NEXT: ret
4679 ; RV32ZVE32F-NEXT: .LBB47_10: # %cond.store
4680 ; RV32ZVE32F-NEXT: lw a1, 4(a0)
4681 ; RV32ZVE32F-NEXT: lw a0, 0(a0)
4682 ; RV32ZVE32F-NEXT: vmv.x.s s2, v8
4683 ; RV32ZVE32F-NEXT: sw a1, 4(s2)
4684 ; RV32ZVE32F-NEXT: sw a0, 0(s2)
4685 ; RV32ZVE32F-NEXT: andi a0, t0, 2
4686 ; RV32ZVE32F-NEXT: beqz a0, .LBB47_2
4687 ; RV32ZVE32F-NEXT: .LBB47_11: # %cond.store1
4688 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4689 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
4690 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4691 ; RV32ZVE32F-NEXT: sw s1, 4(a0)
4692 ; RV32ZVE32F-NEXT: sw s0, 0(a0)
4693 ; RV32ZVE32F-NEXT: andi a0, t0, 4
4694 ; RV32ZVE32F-NEXT: beqz a0, .LBB47_3
4695 ; RV32ZVE32F-NEXT: .LBB47_12: # %cond.store3
4696 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4697 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
4698 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4699 ; RV32ZVE32F-NEXT: sw t6, 0(a0)
4700 ; RV32ZVE32F-NEXT: sw t5, 4(a0)
4701 ; RV32ZVE32F-NEXT: andi a0, t0, 8
4702 ; RV32ZVE32F-NEXT: beqz a0, .LBB47_4
4703 ; RV32ZVE32F-NEXT: .LBB47_13: # %cond.store5
4704 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4705 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
4706 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4707 ; RV32ZVE32F-NEXT: sw t4, 0(a0)
4708 ; RV32ZVE32F-NEXT: sw t3, 4(a0)
4709 ; RV32ZVE32F-NEXT: andi a0, t0, 16
4710 ; RV32ZVE32F-NEXT: beqz a0, .LBB47_5
4711 ; RV32ZVE32F-NEXT: .LBB47_14: # %cond.store7
4712 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4713 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
4714 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4715 ; RV32ZVE32F-NEXT: sw t2, 0(a0)
4716 ; RV32ZVE32F-NEXT: sw t1, 4(a0)
4717 ; RV32ZVE32F-NEXT: andi a0, t0, 32
4718 ; RV32ZVE32F-NEXT: beqz a0, .LBB47_6
4719 ; RV32ZVE32F-NEXT: .LBB47_15: # %cond.store9
4720 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4721 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
4722 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4723 ; RV32ZVE32F-NEXT: sw a7, 0(a0)
4724 ; RV32ZVE32F-NEXT: sw a6, 4(a0)
4725 ; RV32ZVE32F-NEXT: andi a0, t0, 64
4726 ; RV32ZVE32F-NEXT: beqz a0, .LBB47_7
4727 ; RV32ZVE32F-NEXT: .LBB47_16: # %cond.store11
4728 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4729 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
4730 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4731 ; RV32ZVE32F-NEXT: sw a5, 0(a0)
4732 ; RV32ZVE32F-NEXT: sw a4, 4(a0)
4733 ; RV32ZVE32F-NEXT: andi a0, t0, -128
4734 ; RV32ZVE32F-NEXT: bnez a0, .LBB47_8
4735 ; RV32ZVE32F-NEXT: j .LBB47_9
4737 ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8i64:
4738 ; RV64ZVE32F: # %bb.0:
4739 ; RV64ZVE32F-NEXT: ld a2, 56(a0)
4740 ; RV64ZVE32F-NEXT: ld a3, 48(a0)
4741 ; RV64ZVE32F-NEXT: ld a6, 40(a0)
4742 ; RV64ZVE32F-NEXT: ld a7, 32(a0)
4743 ; RV64ZVE32F-NEXT: ld t0, 24(a0)
4744 ; RV64ZVE32F-NEXT: ld t1, 16(a0)
4745 ; RV64ZVE32F-NEXT: ld t2, 8(a0)
4746 ; RV64ZVE32F-NEXT: lui a4, 16
4747 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
4748 ; RV64ZVE32F-NEXT: vmv.x.s a5, v0
4749 ; RV64ZVE32F-NEXT: andi t3, a5, 1
4750 ; RV64ZVE32F-NEXT: addiw a4, a4, -1
4751 ; RV64ZVE32F-NEXT: beqz t3, .LBB47_2
4752 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
4753 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
4754 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
4755 ; RV64ZVE32F-NEXT: vmv.x.s t3, v8
4756 ; RV64ZVE32F-NEXT: and t3, t3, a4
4757 ; RV64ZVE32F-NEXT: slli t3, t3, 3
4758 ; RV64ZVE32F-NEXT: add t3, a1, t3
4759 ; RV64ZVE32F-NEXT: sd a0, 0(t3)
4760 ; RV64ZVE32F-NEXT: .LBB47_2: # %else
4761 ; RV64ZVE32F-NEXT: andi a0, a5, 2
4762 ; RV64ZVE32F-NEXT: beqz a0, .LBB47_4
4763 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
4764 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
4765 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
4766 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9
4767 ; RV64ZVE32F-NEXT: and a0, a0, a4
4768 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4769 ; RV64ZVE32F-NEXT: add a0, a1, a0
4770 ; RV64ZVE32F-NEXT: sd t2, 0(a0)
4771 ; RV64ZVE32F-NEXT: .LBB47_4: # %else2
4772 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
4773 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
4774 ; RV64ZVE32F-NEXT: andi a0, a5, 4
4775 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
4776 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
4777 ; RV64ZVE32F-NEXT: bnez a0, .LBB47_12
4778 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
4779 ; RV64ZVE32F-NEXT: andi a0, a5, 8
4780 ; RV64ZVE32F-NEXT: bnez a0, .LBB47_13
4781 ; RV64ZVE32F-NEXT: .LBB47_6: # %else6
4782 ; RV64ZVE32F-NEXT: andi a0, a5, 16
4783 ; RV64ZVE32F-NEXT: bnez a0, .LBB47_14
4784 ; RV64ZVE32F-NEXT: .LBB47_7: # %else8
4785 ; RV64ZVE32F-NEXT: andi a0, a5, 32
4786 ; RV64ZVE32F-NEXT: beqz a0, .LBB47_9
4787 ; RV64ZVE32F-NEXT: .LBB47_8: # %cond.store9
4788 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
4789 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4790 ; RV64ZVE32F-NEXT: and a0, a0, a4
4791 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4792 ; RV64ZVE32F-NEXT: add a0, a1, a0
4793 ; RV64ZVE32F-NEXT: sd a6, 0(a0)
4794 ; RV64ZVE32F-NEXT: .LBB47_9: # %else10
4795 ; RV64ZVE32F-NEXT: andi a0, a5, 64
4796 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
4797 ; RV64ZVE32F-NEXT: bnez a0, .LBB47_15
4798 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
4799 ; RV64ZVE32F-NEXT: andi a0, a5, -128
4800 ; RV64ZVE32F-NEXT: bnez a0, .LBB47_16
4801 ; RV64ZVE32F-NEXT: .LBB47_11: # %else14
4802 ; RV64ZVE32F-NEXT: ret
4803 ; RV64ZVE32F-NEXT: .LBB47_12: # %cond.store3
4804 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4805 ; RV64ZVE32F-NEXT: and a0, a0, a4
4806 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4807 ; RV64ZVE32F-NEXT: add a0, a1, a0
4808 ; RV64ZVE32F-NEXT: sd t1, 0(a0)
4809 ; RV64ZVE32F-NEXT: andi a0, a5, 8
4810 ; RV64ZVE32F-NEXT: beqz a0, .LBB47_6
4811 ; RV64ZVE32F-NEXT: .LBB47_13: # %cond.store5
4812 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
4813 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4814 ; RV64ZVE32F-NEXT: and a0, a0, a4
4815 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4816 ; RV64ZVE32F-NEXT: add a0, a1, a0
4817 ; RV64ZVE32F-NEXT: sd t0, 0(a0)
4818 ; RV64ZVE32F-NEXT: andi a0, a5, 16
4819 ; RV64ZVE32F-NEXT: beqz a0, .LBB47_7
4820 ; RV64ZVE32F-NEXT: .LBB47_14: # %cond.store7
4821 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9
4822 ; RV64ZVE32F-NEXT: and a0, a0, a4
4823 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4824 ; RV64ZVE32F-NEXT: add a0, a1, a0
4825 ; RV64ZVE32F-NEXT: sd a7, 0(a0)
4826 ; RV64ZVE32F-NEXT: andi a0, a5, 32
4827 ; RV64ZVE32F-NEXT: bnez a0, .LBB47_8
4828 ; RV64ZVE32F-NEXT: j .LBB47_9
4829 ; RV64ZVE32F-NEXT: .LBB47_15: # %cond.store11
4830 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4831 ; RV64ZVE32F-NEXT: and a0, a0, a4
4832 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4833 ; RV64ZVE32F-NEXT: add a0, a1, a0
4834 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
4835 ; RV64ZVE32F-NEXT: andi a0, a5, -128
4836 ; RV64ZVE32F-NEXT: beqz a0, .LBB47_11
4837 ; RV64ZVE32F-NEXT: .LBB47_16: # %cond.store13
4838 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
4839 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
4840 ; RV64ZVE32F-NEXT: and a0, a0, a4
4841 ; RV64ZVE32F-NEXT: slli a0, a0, 3
4842 ; RV64ZVE32F-NEXT: add a0, a1, a0
4843 ; RV64ZVE32F-NEXT: sd a2, 0(a0)
4844 ; RV64ZVE32F-NEXT: ret
4845 %eidxs = zext <8 x i16> %idxs to <8 x i64>
4846 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
4847 call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
4851 define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m) {
4852 ; RV32V-LABEL: mscatter_baseidx_v8i32_v8i64:
4854 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
4855 ; RV32V-NEXT: vsll.vi v12, v12, 3
4856 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
4857 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
4860 ; RV64-LABEL: mscatter_baseidx_v8i32_v8i64:
4862 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
4863 ; RV64-NEXT: vsext.vf2 v16, v12
4864 ; RV64-NEXT: vsll.vi v12, v16, 3
4865 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
4868 ; RV32ZVE32F-LABEL: mscatter_baseidx_v8i32_v8i64:
4869 ; RV32ZVE32F: # %bb.0:
4870 ; RV32ZVE32F-NEXT: addi sp, sp, -16
4871 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
4872 ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
4873 ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
4874 ; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
4875 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
4876 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
4877 ; RV32ZVE32F-NEXT: .cfi_offset s2, -12
4878 ; RV32ZVE32F-NEXT: lw a2, 60(a0)
4879 ; RV32ZVE32F-NEXT: lw a3, 56(a0)
4880 ; RV32ZVE32F-NEXT: lw a4, 52(a0)
4881 ; RV32ZVE32F-NEXT: lw a5, 48(a0)
4882 ; RV32ZVE32F-NEXT: lw a6, 44(a0)
4883 ; RV32ZVE32F-NEXT: lw t0, 40(a0)
4884 ; RV32ZVE32F-NEXT: lw t1, 36(a0)
4885 ; RV32ZVE32F-NEXT: lw t2, 32(a0)
4886 ; RV32ZVE32F-NEXT: lw t3, 28(a0)
4887 ; RV32ZVE32F-NEXT: lw t4, 24(a0)
4888 ; RV32ZVE32F-NEXT: lw t5, 20(a0)
4889 ; RV32ZVE32F-NEXT: lw t6, 16(a0)
4890 ; RV32ZVE32F-NEXT: lw s1, 12(a0)
4891 ; RV32ZVE32F-NEXT: lw s0, 8(a0)
4892 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
4893 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
4894 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
4895 ; RV32ZVE32F-NEXT: vmv.x.s a7, v0
4896 ; RV32ZVE32F-NEXT: andi s2, a7, 1
4897 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4898 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
4899 ; RV32ZVE32F-NEXT: bnez s2, .LBB48_10
4900 ; RV32ZVE32F-NEXT: # %bb.1: # %else
4901 ; RV32ZVE32F-NEXT: andi a0, a7, 2
4902 ; RV32ZVE32F-NEXT: bnez a0, .LBB48_11
4903 ; RV32ZVE32F-NEXT: .LBB48_2: # %else2
4904 ; RV32ZVE32F-NEXT: andi a0, a7, 4
4905 ; RV32ZVE32F-NEXT: bnez a0, .LBB48_12
4906 ; RV32ZVE32F-NEXT: .LBB48_3: # %else4
4907 ; RV32ZVE32F-NEXT: andi a0, a7, 8
4908 ; RV32ZVE32F-NEXT: bnez a0, .LBB48_13
4909 ; RV32ZVE32F-NEXT: .LBB48_4: # %else6
4910 ; RV32ZVE32F-NEXT: andi a0, a7, 16
4911 ; RV32ZVE32F-NEXT: bnez a0, .LBB48_14
4912 ; RV32ZVE32F-NEXT: .LBB48_5: # %else8
4913 ; RV32ZVE32F-NEXT: andi a0, a7, 32
4914 ; RV32ZVE32F-NEXT: bnez a0, .LBB48_15
4915 ; RV32ZVE32F-NEXT: .LBB48_6: # %else10
4916 ; RV32ZVE32F-NEXT: andi a0, a7, 64
4917 ; RV32ZVE32F-NEXT: bnez a0, .LBB48_16
4918 ; RV32ZVE32F-NEXT: .LBB48_7: # %else12
4919 ; RV32ZVE32F-NEXT: andi a0, a7, -128
4920 ; RV32ZVE32F-NEXT: beqz a0, .LBB48_9
4921 ; RV32ZVE32F-NEXT: .LBB48_8: # %cond.store13
4922 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4923 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
4924 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
4925 ; RV32ZVE32F-NEXT: sw a3, 0(a0)
4926 ; RV32ZVE32F-NEXT: sw a2, 4(a0)
4927 ; RV32ZVE32F-NEXT: .LBB48_9: # %else14
4928 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
4929 ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
4930 ; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
4931 ; RV32ZVE32F-NEXT: addi sp, sp, 16
4932 ; RV32ZVE32F-NEXT: ret
4933 ; RV32ZVE32F-NEXT: .LBB48_10: # %cond.store
4934 ; RV32ZVE32F-NEXT: lw a1, 4(a0)
4935 ; RV32ZVE32F-NEXT: lw a0, 0(a0)
4936 ; RV32ZVE32F-NEXT: vmv.x.s s2, v8
4937 ; RV32ZVE32F-NEXT: sw a1, 4(s2)
4938 ; RV32ZVE32F-NEXT: sw a0, 0(s2)
4939 ; RV32ZVE32F-NEXT: andi a0, a7, 2
4940 ; RV32ZVE32F-NEXT: beqz a0, .LBB48_2
4941 ; RV32ZVE32F-NEXT: .LBB48_11: # %cond.store1
4942 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4943 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
4944 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4945 ; RV32ZVE32F-NEXT: sw s1, 4(a0)
4946 ; RV32ZVE32F-NEXT: sw s0, 0(a0)
4947 ; RV32ZVE32F-NEXT: andi a0, a7, 4
4948 ; RV32ZVE32F-NEXT: beqz a0, .LBB48_3
4949 ; RV32ZVE32F-NEXT: .LBB48_12: # %cond.store3
4950 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4951 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
4952 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4953 ; RV32ZVE32F-NEXT: sw t6, 0(a0)
4954 ; RV32ZVE32F-NEXT: sw t5, 4(a0)
4955 ; RV32ZVE32F-NEXT: andi a0, a7, 8
4956 ; RV32ZVE32F-NEXT: beqz a0, .LBB48_4
4957 ; RV32ZVE32F-NEXT: .LBB48_13: # %cond.store5
4958 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4959 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
4960 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4961 ; RV32ZVE32F-NEXT: sw t4, 0(a0)
4962 ; RV32ZVE32F-NEXT: sw t3, 4(a0)
4963 ; RV32ZVE32F-NEXT: andi a0, a7, 16
4964 ; RV32ZVE32F-NEXT: beqz a0, .LBB48_5
4965 ; RV32ZVE32F-NEXT: .LBB48_14: # %cond.store7
4966 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4967 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
4968 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4969 ; RV32ZVE32F-NEXT: sw t2, 0(a0)
4970 ; RV32ZVE32F-NEXT: sw t1, 4(a0)
4971 ; RV32ZVE32F-NEXT: andi a0, a7, 32
4972 ; RV32ZVE32F-NEXT: beqz a0, .LBB48_6
4973 ; RV32ZVE32F-NEXT: .LBB48_15: # %cond.store9
4974 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4975 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
4976 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4977 ; RV32ZVE32F-NEXT: sw t0, 0(a0)
4978 ; RV32ZVE32F-NEXT: sw a6, 4(a0)
4979 ; RV32ZVE32F-NEXT: andi a0, a7, 64
4980 ; RV32ZVE32F-NEXT: beqz a0, .LBB48_7
4981 ; RV32ZVE32F-NEXT: .LBB48_16: # %cond.store11
4982 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4983 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
4984 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
4985 ; RV32ZVE32F-NEXT: sw a5, 0(a0)
4986 ; RV32ZVE32F-NEXT: sw a4, 4(a0)
4987 ; RV32ZVE32F-NEXT: andi a0, a7, -128
4988 ; RV32ZVE32F-NEXT: bnez a0, .LBB48_8
4989 ; RV32ZVE32F-NEXT: j .LBB48_9
4991 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i32_v8i64:
4992 ; RV64ZVE32F: # %bb.0:
4993 ; RV64ZVE32F-NEXT: ld a2, 56(a0)
4994 ; RV64ZVE32F-NEXT: ld a3, 48(a0)
4995 ; RV64ZVE32F-NEXT: ld a5, 40(a0)
4996 ; RV64ZVE32F-NEXT: ld a6, 32(a0)
4997 ; RV64ZVE32F-NEXT: ld a7, 24(a0)
4998 ; RV64ZVE32F-NEXT: ld t0, 16(a0)
4999 ; RV64ZVE32F-NEXT: ld t1, 8(a0)
5000 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
5001 ; RV64ZVE32F-NEXT: vmv.x.s a4, v0
5002 ; RV64ZVE32F-NEXT: andi t2, a4, 1
5003 ; RV64ZVE32F-NEXT: beqz t2, .LBB48_2
5004 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
5005 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
5006 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
5007 ; RV64ZVE32F-NEXT: vmv.x.s t2, v8
5008 ; RV64ZVE32F-NEXT: slli t2, t2, 3
5009 ; RV64ZVE32F-NEXT: add t2, a1, t2
5010 ; RV64ZVE32F-NEXT: sd a0, 0(t2)
5011 ; RV64ZVE32F-NEXT: .LBB48_2: # %else
5012 ; RV64ZVE32F-NEXT: andi a0, a4, 2
5013 ; RV64ZVE32F-NEXT: beqz a0, .LBB48_4
5014 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
5015 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5016 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
5017 ; RV64ZVE32F-NEXT: vmv.x.s a0, v10
5018 ; RV64ZVE32F-NEXT: slli a0, a0, 3
5019 ; RV64ZVE32F-NEXT: add a0, a1, a0
5020 ; RV64ZVE32F-NEXT: sd t1, 0(a0)
5021 ; RV64ZVE32F-NEXT: .LBB48_4: # %else2
5022 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
5023 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
5024 ; RV64ZVE32F-NEXT: andi a0, a4, 4
5025 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
5026 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
5027 ; RV64ZVE32F-NEXT: bnez a0, .LBB48_12
5028 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
5029 ; RV64ZVE32F-NEXT: andi a0, a4, 8
5030 ; RV64ZVE32F-NEXT: bnez a0, .LBB48_13
5031 ; RV64ZVE32F-NEXT: .LBB48_6: # %else6
5032 ; RV64ZVE32F-NEXT: andi a0, a4, 16
5033 ; RV64ZVE32F-NEXT: bnez a0, .LBB48_14
5034 ; RV64ZVE32F-NEXT: .LBB48_7: # %else8
5035 ; RV64ZVE32F-NEXT: andi a0, a4, 32
5036 ; RV64ZVE32F-NEXT: beqz a0, .LBB48_9
5037 ; RV64ZVE32F-NEXT: .LBB48_8: # %cond.store9
5038 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
5039 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
5040 ; RV64ZVE32F-NEXT: slli a0, a0, 3
5041 ; RV64ZVE32F-NEXT: add a0, a1, a0
5042 ; RV64ZVE32F-NEXT: sd a5, 0(a0)
5043 ; RV64ZVE32F-NEXT: .LBB48_9: # %else10
5044 ; RV64ZVE32F-NEXT: andi a0, a4, 64
5045 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
5046 ; RV64ZVE32F-NEXT: bnez a0, .LBB48_15
5047 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
5048 ; RV64ZVE32F-NEXT: andi a0, a4, -128
5049 ; RV64ZVE32F-NEXT: bnez a0, .LBB48_16
5050 ; RV64ZVE32F-NEXT: .LBB48_11: # %else14
5051 ; RV64ZVE32F-NEXT: ret
5052 ; RV64ZVE32F-NEXT: .LBB48_12: # %cond.store3
5053 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
5054 ; RV64ZVE32F-NEXT: slli a0, a0, 3
5055 ; RV64ZVE32F-NEXT: add a0, a1, a0
5056 ; RV64ZVE32F-NEXT: sd t0, 0(a0)
5057 ; RV64ZVE32F-NEXT: andi a0, a4, 8
5058 ; RV64ZVE32F-NEXT: beqz a0, .LBB48_6
5059 ; RV64ZVE32F-NEXT: .LBB48_13: # %cond.store5
5060 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
5061 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
5062 ; RV64ZVE32F-NEXT: slli a0, a0, 3
5063 ; RV64ZVE32F-NEXT: add a0, a1, a0
5064 ; RV64ZVE32F-NEXT: sd a7, 0(a0)
5065 ; RV64ZVE32F-NEXT: andi a0, a4, 16
5066 ; RV64ZVE32F-NEXT: beqz a0, .LBB48_7
5067 ; RV64ZVE32F-NEXT: .LBB48_14: # %cond.store7
5068 ; RV64ZVE32F-NEXT: vmv.x.s a0, v10
5069 ; RV64ZVE32F-NEXT: slli a0, a0, 3
5070 ; RV64ZVE32F-NEXT: add a0, a1, a0
5071 ; RV64ZVE32F-NEXT: sd a6, 0(a0)
5072 ; RV64ZVE32F-NEXT: andi a0, a4, 32
5073 ; RV64ZVE32F-NEXT: bnez a0, .LBB48_8
5074 ; RV64ZVE32F-NEXT: j .LBB48_9
5075 ; RV64ZVE32F-NEXT: .LBB48_15: # %cond.store11
5076 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
5077 ; RV64ZVE32F-NEXT: slli a0, a0, 3
5078 ; RV64ZVE32F-NEXT: add a0, a1, a0
5079 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
5080 ; RV64ZVE32F-NEXT: andi a0, a4, -128
5081 ; RV64ZVE32F-NEXT: beqz a0, .LBB48_11
5082 ; RV64ZVE32F-NEXT: .LBB48_16: # %cond.store13
5083 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
5084 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
5085 ; RV64ZVE32F-NEXT: slli a0, a0, 3
5086 ; RV64ZVE32F-NEXT: add a0, a1, a0
5087 ; RV64ZVE32F-NEXT: sd a2, 0(a0)
5088 ; RV64ZVE32F-NEXT: ret
5089 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i32> %idxs
5090 call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
5094 define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m) {
5095 ; RV32V-LABEL: mscatter_baseidx_sext_v8i32_v8i64:
5097 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
5098 ; RV32V-NEXT: vsll.vi v12, v12, 3
5099 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
5100 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
5103 ; RV64-LABEL: mscatter_baseidx_sext_v8i32_v8i64:
5105 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
5106 ; RV64-NEXT: vsext.vf2 v16, v12
5107 ; RV64-NEXT: vsll.vi v12, v16, 3
5108 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
5111 ; RV32ZVE32F-LABEL: mscatter_baseidx_sext_v8i32_v8i64:
5112 ; RV32ZVE32F: # %bb.0:
5113 ; RV32ZVE32F-NEXT: addi sp, sp, -16
5114 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
5115 ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
5116 ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
5117 ; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
5118 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
5119 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
5120 ; RV32ZVE32F-NEXT: .cfi_offset s2, -12
5121 ; RV32ZVE32F-NEXT: lw a2, 60(a0)
5122 ; RV32ZVE32F-NEXT: lw a3, 56(a0)
5123 ; RV32ZVE32F-NEXT: lw a4, 52(a0)
5124 ; RV32ZVE32F-NEXT: lw a5, 48(a0)
5125 ; RV32ZVE32F-NEXT: lw a6, 44(a0)
5126 ; RV32ZVE32F-NEXT: lw t0, 40(a0)
5127 ; RV32ZVE32F-NEXT: lw t1, 36(a0)
5128 ; RV32ZVE32F-NEXT: lw t2, 32(a0)
5129 ; RV32ZVE32F-NEXT: lw t3, 28(a0)
5130 ; RV32ZVE32F-NEXT: lw t4, 24(a0)
5131 ; RV32ZVE32F-NEXT: lw t5, 20(a0)
5132 ; RV32ZVE32F-NEXT: lw t6, 16(a0)
5133 ; RV32ZVE32F-NEXT: lw s1, 12(a0)
5134 ; RV32ZVE32F-NEXT: lw s0, 8(a0)
5135 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
5136 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
5137 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
5138 ; RV32ZVE32F-NEXT: vmv.x.s a7, v0
5139 ; RV32ZVE32F-NEXT: andi s2, a7, 1
5140 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
5141 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
5142 ; RV32ZVE32F-NEXT: bnez s2, .LBB49_10
5143 ; RV32ZVE32F-NEXT: # %bb.1: # %else
5144 ; RV32ZVE32F-NEXT: andi a0, a7, 2
5145 ; RV32ZVE32F-NEXT: bnez a0, .LBB49_11
5146 ; RV32ZVE32F-NEXT: .LBB49_2: # %else2
5147 ; RV32ZVE32F-NEXT: andi a0, a7, 4
5148 ; RV32ZVE32F-NEXT: bnez a0, .LBB49_12
5149 ; RV32ZVE32F-NEXT: .LBB49_3: # %else4
5150 ; RV32ZVE32F-NEXT: andi a0, a7, 8
5151 ; RV32ZVE32F-NEXT: bnez a0, .LBB49_13
5152 ; RV32ZVE32F-NEXT: .LBB49_4: # %else6
5153 ; RV32ZVE32F-NEXT: andi a0, a7, 16
5154 ; RV32ZVE32F-NEXT: bnez a0, .LBB49_14
5155 ; RV32ZVE32F-NEXT: .LBB49_5: # %else8
5156 ; RV32ZVE32F-NEXT: andi a0, a7, 32
5157 ; RV32ZVE32F-NEXT: bnez a0, .LBB49_15
5158 ; RV32ZVE32F-NEXT: .LBB49_6: # %else10
5159 ; RV32ZVE32F-NEXT: andi a0, a7, 64
5160 ; RV32ZVE32F-NEXT: bnez a0, .LBB49_16
5161 ; RV32ZVE32F-NEXT: .LBB49_7: # %else12
5162 ; RV32ZVE32F-NEXT: andi a0, a7, -128
5163 ; RV32ZVE32F-NEXT: beqz a0, .LBB49_9
5164 ; RV32ZVE32F-NEXT: .LBB49_8: # %cond.store13
5165 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5166 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
5167 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
5168 ; RV32ZVE32F-NEXT: sw a3, 0(a0)
5169 ; RV32ZVE32F-NEXT: sw a2, 4(a0)
5170 ; RV32ZVE32F-NEXT: .LBB49_9: # %else14
5171 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
5172 ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
5173 ; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
5174 ; RV32ZVE32F-NEXT: addi sp, sp, 16
5175 ; RV32ZVE32F-NEXT: ret
5176 ; RV32ZVE32F-NEXT: .LBB49_10: # %cond.store
5177 ; RV32ZVE32F-NEXT: lw a1, 4(a0)
5178 ; RV32ZVE32F-NEXT: lw a0, 0(a0)
5179 ; RV32ZVE32F-NEXT: vmv.x.s s2, v8
5180 ; RV32ZVE32F-NEXT: sw a1, 4(s2)
5181 ; RV32ZVE32F-NEXT: sw a0, 0(s2)
5182 ; RV32ZVE32F-NEXT: andi a0, a7, 2
5183 ; RV32ZVE32F-NEXT: beqz a0, .LBB49_2
5184 ; RV32ZVE32F-NEXT: .LBB49_11: # %cond.store1
5185 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5186 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
5187 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5188 ; RV32ZVE32F-NEXT: sw s1, 4(a0)
5189 ; RV32ZVE32F-NEXT: sw s0, 0(a0)
5190 ; RV32ZVE32F-NEXT: andi a0, a7, 4
5191 ; RV32ZVE32F-NEXT: beqz a0, .LBB49_3
5192 ; RV32ZVE32F-NEXT: .LBB49_12: # %cond.store3
5193 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5194 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
5195 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5196 ; RV32ZVE32F-NEXT: sw t6, 0(a0)
5197 ; RV32ZVE32F-NEXT: sw t5, 4(a0)
5198 ; RV32ZVE32F-NEXT: andi a0, a7, 8
5199 ; RV32ZVE32F-NEXT: beqz a0, .LBB49_4
5200 ; RV32ZVE32F-NEXT: .LBB49_13: # %cond.store5
5201 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5202 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
5203 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5204 ; RV32ZVE32F-NEXT: sw t4, 0(a0)
5205 ; RV32ZVE32F-NEXT: sw t3, 4(a0)
5206 ; RV32ZVE32F-NEXT: andi a0, a7, 16
5207 ; RV32ZVE32F-NEXT: beqz a0, .LBB49_5
5208 ; RV32ZVE32F-NEXT: .LBB49_14: # %cond.store7
5209 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5210 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
5211 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5212 ; RV32ZVE32F-NEXT: sw t2, 0(a0)
5213 ; RV32ZVE32F-NEXT: sw t1, 4(a0)
5214 ; RV32ZVE32F-NEXT: andi a0, a7, 32
5215 ; RV32ZVE32F-NEXT: beqz a0, .LBB49_6
5216 ; RV32ZVE32F-NEXT: .LBB49_15: # %cond.store9
5217 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5218 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
5219 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5220 ; RV32ZVE32F-NEXT: sw t0, 0(a0)
5221 ; RV32ZVE32F-NEXT: sw a6, 4(a0)
5222 ; RV32ZVE32F-NEXT: andi a0, a7, 64
5223 ; RV32ZVE32F-NEXT: beqz a0, .LBB49_7
5224 ; RV32ZVE32F-NEXT: .LBB49_16: # %cond.store11
5225 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5226 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
5227 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5228 ; RV32ZVE32F-NEXT: sw a5, 0(a0)
5229 ; RV32ZVE32F-NEXT: sw a4, 4(a0)
5230 ; RV32ZVE32F-NEXT: andi a0, a7, -128
5231 ; RV32ZVE32F-NEXT: bnez a0, .LBB49_8
5232 ; RV32ZVE32F-NEXT: j .LBB49_9
5234 ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i32_v8i64:
5235 ; RV64ZVE32F: # %bb.0:
5236 ; RV64ZVE32F-NEXT: ld a2, 56(a0)
5237 ; RV64ZVE32F-NEXT: ld a3, 48(a0)
5238 ; RV64ZVE32F-NEXT: ld a5, 40(a0)
5239 ; RV64ZVE32F-NEXT: ld a6, 32(a0)
5240 ; RV64ZVE32F-NEXT: ld a7, 24(a0)
5241 ; RV64ZVE32F-NEXT: ld t0, 16(a0)
5242 ; RV64ZVE32F-NEXT: ld t1, 8(a0)
5243 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
5244 ; RV64ZVE32F-NEXT: vmv.x.s a4, v0
5245 ; RV64ZVE32F-NEXT: andi t2, a4, 1
5246 ; RV64ZVE32F-NEXT: beqz t2, .LBB49_2
5247 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
5248 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
5249 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
5250 ; RV64ZVE32F-NEXT: vmv.x.s t2, v8
5251 ; RV64ZVE32F-NEXT: slli t2, t2, 3
5252 ; RV64ZVE32F-NEXT: add t2, a1, t2
5253 ; RV64ZVE32F-NEXT: sd a0, 0(t2)
5254 ; RV64ZVE32F-NEXT: .LBB49_2: # %else
5255 ; RV64ZVE32F-NEXT: andi a0, a4, 2
5256 ; RV64ZVE32F-NEXT: beqz a0, .LBB49_4
5257 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
5258 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5259 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
5260 ; RV64ZVE32F-NEXT: vmv.x.s a0, v10
5261 ; RV64ZVE32F-NEXT: slli a0, a0, 3
5262 ; RV64ZVE32F-NEXT: add a0, a1, a0
5263 ; RV64ZVE32F-NEXT: sd t1, 0(a0)
5264 ; RV64ZVE32F-NEXT: .LBB49_4: # %else2
5265 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
5266 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
5267 ; RV64ZVE32F-NEXT: andi a0, a4, 4
5268 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
5269 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
5270 ; RV64ZVE32F-NEXT: bnez a0, .LBB49_12
5271 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
5272 ; RV64ZVE32F-NEXT: andi a0, a4, 8
5273 ; RV64ZVE32F-NEXT: bnez a0, .LBB49_13
5274 ; RV64ZVE32F-NEXT: .LBB49_6: # %else6
5275 ; RV64ZVE32F-NEXT: andi a0, a4, 16
5276 ; RV64ZVE32F-NEXT: bnez a0, .LBB49_14
5277 ; RV64ZVE32F-NEXT: .LBB49_7: # %else8
5278 ; RV64ZVE32F-NEXT: andi a0, a4, 32
5279 ; RV64ZVE32F-NEXT: beqz a0, .LBB49_9
5280 ; RV64ZVE32F-NEXT: .LBB49_8: # %cond.store9
5281 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
5282 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
5283 ; RV64ZVE32F-NEXT: slli a0, a0, 3
5284 ; RV64ZVE32F-NEXT: add a0, a1, a0
5285 ; RV64ZVE32F-NEXT: sd a5, 0(a0)
5286 ; RV64ZVE32F-NEXT: .LBB49_9: # %else10
5287 ; RV64ZVE32F-NEXT: andi a0, a4, 64
5288 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
5289 ; RV64ZVE32F-NEXT: bnez a0, .LBB49_15
5290 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
5291 ; RV64ZVE32F-NEXT: andi a0, a4, -128
5292 ; RV64ZVE32F-NEXT: bnez a0, .LBB49_16
5293 ; RV64ZVE32F-NEXT: .LBB49_11: # %else14
5294 ; RV64ZVE32F-NEXT: ret
5295 ; RV64ZVE32F-NEXT: .LBB49_12: # %cond.store3
5296 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
5297 ; RV64ZVE32F-NEXT: slli a0, a0, 3
5298 ; RV64ZVE32F-NEXT: add a0, a1, a0
5299 ; RV64ZVE32F-NEXT: sd t0, 0(a0)
5300 ; RV64ZVE32F-NEXT: andi a0, a4, 8
5301 ; RV64ZVE32F-NEXT: beqz a0, .LBB49_6
5302 ; RV64ZVE32F-NEXT: .LBB49_13: # %cond.store5
5303 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
5304 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
5305 ; RV64ZVE32F-NEXT: slli a0, a0, 3
5306 ; RV64ZVE32F-NEXT: add a0, a1, a0
5307 ; RV64ZVE32F-NEXT: sd a7, 0(a0)
5308 ; RV64ZVE32F-NEXT: andi a0, a4, 16
5309 ; RV64ZVE32F-NEXT: beqz a0, .LBB49_7
5310 ; RV64ZVE32F-NEXT: .LBB49_14: # %cond.store7
5311 ; RV64ZVE32F-NEXT: vmv.x.s a0, v10
5312 ; RV64ZVE32F-NEXT: slli a0, a0, 3
5313 ; RV64ZVE32F-NEXT: add a0, a1, a0
5314 ; RV64ZVE32F-NEXT: sd a6, 0(a0)
5315 ; RV64ZVE32F-NEXT: andi a0, a4, 32
5316 ; RV64ZVE32F-NEXT: bnez a0, .LBB49_8
5317 ; RV64ZVE32F-NEXT: j .LBB49_9
5318 ; RV64ZVE32F-NEXT: .LBB49_15: # %cond.store11
5319 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
5320 ; RV64ZVE32F-NEXT: slli a0, a0, 3
5321 ; RV64ZVE32F-NEXT: add a0, a1, a0
5322 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
5323 ; RV64ZVE32F-NEXT: andi a0, a4, -128
5324 ; RV64ZVE32F-NEXT: beqz a0, .LBB49_11
5325 ; RV64ZVE32F-NEXT: .LBB49_16: # %cond.store13
5326 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
5327 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
5328 ; RV64ZVE32F-NEXT: slli a0, a0, 3
5329 ; RV64ZVE32F-NEXT: add a0, a1, a0
5330 ; RV64ZVE32F-NEXT: sd a2, 0(a0)
5331 ; RV64ZVE32F-NEXT: ret
5332 %eidxs = sext <8 x i32> %idxs to <8 x i64>
5333 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
5334 call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
5338 define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m) {
5339 ; RV32V-LABEL: mscatter_baseidx_zext_v8i32_v8i64:
5341 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
5342 ; RV32V-NEXT: vsll.vi v12, v12, 3
5343 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
5344 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
5347 ; RV64-LABEL: mscatter_baseidx_zext_v8i32_v8i64:
5349 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
5350 ; RV64-NEXT: vzext.vf2 v16, v12
5351 ; RV64-NEXT: vsll.vi v12, v16, 3
5352 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
5355 ; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i32_v8i64:
5356 ; RV32ZVE32F: # %bb.0:
5357 ; RV32ZVE32F-NEXT: addi sp, sp, -16
5358 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
5359 ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
5360 ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
5361 ; RV32ZVE32F-NEXT: sw s2, 4(sp) # 4-byte Folded Spill
5362 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
5363 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
5364 ; RV32ZVE32F-NEXT: .cfi_offset s2, -12
5365 ; RV32ZVE32F-NEXT: lw a2, 60(a0)
5366 ; RV32ZVE32F-NEXT: lw a3, 56(a0)
5367 ; RV32ZVE32F-NEXT: lw a4, 52(a0)
5368 ; RV32ZVE32F-NEXT: lw a5, 48(a0)
5369 ; RV32ZVE32F-NEXT: lw a6, 44(a0)
5370 ; RV32ZVE32F-NEXT: lw t0, 40(a0)
5371 ; RV32ZVE32F-NEXT: lw t1, 36(a0)
5372 ; RV32ZVE32F-NEXT: lw t2, 32(a0)
5373 ; RV32ZVE32F-NEXT: lw t3, 28(a0)
5374 ; RV32ZVE32F-NEXT: lw t4, 24(a0)
5375 ; RV32ZVE32F-NEXT: lw t5, 20(a0)
5376 ; RV32ZVE32F-NEXT: lw t6, 16(a0)
5377 ; RV32ZVE32F-NEXT: lw s1, 12(a0)
5378 ; RV32ZVE32F-NEXT: lw s0, 8(a0)
5379 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
5380 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
5381 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
5382 ; RV32ZVE32F-NEXT: vmv.x.s a7, v0
5383 ; RV32ZVE32F-NEXT: andi s2, a7, 1
5384 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
5385 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
5386 ; RV32ZVE32F-NEXT: bnez s2, .LBB50_10
5387 ; RV32ZVE32F-NEXT: # %bb.1: # %else
5388 ; RV32ZVE32F-NEXT: andi a0, a7, 2
5389 ; RV32ZVE32F-NEXT: bnez a0, .LBB50_11
5390 ; RV32ZVE32F-NEXT: .LBB50_2: # %else2
5391 ; RV32ZVE32F-NEXT: andi a0, a7, 4
5392 ; RV32ZVE32F-NEXT: bnez a0, .LBB50_12
5393 ; RV32ZVE32F-NEXT: .LBB50_3: # %else4
5394 ; RV32ZVE32F-NEXT: andi a0, a7, 8
5395 ; RV32ZVE32F-NEXT: bnez a0, .LBB50_13
5396 ; RV32ZVE32F-NEXT: .LBB50_4: # %else6
5397 ; RV32ZVE32F-NEXT: andi a0, a7, 16
5398 ; RV32ZVE32F-NEXT: bnez a0, .LBB50_14
5399 ; RV32ZVE32F-NEXT: .LBB50_5: # %else8
5400 ; RV32ZVE32F-NEXT: andi a0, a7, 32
5401 ; RV32ZVE32F-NEXT: bnez a0, .LBB50_15
5402 ; RV32ZVE32F-NEXT: .LBB50_6: # %else10
5403 ; RV32ZVE32F-NEXT: andi a0, a7, 64
5404 ; RV32ZVE32F-NEXT: bnez a0, .LBB50_16
5405 ; RV32ZVE32F-NEXT: .LBB50_7: # %else12
5406 ; RV32ZVE32F-NEXT: andi a0, a7, -128
5407 ; RV32ZVE32F-NEXT: beqz a0, .LBB50_9
5408 ; RV32ZVE32F-NEXT: .LBB50_8: # %cond.store13
5409 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5410 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
5411 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
5412 ; RV32ZVE32F-NEXT: sw a3, 0(a0)
5413 ; RV32ZVE32F-NEXT: sw a2, 4(a0)
5414 ; RV32ZVE32F-NEXT: .LBB50_9: # %else14
5415 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
5416 ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
5417 ; RV32ZVE32F-NEXT: lw s2, 4(sp) # 4-byte Folded Reload
5418 ; RV32ZVE32F-NEXT: addi sp, sp, 16
5419 ; RV32ZVE32F-NEXT: ret
5420 ; RV32ZVE32F-NEXT: .LBB50_10: # %cond.store
5421 ; RV32ZVE32F-NEXT: lw a1, 4(a0)
5422 ; RV32ZVE32F-NEXT: lw a0, 0(a0)
5423 ; RV32ZVE32F-NEXT: vmv.x.s s2, v8
5424 ; RV32ZVE32F-NEXT: sw a1, 4(s2)
5425 ; RV32ZVE32F-NEXT: sw a0, 0(s2)
5426 ; RV32ZVE32F-NEXT: andi a0, a7, 2
5427 ; RV32ZVE32F-NEXT: beqz a0, .LBB50_2
5428 ; RV32ZVE32F-NEXT: .LBB50_11: # %cond.store1
5429 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5430 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
5431 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5432 ; RV32ZVE32F-NEXT: sw s1, 4(a0)
5433 ; RV32ZVE32F-NEXT: sw s0, 0(a0)
5434 ; RV32ZVE32F-NEXT: andi a0, a7, 4
5435 ; RV32ZVE32F-NEXT: beqz a0, .LBB50_3
5436 ; RV32ZVE32F-NEXT: .LBB50_12: # %cond.store3
5437 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5438 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
5439 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5440 ; RV32ZVE32F-NEXT: sw t6, 0(a0)
5441 ; RV32ZVE32F-NEXT: sw t5, 4(a0)
5442 ; RV32ZVE32F-NEXT: andi a0, a7, 8
5443 ; RV32ZVE32F-NEXT: beqz a0, .LBB50_4
5444 ; RV32ZVE32F-NEXT: .LBB50_13: # %cond.store5
5445 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5446 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
5447 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5448 ; RV32ZVE32F-NEXT: sw t4, 0(a0)
5449 ; RV32ZVE32F-NEXT: sw t3, 4(a0)
5450 ; RV32ZVE32F-NEXT: andi a0, a7, 16
5451 ; RV32ZVE32F-NEXT: beqz a0, .LBB50_5
5452 ; RV32ZVE32F-NEXT: .LBB50_14: # %cond.store7
5453 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5454 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
5455 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5456 ; RV32ZVE32F-NEXT: sw t2, 0(a0)
5457 ; RV32ZVE32F-NEXT: sw t1, 4(a0)
5458 ; RV32ZVE32F-NEXT: andi a0, a7, 32
5459 ; RV32ZVE32F-NEXT: beqz a0, .LBB50_6
5460 ; RV32ZVE32F-NEXT: .LBB50_15: # %cond.store9
5461 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5462 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
5463 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5464 ; RV32ZVE32F-NEXT: sw t0, 0(a0)
5465 ; RV32ZVE32F-NEXT: sw a6, 4(a0)
5466 ; RV32ZVE32F-NEXT: andi a0, a7, 64
5467 ; RV32ZVE32F-NEXT: beqz a0, .LBB50_7
5468 ; RV32ZVE32F-NEXT: .LBB50_16: # %cond.store11
5469 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5470 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
5471 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5472 ; RV32ZVE32F-NEXT: sw a5, 0(a0)
5473 ; RV32ZVE32F-NEXT: sw a4, 4(a0)
5474 ; RV32ZVE32F-NEXT: andi a0, a7, -128
5475 ; RV32ZVE32F-NEXT: bnez a0, .LBB50_8
5476 ; RV32ZVE32F-NEXT: j .LBB50_9
5478 ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i32_v8i64:
5479 ; RV64ZVE32F: # %bb.0:
5480 ; RV64ZVE32F-NEXT: ld a2, 56(a0)
5481 ; RV64ZVE32F-NEXT: ld a3, 48(a0)
5482 ; RV64ZVE32F-NEXT: ld a5, 40(a0)
5483 ; RV64ZVE32F-NEXT: ld a6, 32(a0)
5484 ; RV64ZVE32F-NEXT: ld a7, 24(a0)
5485 ; RV64ZVE32F-NEXT: ld t0, 16(a0)
5486 ; RV64ZVE32F-NEXT: ld t1, 8(a0)
5487 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
5488 ; RV64ZVE32F-NEXT: vmv.x.s a4, v0
5489 ; RV64ZVE32F-NEXT: andi t2, a4, 1
5490 ; RV64ZVE32F-NEXT: beqz t2, .LBB50_2
5491 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
5492 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
5493 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
5494 ; RV64ZVE32F-NEXT: vmv.x.s t2, v8
5495 ; RV64ZVE32F-NEXT: slli t2, t2, 32
5496 ; RV64ZVE32F-NEXT: srli t2, t2, 29
5497 ; RV64ZVE32F-NEXT: add t2, a1, t2
5498 ; RV64ZVE32F-NEXT: sd a0, 0(t2)
5499 ; RV64ZVE32F-NEXT: .LBB50_2: # %else
5500 ; RV64ZVE32F-NEXT: andi a0, a4, 2
5501 ; RV64ZVE32F-NEXT: beqz a0, .LBB50_4
5502 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
5503 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5504 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
5505 ; RV64ZVE32F-NEXT: vmv.x.s a0, v10
5506 ; RV64ZVE32F-NEXT: slli a0, a0, 32
5507 ; RV64ZVE32F-NEXT: srli a0, a0, 29
5508 ; RV64ZVE32F-NEXT: add a0, a1, a0
5509 ; RV64ZVE32F-NEXT: sd t1, 0(a0)
5510 ; RV64ZVE32F-NEXT: .LBB50_4: # %else2
5511 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
5512 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
5513 ; RV64ZVE32F-NEXT: andi a0, a4, 4
5514 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
5515 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
5516 ; RV64ZVE32F-NEXT: bnez a0, .LBB50_12
5517 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
5518 ; RV64ZVE32F-NEXT: andi a0, a4, 8
5519 ; RV64ZVE32F-NEXT: bnez a0, .LBB50_13
5520 ; RV64ZVE32F-NEXT: .LBB50_6: # %else6
5521 ; RV64ZVE32F-NEXT: andi a0, a4, 16
5522 ; RV64ZVE32F-NEXT: bnez a0, .LBB50_14
5523 ; RV64ZVE32F-NEXT: .LBB50_7: # %else8
5524 ; RV64ZVE32F-NEXT: andi a0, a4, 32
5525 ; RV64ZVE32F-NEXT: beqz a0, .LBB50_9
5526 ; RV64ZVE32F-NEXT: .LBB50_8: # %cond.store9
5527 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
5528 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
5529 ; RV64ZVE32F-NEXT: slli a0, a0, 32
5530 ; RV64ZVE32F-NEXT: srli a0, a0, 29
5531 ; RV64ZVE32F-NEXT: add a0, a1, a0
5532 ; RV64ZVE32F-NEXT: sd a5, 0(a0)
5533 ; RV64ZVE32F-NEXT: .LBB50_9: # %else10
5534 ; RV64ZVE32F-NEXT: andi a0, a4, 64
5535 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
5536 ; RV64ZVE32F-NEXT: bnez a0, .LBB50_15
5537 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
5538 ; RV64ZVE32F-NEXT: andi a0, a4, -128
5539 ; RV64ZVE32F-NEXT: bnez a0, .LBB50_16
5540 ; RV64ZVE32F-NEXT: .LBB50_11: # %else14
5541 ; RV64ZVE32F-NEXT: ret
5542 ; RV64ZVE32F-NEXT: .LBB50_12: # %cond.store3
5543 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
5544 ; RV64ZVE32F-NEXT: slli a0, a0, 32
5545 ; RV64ZVE32F-NEXT: srli a0, a0, 29
5546 ; RV64ZVE32F-NEXT: add a0, a1, a0
5547 ; RV64ZVE32F-NEXT: sd t0, 0(a0)
5548 ; RV64ZVE32F-NEXT: andi a0, a4, 8
5549 ; RV64ZVE32F-NEXT: beqz a0, .LBB50_6
5550 ; RV64ZVE32F-NEXT: .LBB50_13: # %cond.store5
5551 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
5552 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
5553 ; RV64ZVE32F-NEXT: slli a0, a0, 32
5554 ; RV64ZVE32F-NEXT: srli a0, a0, 29
5555 ; RV64ZVE32F-NEXT: add a0, a1, a0
5556 ; RV64ZVE32F-NEXT: sd a7, 0(a0)
5557 ; RV64ZVE32F-NEXT: andi a0, a4, 16
5558 ; RV64ZVE32F-NEXT: beqz a0, .LBB50_7
5559 ; RV64ZVE32F-NEXT: .LBB50_14: # %cond.store7
5560 ; RV64ZVE32F-NEXT: vmv.x.s a0, v10
5561 ; RV64ZVE32F-NEXT: slli a0, a0, 32
5562 ; RV64ZVE32F-NEXT: srli a0, a0, 29
5563 ; RV64ZVE32F-NEXT: add a0, a1, a0
5564 ; RV64ZVE32F-NEXT: sd a6, 0(a0)
5565 ; RV64ZVE32F-NEXT: andi a0, a4, 32
5566 ; RV64ZVE32F-NEXT: bnez a0, .LBB50_8
5567 ; RV64ZVE32F-NEXT: j .LBB50_9
5568 ; RV64ZVE32F-NEXT: .LBB50_15: # %cond.store11
5569 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
5570 ; RV64ZVE32F-NEXT: slli a0, a0, 32
5571 ; RV64ZVE32F-NEXT: srli a0, a0, 29
5572 ; RV64ZVE32F-NEXT: add a0, a1, a0
5573 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
5574 ; RV64ZVE32F-NEXT: andi a0, a4, -128
5575 ; RV64ZVE32F-NEXT: beqz a0, .LBB50_11
5576 ; RV64ZVE32F-NEXT: .LBB50_16: # %cond.store13
5577 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
5578 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
5579 ; RV64ZVE32F-NEXT: slli a0, a0, 32
5580 ; RV64ZVE32F-NEXT: srli a0, a0, 29
5581 ; RV64ZVE32F-NEXT: add a0, a1, a0
5582 ; RV64ZVE32F-NEXT: sd a2, 0(a0)
5583 ; RV64ZVE32F-NEXT: ret
5584 %eidxs = zext <8 x i32> %idxs to <8 x i64>
5585 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
5586 call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
5590 define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs, <8 x i1> %m) {
; Masked scatter of eight i64 values to base + idxs*8 with native i64 indices.
; The 32-bit V config narrows the index vector to i32 (vnsrl) before a single
; indexed store; the 64-bit V config uses the indices directly. The zve32f
; configs have no 64-bit vector elements, so the 64-bit payload/addresses are
; scalarized into one conditional store per mask bit (andi + branch chains).
; NOTE: CHECK lines are autogenerated by update_llc_test_checks.py — do not
; edit them by hand; rerun the script instead.
5591 ; RV32V-LABEL: mscatter_baseidx_v8i64:
5593 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
5594 ; RV32V-NEXT: vnsrl.wi v16, v12, 0
5595 ; RV32V-NEXT: vsll.vi v12, v16, 3
5596 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
5597 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
5600 ; RV64-LABEL: mscatter_baseidx_v8i64:
5602 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
5603 ; RV64-NEXT: vsll.vi v12, v12, 3
5604 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
5607 ; RV32ZVE32F-LABEL: mscatter_baseidx_v8i64:
5608 ; RV32ZVE32F: # %bb.0:
5609 ; RV32ZVE32F-NEXT: addi sp, sp, -32
5610 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 32
5611 ; RV32ZVE32F-NEXT: sw s0, 28(sp) # 4-byte Folded Spill
5612 ; RV32ZVE32F-NEXT: sw s1, 24(sp) # 4-byte Folded Spill
5613 ; RV32ZVE32F-NEXT: sw s2, 20(sp) # 4-byte Folded Spill
5614 ; RV32ZVE32F-NEXT: sw s3, 16(sp) # 4-byte Folded Spill
5615 ; RV32ZVE32F-NEXT: sw s4, 12(sp) # 4-byte Folded Spill
5616 ; RV32ZVE32F-NEXT: sw s5, 8(sp) # 4-byte Folded Spill
5617 ; RV32ZVE32F-NEXT: sw s6, 4(sp) # 4-byte Folded Spill
5618 ; RV32ZVE32F-NEXT: sw s7, 0(sp) # 4-byte Folded Spill
5619 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
5620 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
5621 ; RV32ZVE32F-NEXT: .cfi_offset s2, -12
5622 ; RV32ZVE32F-NEXT: .cfi_offset s3, -16
5623 ; RV32ZVE32F-NEXT: .cfi_offset s4, -20
5624 ; RV32ZVE32F-NEXT: .cfi_offset s5, -24
5625 ; RV32ZVE32F-NEXT: .cfi_offset s6, -28
5626 ; RV32ZVE32F-NEXT: .cfi_offset s7, -32
5627 ; RV32ZVE32F-NEXT: lw a3, 60(a0)
5628 ; RV32ZVE32F-NEXT: lw a4, 56(a0)
5629 ; RV32ZVE32F-NEXT: lw a5, 52(a0)
5630 ; RV32ZVE32F-NEXT: lw a6, 48(a0)
5631 ; RV32ZVE32F-NEXT: lw a7, 44(a0)
5632 ; RV32ZVE32F-NEXT: lw t0, 40(a0)
5633 ; RV32ZVE32F-NEXT: lw t1, 36(a0)
5634 ; RV32ZVE32F-NEXT: lw t2, 32(a0)
5635 ; RV32ZVE32F-NEXT: lw t3, 28(a0)
5636 ; RV32ZVE32F-NEXT: lw t4, 24(a0)
5637 ; RV32ZVE32F-NEXT: lw t5, 20(a0)
5638 ; RV32ZVE32F-NEXT: lw t6, 16(a0)
5639 ; RV32ZVE32F-NEXT: lw s1, 12(a0)
5640 ; RV32ZVE32F-NEXT: lw s0, 8(a0)
5641 ; RV32ZVE32F-NEXT: lw s2, 56(a2)
5642 ; RV32ZVE32F-NEXT: lw s3, 48(a2)
5643 ; RV32ZVE32F-NEXT: lw s4, 40(a2)
5644 ; RV32ZVE32F-NEXT: lw s5, 8(a2)
5645 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
5646 ; RV32ZVE32F-NEXT: vlse32.v v8, (a2), zero
5647 ; RV32ZVE32F-NEXT: lw s6, 16(a2)
5648 ; RV32ZVE32F-NEXT: lw s7, 24(a2)
5649 ; RV32ZVE32F-NEXT: lw a2, 32(a2)
5650 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s5
5651 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s6
5652 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s7
5653 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a2
5654 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s4
5655 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s3
5656 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s2
5657 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
5658 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
5659 ; RV32ZVE32F-NEXT: vmv.x.s a2, v0
5660 ; RV32ZVE32F-NEXT: andi s2, a2, 1
5661 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
5662 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
5663 ; RV32ZVE32F-NEXT: bnez s2, .LBB51_10
5664 ; RV32ZVE32F-NEXT: # %bb.1: # %else
5665 ; RV32ZVE32F-NEXT: andi a0, a2, 2
5666 ; RV32ZVE32F-NEXT: bnez a0, .LBB51_11
5667 ; RV32ZVE32F-NEXT: .LBB51_2: # %else2
5668 ; RV32ZVE32F-NEXT: andi a0, a2, 4
5669 ; RV32ZVE32F-NEXT: bnez a0, .LBB51_12
5670 ; RV32ZVE32F-NEXT: .LBB51_3: # %else4
5671 ; RV32ZVE32F-NEXT: andi a0, a2, 8
5672 ; RV32ZVE32F-NEXT: bnez a0, .LBB51_13
5673 ; RV32ZVE32F-NEXT: .LBB51_4: # %else6
5674 ; RV32ZVE32F-NEXT: andi a0, a2, 16
5675 ; RV32ZVE32F-NEXT: bnez a0, .LBB51_14
5676 ; RV32ZVE32F-NEXT: .LBB51_5: # %else8
5677 ; RV32ZVE32F-NEXT: andi a0, a2, 32
5678 ; RV32ZVE32F-NEXT: bnez a0, .LBB51_15
5679 ; RV32ZVE32F-NEXT: .LBB51_6: # %else10
5680 ; RV32ZVE32F-NEXT: andi a0, a2, 64
5681 ; RV32ZVE32F-NEXT: bnez a0, .LBB51_16
5682 ; RV32ZVE32F-NEXT: .LBB51_7: # %else12
5683 ; RV32ZVE32F-NEXT: andi a0, a2, -128
5684 ; RV32ZVE32F-NEXT: beqz a0, .LBB51_9
5685 ; RV32ZVE32F-NEXT: .LBB51_8: # %cond.store13
5686 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5687 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
5688 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
5689 ; RV32ZVE32F-NEXT: sw a4, 0(a0)
5690 ; RV32ZVE32F-NEXT: sw a3, 4(a0)
5691 ; RV32ZVE32F-NEXT: .LBB51_9: # %else14
5692 ; RV32ZVE32F-NEXT: lw s0, 28(sp) # 4-byte Folded Reload
5693 ; RV32ZVE32F-NEXT: lw s1, 24(sp) # 4-byte Folded Reload
5694 ; RV32ZVE32F-NEXT: lw s2, 20(sp) # 4-byte Folded Reload
5695 ; RV32ZVE32F-NEXT: lw s3, 16(sp) # 4-byte Folded Reload
5696 ; RV32ZVE32F-NEXT: lw s4, 12(sp) # 4-byte Folded Reload
5697 ; RV32ZVE32F-NEXT: lw s5, 8(sp) # 4-byte Folded Reload
5698 ; RV32ZVE32F-NEXT: lw s6, 4(sp) # 4-byte Folded Reload
5699 ; RV32ZVE32F-NEXT: lw s7, 0(sp) # 4-byte Folded Reload
5700 ; RV32ZVE32F-NEXT: addi sp, sp, 32
5701 ; RV32ZVE32F-NEXT: ret
5702 ; RV32ZVE32F-NEXT: .LBB51_10: # %cond.store
5703 ; RV32ZVE32F-NEXT: lw a1, 4(a0)
5704 ; RV32ZVE32F-NEXT: lw a0, 0(a0)
5705 ; RV32ZVE32F-NEXT: vmv.x.s s2, v8
5706 ; RV32ZVE32F-NEXT: sw a1, 4(s2)
5707 ; RV32ZVE32F-NEXT: sw a0, 0(s2)
5708 ; RV32ZVE32F-NEXT: andi a0, a2, 2
5709 ; RV32ZVE32F-NEXT: beqz a0, .LBB51_2
5710 ; RV32ZVE32F-NEXT: .LBB51_11: # %cond.store1
5711 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5712 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
5713 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5714 ; RV32ZVE32F-NEXT: sw s1, 4(a0)
5715 ; RV32ZVE32F-NEXT: sw s0, 0(a0)
5716 ; RV32ZVE32F-NEXT: andi a0, a2, 4
5717 ; RV32ZVE32F-NEXT: beqz a0, .LBB51_3
5718 ; RV32ZVE32F-NEXT: .LBB51_12: # %cond.store3
5719 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5720 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
5721 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5722 ; RV32ZVE32F-NEXT: sw t6, 0(a0)
5723 ; RV32ZVE32F-NEXT: sw t5, 4(a0)
5724 ; RV32ZVE32F-NEXT: andi a0, a2, 8
5725 ; RV32ZVE32F-NEXT: beqz a0, .LBB51_4
5726 ; RV32ZVE32F-NEXT: .LBB51_13: # %cond.store5
5727 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5728 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
5729 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5730 ; RV32ZVE32F-NEXT: sw t4, 0(a0)
5731 ; RV32ZVE32F-NEXT: sw t3, 4(a0)
5732 ; RV32ZVE32F-NEXT: andi a0, a2, 16
5733 ; RV32ZVE32F-NEXT: beqz a0, .LBB51_5
5734 ; RV32ZVE32F-NEXT: .LBB51_14: # %cond.store7
5735 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5736 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
5737 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5738 ; RV32ZVE32F-NEXT: sw t2, 0(a0)
5739 ; RV32ZVE32F-NEXT: sw t1, 4(a0)
5740 ; RV32ZVE32F-NEXT: andi a0, a2, 32
5741 ; RV32ZVE32F-NEXT: beqz a0, .LBB51_6
5742 ; RV32ZVE32F-NEXT: .LBB51_15: # %cond.store9
5743 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5744 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
5745 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5746 ; RV32ZVE32F-NEXT: sw t0, 0(a0)
5747 ; RV32ZVE32F-NEXT: sw a7, 4(a0)
5748 ; RV32ZVE32F-NEXT: andi a0, a2, 64
5749 ; RV32ZVE32F-NEXT: beqz a0, .LBB51_7
5750 ; RV32ZVE32F-NEXT: .LBB51_16: # %cond.store11
5751 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5752 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
5753 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
5754 ; RV32ZVE32F-NEXT: sw a6, 0(a0)
5755 ; RV32ZVE32F-NEXT: sw a5, 4(a0)
5756 ; RV32ZVE32F-NEXT: andi a0, a2, -128
5757 ; RV32ZVE32F-NEXT: bnez a0, .LBB51_8
5758 ; RV32ZVE32F-NEXT: j .LBB51_9
5760 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i64:
5761 ; RV64ZVE32F: # %bb.0:
5762 ; RV64ZVE32F-NEXT: addi sp, sp, -32
5763 ; RV64ZVE32F-NEXT: .cfi_def_cfa_offset 32
5764 ; RV64ZVE32F-NEXT: sd s0, 24(sp) # 8-byte Folded Spill
5765 ; RV64ZVE32F-NEXT: sd s1, 16(sp) # 8-byte Folded Spill
5766 ; RV64ZVE32F-NEXT: sd s2, 8(sp) # 8-byte Folded Spill
5767 ; RV64ZVE32F-NEXT: sd s3, 0(sp) # 8-byte Folded Spill
5768 ; RV64ZVE32F-NEXT: .cfi_offset s0, -8
5769 ; RV64ZVE32F-NEXT: .cfi_offset s1, -16
5770 ; RV64ZVE32F-NEXT: .cfi_offset s2, -24
5771 ; RV64ZVE32F-NEXT: .cfi_offset s3, -32
5772 ; RV64ZVE32F-NEXT: ld a3, 56(a0)
5773 ; RV64ZVE32F-NEXT: ld a4, 48(a0)
5774 ; RV64ZVE32F-NEXT: ld a6, 40(a0)
5775 ; RV64ZVE32F-NEXT: ld t1, 32(a0)
5776 ; RV64ZVE32F-NEXT: ld t3, 24(a0)
5777 ; RV64ZVE32F-NEXT: ld t6, 16(a0)
5778 ; RV64ZVE32F-NEXT: ld s1, 8(a0)
5779 ; RV64ZVE32F-NEXT: ld s2, 8(a2)
5780 ; RV64ZVE32F-NEXT: ld s0, 16(a2)
5781 ; RV64ZVE32F-NEXT: ld t5, 24(a2)
5782 ; RV64ZVE32F-NEXT: ld t4, 32(a2)
5783 ; RV64ZVE32F-NEXT: ld t2, 40(a2)
5784 ; RV64ZVE32F-NEXT: ld t0, 48(a2)
5785 ; RV64ZVE32F-NEXT: ld a5, 56(a2)
5786 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
5787 ; RV64ZVE32F-NEXT: vmv.x.s a7, v0
5788 ; RV64ZVE32F-NEXT: andi s3, a7, 1
5789 ; RV64ZVE32F-NEXT: bnez s3, .LBB51_10
5790 ; RV64ZVE32F-NEXT: # %bb.1: # %else
5791 ; RV64ZVE32F-NEXT: andi a0, a7, 2
5792 ; RV64ZVE32F-NEXT: bnez a0, .LBB51_11
5793 ; RV64ZVE32F-NEXT: .LBB51_2: # %else2
5794 ; RV64ZVE32F-NEXT: andi a0, a7, 4
5795 ; RV64ZVE32F-NEXT: bnez a0, .LBB51_12
5796 ; RV64ZVE32F-NEXT: .LBB51_3: # %else4
5797 ; RV64ZVE32F-NEXT: andi a0, a7, 8
5798 ; RV64ZVE32F-NEXT: bnez a0, .LBB51_13
5799 ; RV64ZVE32F-NEXT: .LBB51_4: # %else6
5800 ; RV64ZVE32F-NEXT: andi a0, a7, 16
5801 ; RV64ZVE32F-NEXT: bnez a0, .LBB51_14
5802 ; RV64ZVE32F-NEXT: .LBB51_5: # %else8
5803 ; RV64ZVE32F-NEXT: andi a0, a7, 32
5804 ; RV64ZVE32F-NEXT: bnez a0, .LBB51_15
5805 ; RV64ZVE32F-NEXT: .LBB51_6: # %else10
5806 ; RV64ZVE32F-NEXT: andi a0, a7, 64
5807 ; RV64ZVE32F-NEXT: bnez a0, .LBB51_16
5808 ; RV64ZVE32F-NEXT: .LBB51_7: # %else12
5809 ; RV64ZVE32F-NEXT: andi a0, a7, -128
5810 ; RV64ZVE32F-NEXT: beqz a0, .LBB51_9
5811 ; RV64ZVE32F-NEXT: .LBB51_8: # %cond.store13
5812 ; RV64ZVE32F-NEXT: slli a5, a5, 3
5813 ; RV64ZVE32F-NEXT: add a1, a1, a5
5814 ; RV64ZVE32F-NEXT: sd a3, 0(a1)
5815 ; RV64ZVE32F-NEXT: .LBB51_9: # %else14
5816 ; RV64ZVE32F-NEXT: ld s0, 24(sp) # 8-byte Folded Reload
5817 ; RV64ZVE32F-NEXT: ld s1, 16(sp) # 8-byte Folded Reload
5818 ; RV64ZVE32F-NEXT: ld s2, 8(sp) # 8-byte Folded Reload
5819 ; RV64ZVE32F-NEXT: ld s3, 0(sp) # 8-byte Folded Reload
5820 ; RV64ZVE32F-NEXT: addi sp, sp, 32
5821 ; RV64ZVE32F-NEXT: ret
5822 ; RV64ZVE32F-NEXT: .LBB51_10: # %cond.store
5823 ; RV64ZVE32F-NEXT: ld a2, 0(a2)
5824 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
5825 ; RV64ZVE32F-NEXT: slli a2, a2, 3
5826 ; RV64ZVE32F-NEXT: add a2, a1, a2
5827 ; RV64ZVE32F-NEXT: sd a0, 0(a2)
5828 ; RV64ZVE32F-NEXT: andi a0, a7, 2
5829 ; RV64ZVE32F-NEXT: beqz a0, .LBB51_2
5830 ; RV64ZVE32F-NEXT: .LBB51_11: # %cond.store1
5831 ; RV64ZVE32F-NEXT: slli s2, s2, 3
5832 ; RV64ZVE32F-NEXT: add s2, a1, s2
5833 ; RV64ZVE32F-NEXT: sd s1, 0(s2)
5834 ; RV64ZVE32F-NEXT: andi a0, a7, 4
5835 ; RV64ZVE32F-NEXT: beqz a0, .LBB51_3
5836 ; RV64ZVE32F-NEXT: .LBB51_12: # %cond.store3
5837 ; RV64ZVE32F-NEXT: slli s0, s0, 3
5838 ; RV64ZVE32F-NEXT: add s0, a1, s0
5839 ; RV64ZVE32F-NEXT: sd t6, 0(s0)
5840 ; RV64ZVE32F-NEXT: andi a0, a7, 8
5841 ; RV64ZVE32F-NEXT: beqz a0, .LBB51_4
5842 ; RV64ZVE32F-NEXT: .LBB51_13: # %cond.store5
5843 ; RV64ZVE32F-NEXT: slli t5, t5, 3
5844 ; RV64ZVE32F-NEXT: add t5, a1, t5
5845 ; RV64ZVE32F-NEXT: sd t3, 0(t5)
5846 ; RV64ZVE32F-NEXT: andi a0, a7, 16
5847 ; RV64ZVE32F-NEXT: beqz a0, .LBB51_5
5848 ; RV64ZVE32F-NEXT: .LBB51_14: # %cond.store7
5849 ; RV64ZVE32F-NEXT: slli t4, t4, 3
5850 ; RV64ZVE32F-NEXT: add t4, a1, t4
5851 ; RV64ZVE32F-NEXT: sd t1, 0(t4)
5852 ; RV64ZVE32F-NEXT: andi a0, a7, 32
5853 ; RV64ZVE32F-NEXT: beqz a0, .LBB51_6
5854 ; RV64ZVE32F-NEXT: .LBB51_15: # %cond.store9
5855 ; RV64ZVE32F-NEXT: slli t2, t2, 3
5856 ; RV64ZVE32F-NEXT: add t2, a1, t2
5857 ; RV64ZVE32F-NEXT: sd a6, 0(t2)
5858 ; RV64ZVE32F-NEXT: andi a0, a7, 64
5859 ; RV64ZVE32F-NEXT: beqz a0, .LBB51_7
5860 ; RV64ZVE32F-NEXT: .LBB51_16: # %cond.store11
5861 ; RV64ZVE32F-NEXT: slli t0, t0, 3
5862 ; RV64ZVE32F-NEXT: add t0, a1, t0
5863 ; RV64ZVE32F-NEXT: sd a4, 0(t0)
5864 ; RV64ZVE32F-NEXT: andi a0, a7, -128
5865 ; RV64ZVE32F-NEXT: bnez a0, .LBB51_8
5866 ; RV64ZVE32F-NEXT: j .LBB51_9
5867 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %idxs
5868 call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
5872 declare void @llvm.masked.scatter.v1f16.v1p0(<1 x half>, <1 x ptr>, i32, <1 x i1>)
5874 define void @mscatter_v1f16(<1 x half> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
; Single-lane half scatter. Configs with vector pointers use one masked
; indexed store; the 64-bit zve32f config (no 64-bit vector elements to hold
; the pointer) tests the mask with vfirst.m and does a scalar vse16 store.
5875 ; RV32V-LABEL: mscatter_v1f16:
5877 ; RV32V-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
5878 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
5881 ; RV64-LABEL: mscatter_v1f16:
5883 ; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
5884 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
5887 ; RV32ZVE32F-LABEL: mscatter_v1f16:
5888 ; RV32ZVE32F: # %bb.0:
5889 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
5890 ; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
5891 ; RV32ZVE32F-NEXT: ret
5893 ; RV64ZVE32F-LABEL: mscatter_v1f16:
5894 ; RV64ZVE32F: # %bb.0:
5895 ; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
5896 ; RV64ZVE32F-NEXT: vfirst.m a1, v0
5897 ; RV64ZVE32F-NEXT: bnez a1, .LBB52_2
5898 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
5899 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
5900 ; RV64ZVE32F-NEXT: vse16.v v8, (a0)
5901 ; RV64ZVE32F-NEXT: .LBB52_2: # %else
5902 ; RV64ZVE32F-NEXT: ret
5903 call void @llvm.masked.scatter.v1f16.v1p0(<1 x half> %val, <1 x ptr> %ptrs, i32 2, <1 x i1> %m)
5907 declare void @llvm.masked.scatter.v2f16.v2p0(<2 x half>, <2 x ptr>, i32, <2 x i1>)
5909 define void @mscatter_v2f16(<2 x half> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; Two-lane half scatter. The 64-bit zve32f config extracts the mask bits with
; vmv.x.s + andi and emits one branch-guarded scalar vse16 store per lane; the
; second lane's value is reached via vslidedown.
5910 ; RV32V-LABEL: mscatter_v2f16:
5912 ; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
5913 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
5916 ; RV64-LABEL: mscatter_v2f16:
5918 ; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
5919 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
5922 ; RV32ZVE32F-LABEL: mscatter_v2f16:
5923 ; RV32ZVE32F: # %bb.0:
5924 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
5925 ; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
5926 ; RV32ZVE32F-NEXT: ret
5928 ; RV64ZVE32F-LABEL: mscatter_v2f16:
5929 ; RV64ZVE32F: # %bb.0:
5930 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
5931 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
5932 ; RV64ZVE32F-NEXT: andi a3, a2, 1
5933 ; RV64ZVE32F-NEXT: bnez a3, .LBB53_3
5934 ; RV64ZVE32F-NEXT: # %bb.1: # %else
5935 ; RV64ZVE32F-NEXT: andi a2, a2, 2
5936 ; RV64ZVE32F-NEXT: bnez a2, .LBB53_4
5937 ; RV64ZVE32F-NEXT: .LBB53_2: # %else2
5938 ; RV64ZVE32F-NEXT: ret
5939 ; RV64ZVE32F-NEXT: .LBB53_3: # %cond.store
5940 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
5941 ; RV64ZVE32F-NEXT: vse16.v v8, (a0)
5942 ; RV64ZVE32F-NEXT: andi a2, a2, 2
5943 ; RV64ZVE32F-NEXT: beqz a2, .LBB53_2
5944 ; RV64ZVE32F-NEXT: .LBB53_4: # %cond.store1
5945 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
5946 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
5947 ; RV64ZVE32F-NEXT: vse16.v v8, (a1)
5948 ; RV64ZVE32F-NEXT: ret
5949 call void @llvm.masked.scatter.v2f16.v2p0(<2 x half> %val, <2 x ptr> %ptrs, i32 2, <2 x i1> %m)
5953 declare void @llvm.masked.scatter.v4f16.v4p0(<4 x half>, <4 x ptr>, i32, <4 x i1>)
5955 define void @mscatter_v4f16(<4 x half> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; Four-lane half scatter. On the 64-bit zve32f config the pointer vector is
; passed indirectly (lanes loaded from 0/8/16/24(a0)); each mask bit guards a
; scalar vse16 of the lane obtained with vslidedown.
5956 ; RV32-LABEL: mscatter_v4f16:
5958 ; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
5959 ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
5962 ; RV64-LABEL: mscatter_v4f16:
5964 ; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
5965 ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
5968 ; RV64ZVE32F-LABEL: mscatter_v4f16:
5969 ; RV64ZVE32F: # %bb.0:
5970 ; RV64ZVE32F-NEXT: ld a1, 24(a0)
5971 ; RV64ZVE32F-NEXT: ld a2, 16(a0)
5972 ; RV64ZVE32F-NEXT: ld a4, 8(a0)
5973 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
5974 ; RV64ZVE32F-NEXT: vmv.x.s a3, v0
5975 ; RV64ZVE32F-NEXT: andi a5, a3, 1
5976 ; RV64ZVE32F-NEXT: bnez a5, .LBB54_5
5977 ; RV64ZVE32F-NEXT: # %bb.1: # %else
5978 ; RV64ZVE32F-NEXT: andi a0, a3, 2
5979 ; RV64ZVE32F-NEXT: bnez a0, .LBB54_6
5980 ; RV64ZVE32F-NEXT: .LBB54_2: # %else2
5981 ; RV64ZVE32F-NEXT: andi a0, a3, 4
5982 ; RV64ZVE32F-NEXT: bnez a0, .LBB54_7
5983 ; RV64ZVE32F-NEXT: .LBB54_3: # %else4
5984 ; RV64ZVE32F-NEXT: andi a3, a3, 8
5985 ; RV64ZVE32F-NEXT: bnez a3, .LBB54_8
5986 ; RV64ZVE32F-NEXT: .LBB54_4: # %else6
5987 ; RV64ZVE32F-NEXT: ret
5988 ; RV64ZVE32F-NEXT: .LBB54_5: # %cond.store
5989 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
5990 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
5991 ; RV64ZVE32F-NEXT: vse16.v v8, (a0)
5992 ; RV64ZVE32F-NEXT: andi a0, a3, 2
5993 ; RV64ZVE32F-NEXT: beqz a0, .LBB54_2
5994 ; RV64ZVE32F-NEXT: .LBB54_6: # %cond.store1
5995 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
5996 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
5997 ; RV64ZVE32F-NEXT: vse16.v v9, (a4)
5998 ; RV64ZVE32F-NEXT: andi a0, a3, 4
5999 ; RV64ZVE32F-NEXT: beqz a0, .LBB54_3
6000 ; RV64ZVE32F-NEXT: .LBB54_7: # %cond.store3
6001 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
6002 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
6003 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
6004 ; RV64ZVE32F-NEXT: andi a3, a3, 8
6005 ; RV64ZVE32F-NEXT: beqz a3, .LBB54_4
6006 ; RV64ZVE32F-NEXT: .LBB54_8: # %cond.store5
6007 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
6008 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
6009 ; RV64ZVE32F-NEXT: vse16.v v8, (a1)
6010 ; RV64ZVE32F-NEXT: ret
6011 call void @llvm.masked.scatter.v4f16.v4p0(<4 x half> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> %m)
6015 define void @mscatter_truemask_v4f16(<4 x half> %val, <4 x ptr> %ptrs) {
; All-ones mask: the scatter must lower to unconditional stores (no mask
; checks). The 64-bit zve32f config emits four straight-line vse16 stores.
6016 ; RV32-LABEL: mscatter_truemask_v4f16:
6018 ; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
6019 ; RV32-NEXT: vsoxei32.v v8, (zero), v9
6022 ; RV64-LABEL: mscatter_truemask_v4f16:
6024 ; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
6025 ; RV64-NEXT: vsoxei64.v v8, (zero), v10
6028 ; RV64ZVE32F-LABEL: mscatter_truemask_v4f16:
6029 ; RV64ZVE32F: # %bb.0:
6030 ; RV64ZVE32F-NEXT: ld a1, 0(a0)
6031 ; RV64ZVE32F-NEXT: ld a2, 24(a0)
6032 ; RV64ZVE32F-NEXT: ld a3, 8(a0)
6033 ; RV64ZVE32F-NEXT: ld a0, 16(a0)
6034 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
6035 ; RV64ZVE32F-NEXT: vse16.v v8, (a1)
6036 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
6037 ; RV64ZVE32F-NEXT: vse16.v v9, (a3)
6038 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
6039 ; RV64ZVE32F-NEXT: vse16.v v9, (a0)
6040 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
6041 ; RV64ZVE32F-NEXT: vse16.v v8, (a2)
6042 ; RV64ZVE32F-NEXT: ret
6043 call void @llvm.masked.scatter.v4f16.v4p0(<4 x half> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1))
6047 define void @mscatter_falsemask_v4f16(<4 x half> %val, <4 x ptr> %ptrs) {
; All-zero mask: the scatter is a no-op, so all configs share a single label
; check (the body should fold away to just a return).
6048 ; CHECK-LABEL: mscatter_falsemask_v4f16:
6051 call void @llvm.masked.scatter.v4f16.v4p0(<4 x half> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> zeroinitializer)
6055 declare void @llvm.masked.scatter.v8f16.v8p0(<8 x half>, <8 x ptr>, i32, <8 x i1>)
6057 define void @mscatter_v8f16(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; Eight-lane half scatter. Vector-pointer configs use one masked indexed
; store; the 64-bit zve32f config loads the eight pointers from memory
; (offsets 0..56 of a0) and guards a scalar vse16 per mask bit, with the
; common all-bits-clear path falling through to ret.
6058 ; RV32-LABEL: mscatter_v8f16:
6060 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
6061 ; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
6064 ; RV64-LABEL: mscatter_v8f16:
6066 ; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
6067 ; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
6070 ; RV64ZVE32F-LABEL: mscatter_v8f16:
6071 ; RV64ZVE32F: # %bb.0:
6072 ; RV64ZVE32F-NEXT: ld a1, 56(a0)
6073 ; RV64ZVE32F-NEXT: ld a2, 48(a0)
6074 ; RV64ZVE32F-NEXT: ld a4, 40(a0)
6075 ; RV64ZVE32F-NEXT: ld a5, 32(a0)
6076 ; RV64ZVE32F-NEXT: ld a6, 24(a0)
6077 ; RV64ZVE32F-NEXT: ld a7, 16(a0)
6078 ; RV64ZVE32F-NEXT: ld t0, 8(a0)
6079 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
6080 ; RV64ZVE32F-NEXT: vmv.x.s a3, v0
6081 ; RV64ZVE32F-NEXT: andi t1, a3, 1
6082 ; RV64ZVE32F-NEXT: bnez t1, .LBB57_9
6083 ; RV64ZVE32F-NEXT: # %bb.1: # %else
6084 ; RV64ZVE32F-NEXT: andi a0, a3, 2
6085 ; RV64ZVE32F-NEXT: bnez a0, .LBB57_10
6086 ; RV64ZVE32F-NEXT: .LBB57_2: # %else2
6087 ; RV64ZVE32F-NEXT: andi a0, a3, 4
6088 ; RV64ZVE32F-NEXT: bnez a0, .LBB57_11
6089 ; RV64ZVE32F-NEXT: .LBB57_3: # %else4
6090 ; RV64ZVE32F-NEXT: andi a0, a3, 8
6091 ; RV64ZVE32F-NEXT: bnez a0, .LBB57_12
6092 ; RV64ZVE32F-NEXT: .LBB57_4: # %else6
6093 ; RV64ZVE32F-NEXT: andi a0, a3, 16
6094 ; RV64ZVE32F-NEXT: bnez a0, .LBB57_13
6095 ; RV64ZVE32F-NEXT: .LBB57_5: # %else8
6096 ; RV64ZVE32F-NEXT: andi a0, a3, 32
6097 ; RV64ZVE32F-NEXT: bnez a0, .LBB57_14
6098 ; RV64ZVE32F-NEXT: .LBB57_6: # %else10
6099 ; RV64ZVE32F-NEXT: andi a0, a3, 64
6100 ; RV64ZVE32F-NEXT: bnez a0, .LBB57_15
6101 ; RV64ZVE32F-NEXT: .LBB57_7: # %else12
6102 ; RV64ZVE32F-NEXT: andi a0, a3, -128
6103 ; RV64ZVE32F-NEXT: bnez a0, .LBB57_16
6104 ; RV64ZVE32F-NEXT: .LBB57_8: # %else14
6105 ; RV64ZVE32F-NEXT: ret
6106 ; RV64ZVE32F-NEXT: .LBB57_9: # %cond.store
6107 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
6108 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6109 ; RV64ZVE32F-NEXT: vse16.v v8, (a0)
6110 ; RV64ZVE32F-NEXT: andi a0, a3, 2
6111 ; RV64ZVE32F-NEXT: beqz a0, .LBB57_2
6112 ; RV64ZVE32F-NEXT: .LBB57_10: # %cond.store1
6113 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6114 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
6115 ; RV64ZVE32F-NEXT: vse16.v v9, (t0)
6116 ; RV64ZVE32F-NEXT: andi a0, a3, 4
6117 ; RV64ZVE32F-NEXT: beqz a0, .LBB57_3
6118 ; RV64ZVE32F-NEXT: .LBB57_11: # %cond.store3
6119 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6120 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
6121 ; RV64ZVE32F-NEXT: vse16.v v9, (a7)
6122 ; RV64ZVE32F-NEXT: andi a0, a3, 8
6123 ; RV64ZVE32F-NEXT: beqz a0, .LBB57_4
6124 ; RV64ZVE32F-NEXT: .LBB57_12: # %cond.store5
6125 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6126 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
6127 ; RV64ZVE32F-NEXT: vse16.v v9, (a6)
6128 ; RV64ZVE32F-NEXT: andi a0, a3, 16
6129 ; RV64ZVE32F-NEXT: beqz a0, .LBB57_5
6130 ; RV64ZVE32F-NEXT: .LBB57_13: # %cond.store7
6131 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6132 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
6133 ; RV64ZVE32F-NEXT: vse16.v v9, (a5)
6134 ; RV64ZVE32F-NEXT: andi a0, a3, 32
6135 ; RV64ZVE32F-NEXT: beqz a0, .LBB57_6
6136 ; RV64ZVE32F-NEXT: .LBB57_14: # %cond.store9
6137 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6138 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
6139 ; RV64ZVE32F-NEXT: vse16.v v9, (a4)
6140 ; RV64ZVE32F-NEXT: andi a0, a3, 64
6141 ; RV64ZVE32F-NEXT: beqz a0, .LBB57_7
6142 ; RV64ZVE32F-NEXT: .LBB57_15: # %cond.store11
6143 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6144 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 6
6145 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
6146 ; RV64ZVE32F-NEXT: andi a0, a3, -128
6147 ; RV64ZVE32F-NEXT: beqz a0, .LBB57_8
6148 ; RV64ZVE32F-NEXT: .LBB57_16: # %cond.store13
6149 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6150 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
6151 ; RV64ZVE32F-NEXT: vse16.v v8, (a1)
6152 ; RV64ZVE32F-NEXT: ret
6153 call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
6157 define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
; Scatter of eight halves to base + sext(i8 idx)*2. Vector-pointer configs
; sign-extend the indices (vsext) and double them for one indexed store. The
; 64-bit zve32f config scalarizes: per mask bit it extracts the i8 index
; (vslidedown + vmv.x.s), forms base + idx*2 (slli/add), and stores the lane
; with a scalar vse16.
6158 ; RV32-LABEL: mscatter_baseidx_v8i8_v8f16:
6160 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
6161 ; RV32-NEXT: vsext.vf4 v10, v9
6162 ; RV32-NEXT: vadd.vv v10, v10, v10
6163 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
6164 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
6167 ; RV64-LABEL: mscatter_baseidx_v8i8_v8f16:
6169 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
6170 ; RV64-NEXT: vsext.vf8 v12, v9
6171 ; RV64-NEXT: vadd.vv v12, v12, v12
6172 ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma
6173 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
6176 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8_v8f16:
6177 ; RV64ZVE32F: # %bb.0:
6178 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
6179 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
6180 ; RV64ZVE32F-NEXT: andi a2, a1, 1
6181 ; RV64ZVE32F-NEXT: beqz a2, .LBB58_2
6182 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
6183 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6184 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6185 ; RV64ZVE32F-NEXT: add a2, a0, a2
6186 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6187 ; RV64ZVE32F-NEXT: vse16.v v8, (a2)
6188 ; RV64ZVE32F-NEXT: .LBB58_2: # %else
6189 ; RV64ZVE32F-NEXT: andi a2, a1, 2
6190 ; RV64ZVE32F-NEXT: beqz a2, .LBB58_4
6191 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
6192 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6193 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
6194 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
6195 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6196 ; RV64ZVE32F-NEXT: add a2, a0, a2
6197 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6198 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
6199 ; RV64ZVE32F-NEXT: vse16.v v10, (a2)
6200 ; RV64ZVE32F-NEXT: .LBB58_4: # %else2
6201 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
6202 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
6203 ; RV64ZVE32F-NEXT: andi a2, a1, 4
6204 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
6205 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
6206 ; RV64ZVE32F-NEXT: bnez a2, .LBB58_12
6207 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
6208 ; RV64ZVE32F-NEXT: andi a2, a1, 8
6209 ; RV64ZVE32F-NEXT: bnez a2, .LBB58_13
6210 ; RV64ZVE32F-NEXT: .LBB58_6: # %else6
6211 ; RV64ZVE32F-NEXT: andi a2, a1, 16
6212 ; RV64ZVE32F-NEXT: bnez a2, .LBB58_14
6213 ; RV64ZVE32F-NEXT: .LBB58_7: # %else8
6214 ; RV64ZVE32F-NEXT: andi a2, a1, 32
6215 ; RV64ZVE32F-NEXT: beqz a2, .LBB58_9
6216 ; RV64ZVE32F-NEXT: .LBB58_8: # %cond.store9
6217 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6218 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
6219 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6220 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6221 ; RV64ZVE32F-NEXT: add a2, a0, a2
6222 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6223 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
6224 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
6225 ; RV64ZVE32F-NEXT: .LBB58_9: # %else10
6226 ; RV64ZVE32F-NEXT: andi a2, a1, 64
6227 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
6228 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
6229 ; RV64ZVE32F-NEXT: bnez a2, .LBB58_15
6230 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
6231 ; RV64ZVE32F-NEXT: andi a1, a1, -128
6232 ; RV64ZVE32F-NEXT: bnez a1, .LBB58_16
6233 ; RV64ZVE32F-NEXT: .LBB58_11: # %else14
6234 ; RV64ZVE32F-NEXT: ret
6235 ; RV64ZVE32F-NEXT: .LBB58_12: # %cond.store3
6236 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6237 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6238 ; RV64ZVE32F-NEXT: add a2, a0, a2
6239 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6240 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
6241 ; RV64ZVE32F-NEXT: vse16.v v11, (a2)
6242 ; RV64ZVE32F-NEXT: andi a2, a1, 8
6243 ; RV64ZVE32F-NEXT: beqz a2, .LBB58_6
6244 ; RV64ZVE32F-NEXT: .LBB58_13: # %cond.store5
6245 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6246 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
6247 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6248 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6249 ; RV64ZVE32F-NEXT: add a2, a0, a2
6250 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6251 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
6252 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
6253 ; RV64ZVE32F-NEXT: andi a2, a1, 16
6254 ; RV64ZVE32F-NEXT: beqz a2, .LBB58_7
6255 ; RV64ZVE32F-NEXT: .LBB58_14: # %cond.store7
6256 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
6257 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
6258 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6259 ; RV64ZVE32F-NEXT: add a2, a0, a2
6260 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6261 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
6262 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
6263 ; RV64ZVE32F-NEXT: andi a2, a1, 32
6264 ; RV64ZVE32F-NEXT: bnez a2, .LBB58_8
6265 ; RV64ZVE32F-NEXT: j .LBB58_9
6266 ; RV64ZVE32F-NEXT: .LBB58_15: # %cond.store11
6267 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6268 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6269 ; RV64ZVE32F-NEXT: add a2, a0, a2
6270 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6271 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
6272 ; RV64ZVE32F-NEXT: vse16.v v10, (a2)
6273 ; RV64ZVE32F-NEXT: andi a1, a1, -128
6274 ; RV64ZVE32F-NEXT: beqz a1, .LBB58_11
6275 ; RV64ZVE32F-NEXT: .LBB58_16: # %cond.store13
6276 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6277 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
6278 ; RV64ZVE32F-NEXT: vmv.x.s a1, v9
6279 ; RV64ZVE32F-NEXT: slli a1, a1, 1
6280 ; RV64ZVE32F-NEXT: add a0, a0, a1
6281 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6282 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
6283 ; RV64ZVE32F-NEXT: vse16.v v8, (a0)
6284 ; RV64ZVE32F-NEXT: ret
6285 %ptrs = getelementptr inbounds half, ptr %base, <8 x i8> %idxs
6286 call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
6290 define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
6291 ; RV32-LABEL: mscatter_baseidx_sext_v8i8_v8f16:
6293 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
6294 ; RV32-NEXT: vsext.vf4 v10, v9
6295 ; RV32-NEXT: vadd.vv v10, v10, v10
6296 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
6297 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
6300 ; RV64-LABEL: mscatter_baseidx_sext_v8i8_v8f16:
6302 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
6303 ; RV64-NEXT: vsext.vf8 v12, v9
6304 ; RV64-NEXT: vadd.vv v12, v12, v12
6305 ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma
6306 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
6309 ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8f16:
6310 ; RV64ZVE32F: # %bb.0:
6311 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
6312 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
6313 ; RV64ZVE32F-NEXT: andi a2, a1, 1
6314 ; RV64ZVE32F-NEXT: beqz a2, .LBB59_2
6315 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
6316 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6317 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6318 ; RV64ZVE32F-NEXT: add a2, a0, a2
6319 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6320 ; RV64ZVE32F-NEXT: vse16.v v8, (a2)
6321 ; RV64ZVE32F-NEXT: .LBB59_2: # %else
6322 ; RV64ZVE32F-NEXT: andi a2, a1, 2
6323 ; RV64ZVE32F-NEXT: beqz a2, .LBB59_4
6324 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
6325 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6326 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
6327 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
6328 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6329 ; RV64ZVE32F-NEXT: add a2, a0, a2
6330 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6331 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
6332 ; RV64ZVE32F-NEXT: vse16.v v10, (a2)
6333 ; RV64ZVE32F-NEXT: .LBB59_4: # %else2
6334 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
6335 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
6336 ; RV64ZVE32F-NEXT: andi a2, a1, 4
6337 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
6338 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
6339 ; RV64ZVE32F-NEXT: bnez a2, .LBB59_12
6340 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
6341 ; RV64ZVE32F-NEXT: andi a2, a1, 8
6342 ; RV64ZVE32F-NEXT: bnez a2, .LBB59_13
6343 ; RV64ZVE32F-NEXT: .LBB59_6: # %else6
6344 ; RV64ZVE32F-NEXT: andi a2, a1, 16
6345 ; RV64ZVE32F-NEXT: bnez a2, .LBB59_14
6346 ; RV64ZVE32F-NEXT: .LBB59_7: # %else8
6347 ; RV64ZVE32F-NEXT: andi a2, a1, 32
6348 ; RV64ZVE32F-NEXT: beqz a2, .LBB59_9
6349 ; RV64ZVE32F-NEXT: .LBB59_8: # %cond.store9
6350 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6351 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
6352 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6353 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6354 ; RV64ZVE32F-NEXT: add a2, a0, a2
6355 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6356 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
6357 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
6358 ; RV64ZVE32F-NEXT: .LBB59_9: # %else10
6359 ; RV64ZVE32F-NEXT: andi a2, a1, 64
6360 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
6361 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
6362 ; RV64ZVE32F-NEXT: bnez a2, .LBB59_15
6363 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
6364 ; RV64ZVE32F-NEXT: andi a1, a1, -128
6365 ; RV64ZVE32F-NEXT: bnez a1, .LBB59_16
6366 ; RV64ZVE32F-NEXT: .LBB59_11: # %else14
6367 ; RV64ZVE32F-NEXT: ret
6368 ; RV64ZVE32F-NEXT: .LBB59_12: # %cond.store3
6369 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6370 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6371 ; RV64ZVE32F-NEXT: add a2, a0, a2
6372 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6373 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
6374 ; RV64ZVE32F-NEXT: vse16.v v11, (a2)
6375 ; RV64ZVE32F-NEXT: andi a2, a1, 8
6376 ; RV64ZVE32F-NEXT: beqz a2, .LBB59_6
6377 ; RV64ZVE32F-NEXT: .LBB59_13: # %cond.store5
6378 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6379 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
6380 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6381 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6382 ; RV64ZVE32F-NEXT: add a2, a0, a2
6383 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6384 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
6385 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
6386 ; RV64ZVE32F-NEXT: andi a2, a1, 16
6387 ; RV64ZVE32F-NEXT: beqz a2, .LBB59_7
6388 ; RV64ZVE32F-NEXT: .LBB59_14: # %cond.store7
6389 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
6390 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
6391 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6392 ; RV64ZVE32F-NEXT: add a2, a0, a2
6393 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6394 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
6395 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
6396 ; RV64ZVE32F-NEXT: andi a2, a1, 32
6397 ; RV64ZVE32F-NEXT: bnez a2, .LBB59_8
6398 ; RV64ZVE32F-NEXT: j .LBB59_9
6399 ; RV64ZVE32F-NEXT: .LBB59_15: # %cond.store11
6400 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6401 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6402 ; RV64ZVE32F-NEXT: add a2, a0, a2
6403 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6404 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
6405 ; RV64ZVE32F-NEXT: vse16.v v10, (a2)
6406 ; RV64ZVE32F-NEXT: andi a1, a1, -128
6407 ; RV64ZVE32F-NEXT: beqz a1, .LBB59_11
6408 ; RV64ZVE32F-NEXT: .LBB59_16: # %cond.store13
6409 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6410 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
6411 ; RV64ZVE32F-NEXT: vmv.x.s a1, v9
6412 ; RV64ZVE32F-NEXT: slli a1, a1, 1
6413 ; RV64ZVE32F-NEXT: add a0, a0, a1
6414 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6415 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
6416 ; RV64ZVE32F-NEXT: vse16.v v8, (a0)
6417 ; RV64ZVE32F-NEXT: ret
6418 %eidxs = sext <8 x i8> %idxs to <8 x i16>
6419 %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs
6420 call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
6424 define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
; Same scatter as above but with zero-extended i8 indices. Both full-V
; targets use vwaddu.vv (widening unsigned self-add) to produce 16-bit
; doubled offsets in one step, which permits the narrower vsoxei16
; indexed store. The scalarized zve32f path masks each extracted index
; with 255 (the zero-extension) before scaling by the 2-byte element size.
6425 ; RV32-LABEL: mscatter_baseidx_zext_v8i8_v8f16:
6427 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
6428 ; RV32-NEXT: vwaddu.vv v10, v9, v9
6429 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
6430 ; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t
6433 ; RV64-LABEL: mscatter_baseidx_zext_v8i8_v8f16:
6435 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
6436 ; RV64-NEXT: vwaddu.vv v10, v9, v9
6437 ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma
6438 ; RV64-NEXT: vsoxei16.v v8, (a0), v10, v0.t
6441 ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8f16:
6442 ; RV64ZVE32F: # %bb.0:
6443 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
6444 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
6445 ; RV64ZVE32F-NEXT: andi a2, a1, 1
6446 ; RV64ZVE32F-NEXT: beqz a2, .LBB60_2
6447 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
6448 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6449 ; RV64ZVE32F-NEXT: andi a2, a2, 255
6450 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6451 ; RV64ZVE32F-NEXT: add a2, a0, a2
6452 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6453 ; RV64ZVE32F-NEXT: vse16.v v8, (a2)
6454 ; RV64ZVE32F-NEXT: .LBB60_2: # %else
6455 ; RV64ZVE32F-NEXT: andi a2, a1, 2
6456 ; RV64ZVE32F-NEXT: beqz a2, .LBB60_4
6457 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
6458 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6459 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
6460 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
6461 ; RV64ZVE32F-NEXT: andi a2, a2, 255
6462 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6463 ; RV64ZVE32F-NEXT: add a2, a0, a2
6464 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6465 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
6466 ; RV64ZVE32F-NEXT: vse16.v v10, (a2)
6467 ; RV64ZVE32F-NEXT: .LBB60_4: # %else2
6468 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
6469 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
6470 ; RV64ZVE32F-NEXT: andi a2, a1, 4
6471 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
6472 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
6473 ; RV64ZVE32F-NEXT: bnez a2, .LBB60_12
6474 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
6475 ; RV64ZVE32F-NEXT: andi a2, a1, 8
6476 ; RV64ZVE32F-NEXT: bnez a2, .LBB60_13
6477 ; RV64ZVE32F-NEXT: .LBB60_6: # %else6
6478 ; RV64ZVE32F-NEXT: andi a2, a1, 16
6479 ; RV64ZVE32F-NEXT: bnez a2, .LBB60_14
6480 ; RV64ZVE32F-NEXT: .LBB60_7: # %else8
6481 ; RV64ZVE32F-NEXT: andi a2, a1, 32
6482 ; RV64ZVE32F-NEXT: beqz a2, .LBB60_9
6483 ; RV64ZVE32F-NEXT: .LBB60_8: # %cond.store9
6484 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6485 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
6486 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6487 ; RV64ZVE32F-NEXT: andi a2, a2, 255
6488 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6489 ; RV64ZVE32F-NEXT: add a2, a0, a2
6490 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6491 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
6492 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
6493 ; RV64ZVE32F-NEXT: .LBB60_9: # %else10
6494 ; RV64ZVE32F-NEXT: andi a2, a1, 64
6495 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
6496 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
6497 ; RV64ZVE32F-NEXT: bnez a2, .LBB60_15
6498 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
6499 ; RV64ZVE32F-NEXT: andi a1, a1, -128
6500 ; RV64ZVE32F-NEXT: bnez a1, .LBB60_16
6501 ; RV64ZVE32F-NEXT: .LBB60_11: # %else14
6502 ; RV64ZVE32F-NEXT: ret
6503 ; RV64ZVE32F-NEXT: .LBB60_12: # %cond.store3
6504 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6505 ; RV64ZVE32F-NEXT: andi a2, a2, 255
6506 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6507 ; RV64ZVE32F-NEXT: add a2, a0, a2
6508 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6509 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
6510 ; RV64ZVE32F-NEXT: vse16.v v11, (a2)
6511 ; RV64ZVE32F-NEXT: andi a2, a1, 8
6512 ; RV64ZVE32F-NEXT: beqz a2, .LBB60_6
6513 ; RV64ZVE32F-NEXT: .LBB60_13: # %cond.store5
6514 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6515 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
6516 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6517 ; RV64ZVE32F-NEXT: andi a2, a2, 255
6518 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6519 ; RV64ZVE32F-NEXT: add a2, a0, a2
6520 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6521 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
6522 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
6523 ; RV64ZVE32F-NEXT: andi a2, a1, 16
6524 ; RV64ZVE32F-NEXT: beqz a2, .LBB60_7
6525 ; RV64ZVE32F-NEXT: .LBB60_14: # %cond.store7
6526 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
6527 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
6528 ; RV64ZVE32F-NEXT: andi a2, a2, 255
6529 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6530 ; RV64ZVE32F-NEXT: add a2, a0, a2
6531 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6532 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
6533 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
6534 ; RV64ZVE32F-NEXT: andi a2, a1, 32
6535 ; RV64ZVE32F-NEXT: bnez a2, .LBB60_8
6536 ; RV64ZVE32F-NEXT: j .LBB60_9
6537 ; RV64ZVE32F-NEXT: .LBB60_15: # %cond.store11
6538 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6539 ; RV64ZVE32F-NEXT: andi a2, a2, 255
6540 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6541 ; RV64ZVE32F-NEXT: add a2, a0, a2
6542 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6543 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
6544 ; RV64ZVE32F-NEXT: vse16.v v10, (a2)
6545 ; RV64ZVE32F-NEXT: andi a1, a1, -128
6546 ; RV64ZVE32F-NEXT: beqz a1, .LBB60_11
6547 ; RV64ZVE32F-NEXT: .LBB60_16: # %cond.store13
6548 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6549 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
6550 ; RV64ZVE32F-NEXT: vmv.x.s a1, v9
6551 ; RV64ZVE32F-NEXT: andi a1, a1, 255
6552 ; RV64ZVE32F-NEXT: slli a1, a1, 1
6553 ; RV64ZVE32F-NEXT: add a0, a0, a1
6554 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6555 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
6556 ; RV64ZVE32F-NEXT: vse16.v v8, (a0)
6557 ; RV64ZVE32F-NEXT: ret
6558 %eidxs = zext <8 x i8> %idxs to <8 x i16>
6559 %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs
6560 call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
6564 define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
; Native i16 indices. RV32 widens with vwadd.vv into 32-bit doubled
; offsets for vsoxei32; RV64 sign-extends to 64 bits for vsoxei64. The
; zve32f fallback again scalarizes: extract each i16 index, double it,
; add to the base, and store the matching f16 lane when its mask bit
; is set.
6565 ; RV32-LABEL: mscatter_baseidx_v8f16:
6567 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
6568 ; RV32-NEXT: vwadd.vv v10, v9, v9
6569 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
6572 ; RV64-LABEL: mscatter_baseidx_v8f16:
6574 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
6575 ; RV64-NEXT: vsext.vf4 v12, v9
6576 ; RV64-NEXT: vadd.vv v12, v12, v12
6577 ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma
6578 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
6581 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8f16:
6582 ; RV64ZVE32F: # %bb.0:
6583 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
6584 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
6585 ; RV64ZVE32F-NEXT: andi a2, a1, 1
6586 ; RV64ZVE32F-NEXT: beqz a2, .LBB61_2
6587 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
6588 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6589 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6590 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6591 ; RV64ZVE32F-NEXT: add a2, a0, a2
6592 ; RV64ZVE32F-NEXT: vse16.v v8, (a2)
6593 ; RV64ZVE32F-NEXT: .LBB61_2: # %else
6594 ; RV64ZVE32F-NEXT: andi a2, a1, 2
6595 ; RV64ZVE32F-NEXT: beqz a2, .LBB61_4
6596 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
6597 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
6598 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
6599 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
6600 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6601 ; RV64ZVE32F-NEXT: add a2, a0, a2
6602 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6603 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
6604 ; RV64ZVE32F-NEXT: vse16.v v10, (a2)
6605 ; RV64ZVE32F-NEXT: .LBB61_4: # %else2
6606 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
6607 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
6608 ; RV64ZVE32F-NEXT: andi a2, a1, 4
6609 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
6610 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
6611 ; RV64ZVE32F-NEXT: bnez a2, .LBB61_12
6612 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
6613 ; RV64ZVE32F-NEXT: andi a2, a1, 8
6614 ; RV64ZVE32F-NEXT: bnez a2, .LBB61_13
6615 ; RV64ZVE32F-NEXT: .LBB61_6: # %else6
6616 ; RV64ZVE32F-NEXT: andi a2, a1, 16
6617 ; RV64ZVE32F-NEXT: bnez a2, .LBB61_14
6618 ; RV64ZVE32F-NEXT: .LBB61_7: # %else8
6619 ; RV64ZVE32F-NEXT: andi a2, a1, 32
6620 ; RV64ZVE32F-NEXT: beqz a2, .LBB61_9
6621 ; RV64ZVE32F-NEXT: .LBB61_8: # %cond.store9
6622 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
6623 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
6624 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6625 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6626 ; RV64ZVE32F-NEXT: add a2, a0, a2
6627 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6628 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
6629 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
6630 ; RV64ZVE32F-NEXT: .LBB61_9: # %else10
6631 ; RV64ZVE32F-NEXT: andi a2, a1, 64
6632 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
6633 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
6634 ; RV64ZVE32F-NEXT: bnez a2, .LBB61_15
6635 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
6636 ; RV64ZVE32F-NEXT: andi a1, a1, -128
6637 ; RV64ZVE32F-NEXT: bnez a1, .LBB61_16
6638 ; RV64ZVE32F-NEXT: .LBB61_11: # %else14
6639 ; RV64ZVE32F-NEXT: ret
6640 ; RV64ZVE32F-NEXT: .LBB61_12: # %cond.store3
6641 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6642 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6643 ; RV64ZVE32F-NEXT: add a2, a0, a2
6644 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6645 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
6646 ; RV64ZVE32F-NEXT: vse16.v v11, (a2)
6647 ; RV64ZVE32F-NEXT: andi a2, a1, 8
6648 ; RV64ZVE32F-NEXT: beqz a2, .LBB61_6
6649 ; RV64ZVE32F-NEXT: .LBB61_13: # %cond.store5
6650 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
6651 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
6652 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6653 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6654 ; RV64ZVE32F-NEXT: add a2, a0, a2
6655 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6656 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
6657 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
6658 ; RV64ZVE32F-NEXT: andi a2, a1, 16
6659 ; RV64ZVE32F-NEXT: beqz a2, .LBB61_7
6660 ; RV64ZVE32F-NEXT: .LBB61_14: # %cond.store7
6661 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6662 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
6663 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6664 ; RV64ZVE32F-NEXT: add a2, a0, a2
6665 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
6666 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
6667 ; RV64ZVE32F-NEXT: andi a2, a1, 32
6668 ; RV64ZVE32F-NEXT: bnez a2, .LBB61_8
6669 ; RV64ZVE32F-NEXT: j .LBB61_9
6670 ; RV64ZVE32F-NEXT: .LBB61_15: # %cond.store11
6671 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
6672 ; RV64ZVE32F-NEXT: slli a2, a2, 1
6673 ; RV64ZVE32F-NEXT: add a2, a0, a2
6674 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6675 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
6676 ; RV64ZVE32F-NEXT: vse16.v v10, (a2)
6677 ; RV64ZVE32F-NEXT: andi a1, a1, -128
6678 ; RV64ZVE32F-NEXT: beqz a1, .LBB61_11
6679 ; RV64ZVE32F-NEXT: .LBB61_16: # %cond.store13
6680 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
6681 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
6682 ; RV64ZVE32F-NEXT: vmv.x.s a1, v9
6683 ; RV64ZVE32F-NEXT: slli a1, a1, 1
6684 ; RV64ZVE32F-NEXT: add a0, a0, a1
6685 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
6686 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
6687 ; RV64ZVE32F-NEXT: vse16.v v8, (a0)
6688 ; RV64ZVE32F-NEXT: ret
6689 %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %idxs
6690 call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %m)
6694 declare void @llvm.masked.scatter.v1f32.v1p0(<1 x float>, <1 x ptr>, i32, <1 x i1>)
6696 define void @mscatter_v1f32(<1 x float> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
; Single-lane f32 scatter. Vector-pointer targets emit one masked indexed
; store. The RV64 zve32f target cannot hold a 64-bit pointer in a vector
; register, so the pointer arrives in a0 and vfirst.m on the mask decides
; whether lane 0 is stored at all.
6697 ; RV32V-LABEL: mscatter_v1f32:
6699 ; RV32V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
6700 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
6703 ; RV64-LABEL: mscatter_v1f32:
6705 ; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
6706 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
6709 ; RV32ZVE32F-LABEL: mscatter_v1f32:
6710 ; RV32ZVE32F: # %bb.0:
6711 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6712 ; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
6713 ; RV32ZVE32F-NEXT: ret
6715 ; RV64ZVE32F-LABEL: mscatter_v1f32:
6716 ; RV64ZVE32F: # %bb.0:
6717 ; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
6718 ; RV64ZVE32F-NEXT: vfirst.m a1, v0
6719 ; RV64ZVE32F-NEXT: bnez a1, .LBB62_2
6720 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
6721 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6722 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
6723 ; RV64ZVE32F-NEXT: .LBB62_2: # %else
6724 ; RV64ZVE32F-NEXT: ret
6725 call void @llvm.masked.scatter.v1f32.v1p0(<1 x float> %val, <1 x ptr> %ptrs, i32 4, <1 x i1> %m)
6729 declare void @llvm.masked.scatter.v2f32.v2p0(<2 x float>, <2 x ptr>, i32, <2 x i1>)
6731 define void @mscatter_v2f32(<2 x float> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
; Two-lane f32 scatter. The RV64 zve32f fallback copies the mask bits
; from v0 into a2 and performs one scalar vse32.v per set bit, with the
; two pointers passed in a0 and a1.
6732 ; RV32V-LABEL: mscatter_v2f32:
6734 ; RV32V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
6735 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
6738 ; RV64-LABEL: mscatter_v2f32:
6740 ; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
6741 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
6744 ; RV32ZVE32F-LABEL: mscatter_v2f32:
6745 ; RV32ZVE32F: # %bb.0:
6746 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
6747 ; RV32ZVE32F-NEXT: vsoxei32.v v8, (zero), v9, v0.t
6748 ; RV32ZVE32F-NEXT: ret
6750 ; RV64ZVE32F-LABEL: mscatter_v2f32:
6751 ; RV64ZVE32F: # %bb.0:
6752 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
6753 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
6754 ; RV64ZVE32F-NEXT: andi a3, a2, 1
6755 ; RV64ZVE32F-NEXT: bnez a3, .LBB63_3
6756 ; RV64ZVE32F-NEXT: # %bb.1: # %else
6757 ; RV64ZVE32F-NEXT: andi a2, a2, 2
6758 ; RV64ZVE32F-NEXT: bnez a2, .LBB63_4
6759 ; RV64ZVE32F-NEXT: .LBB63_2: # %else2
6760 ; RV64ZVE32F-NEXT: ret
6761 ; RV64ZVE32F-NEXT: .LBB63_3: # %cond.store
6762 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6763 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
6764 ; RV64ZVE32F-NEXT: andi a2, a2, 2
6765 ; RV64ZVE32F-NEXT: beqz a2, .LBB63_2
6766 ; RV64ZVE32F-NEXT: .LBB63_4: # %cond.store1
6767 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6768 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
6769 ; RV64ZVE32F-NEXT: vse32.v v8, (a1)
6770 ; RV64ZVE32F-NEXT: ret
6771 call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> %val, <2 x ptr> %ptrs, i32 4, <2 x i1> %m)
6775 declare void @llvm.masked.scatter.v4f32.v4p0(<4 x float>, <4 x ptr>, i32, <4 x i1>)
6777 define void @mscatter_v4f32(<4 x float> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
; Four-lane f32 scatter. On the RV64 zve32f target the <4 x ptr>
; argument lives in memory, so the code loads the four pointers from
; offsets 0/8/16/24 of a0 and then branches per mask bit, extracting the
; matching lane with vslidedown.vi before each scalar vse32.v.
6778 ; RV32-LABEL: mscatter_v4f32:
6780 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
6781 ; RV32-NEXT: vsoxei32.v v8, (zero), v9, v0.t
6784 ; RV64-LABEL: mscatter_v4f32:
6786 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
6787 ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
6790 ; RV64ZVE32F-LABEL: mscatter_v4f32:
6791 ; RV64ZVE32F: # %bb.0:
6792 ; RV64ZVE32F-NEXT: ld a1, 24(a0)
6793 ; RV64ZVE32F-NEXT: ld a2, 16(a0)
6794 ; RV64ZVE32F-NEXT: ld a4, 8(a0)
6795 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
6796 ; RV64ZVE32F-NEXT: vmv.x.s a3, v0
6797 ; RV64ZVE32F-NEXT: andi a5, a3, 1
6798 ; RV64ZVE32F-NEXT: bnez a5, .LBB64_5
6799 ; RV64ZVE32F-NEXT: # %bb.1: # %else
6800 ; RV64ZVE32F-NEXT: andi a0, a3, 2
6801 ; RV64ZVE32F-NEXT: bnez a0, .LBB64_6
6802 ; RV64ZVE32F-NEXT: .LBB64_2: # %else2
6803 ; RV64ZVE32F-NEXT: andi a0, a3, 4
6804 ; RV64ZVE32F-NEXT: bnez a0, .LBB64_7
6805 ; RV64ZVE32F-NEXT: .LBB64_3: # %else4
6806 ; RV64ZVE32F-NEXT: andi a3, a3, 8
6807 ; RV64ZVE32F-NEXT: bnez a3, .LBB64_8
6808 ; RV64ZVE32F-NEXT: .LBB64_4: # %else6
6809 ; RV64ZVE32F-NEXT: ret
6810 ; RV64ZVE32F-NEXT: .LBB64_5: # %cond.store
6811 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
6812 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6813 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
6814 ; RV64ZVE32F-NEXT: andi a0, a3, 2
6815 ; RV64ZVE32F-NEXT: beqz a0, .LBB64_2
6816 ; RV64ZVE32F-NEXT: .LBB64_6: # %cond.store1
6817 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6818 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
6819 ; RV64ZVE32F-NEXT: vse32.v v9, (a4)
6820 ; RV64ZVE32F-NEXT: andi a0, a3, 4
6821 ; RV64ZVE32F-NEXT: beqz a0, .LBB64_3
6822 ; RV64ZVE32F-NEXT: .LBB64_7: # %cond.store3
6823 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6824 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
6825 ; RV64ZVE32F-NEXT: vse32.v v9, (a2)
6826 ; RV64ZVE32F-NEXT: andi a3, a3, 8
6827 ; RV64ZVE32F-NEXT: beqz a3, .LBB64_4
6828 ; RV64ZVE32F-NEXT: .LBB64_8: # %cond.store5
6829 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6830 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
6831 ; RV64ZVE32F-NEXT: vse32.v v8, (a1)
6832 ; RV64ZVE32F-NEXT: ret
6833 call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %val, <4 x ptr> %ptrs, i32 4, <4 x i1> %m)
6837 define void @mscatter_truemask_v4f32(<4 x float> %val, <4 x ptr> %ptrs) {
; All-ones mask: the scatter folds to unconditional stores. The zve32f
; target loads the four pointers from memory off a0 and stores each
; extracted lane straight-line, with no mask tests or branches.
6838 ; RV32-LABEL: mscatter_truemask_v4f32:
6840 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
6841 ; RV32-NEXT: vsoxei32.v v8, (zero), v9
6844 ; RV64-LABEL: mscatter_truemask_v4f32:
6846 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
6847 ; RV64-NEXT: vsoxei64.v v8, (zero), v10
6850 ; RV64ZVE32F-LABEL: mscatter_truemask_v4f32:
6851 ; RV64ZVE32F: # %bb.0:
6852 ; RV64ZVE32F-NEXT: ld a1, 0(a0)
6853 ; RV64ZVE32F-NEXT: ld a2, 24(a0)
6854 ; RV64ZVE32F-NEXT: ld a3, 8(a0)
6855 ; RV64ZVE32F-NEXT: ld a0, 16(a0)
6856 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6857 ; RV64ZVE32F-NEXT: vse32.v v8, (a1)
6858 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
6859 ; RV64ZVE32F-NEXT: vse32.v v9, (a3)
6860 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
6861 ; RV64ZVE32F-NEXT: vse32.v v9, (a0)
6862 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3
6863 ; RV64ZVE32F-NEXT: vse32.v v8, (a2)
6864 ; RV64ZVE32F-NEXT: ret
6865 call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %val, <4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 1))
6869 define void @mscatter_falsemask_v4f32(<4 x float> %val, <4 x ptr> %ptrs) {
; All-zeros mask: the scatter is a no-op, so one shared label suffices
; for every run configuration.
6870 ; CHECK-LABEL: mscatter_falsemask_v4f32:
6873 call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %val, <4 x ptr> %ptrs, i32 4, <4 x i1> zeroinitializer)
6877 declare void @llvm.masked.scatter.v8f32.v8p0(<8 x float>, <8 x ptr>, i32, <8 x i1>)
6879 define void @mscatter_v8f32(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
; Eight-lane f32 scatter. The zve32f target loads the eight scalar
; pointers from offsets 0..56 of a0, then branches per mask bit; the
; value vector spans two registers (m2), so lanes 4-7 need an m2
; vslidedown before narrowing back to m1 for the scalar store.
6880 ; RV32-LABEL: mscatter_v8f32:
6882 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
6883 ; RV32-NEXT: vsoxei32.v v8, (zero), v10, v0.t
6886 ; RV64-LABEL: mscatter_v8f32:
6888 ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
6889 ; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
6892 ; RV64ZVE32F-LABEL: mscatter_v8f32:
6893 ; RV64ZVE32F: # %bb.0:
6894 ; RV64ZVE32F-NEXT: ld a1, 56(a0)
6895 ; RV64ZVE32F-NEXT: ld a2, 48(a0)
6896 ; RV64ZVE32F-NEXT: ld a4, 40(a0)
6897 ; RV64ZVE32F-NEXT: ld a5, 32(a0)
6898 ; RV64ZVE32F-NEXT: ld a6, 24(a0)
6899 ; RV64ZVE32F-NEXT: ld a7, 16(a0)
6900 ; RV64ZVE32F-NEXT: ld t0, 8(a0)
6901 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
6902 ; RV64ZVE32F-NEXT: vmv.x.s a3, v0
6903 ; RV64ZVE32F-NEXT: andi t1, a3, 1
6904 ; RV64ZVE32F-NEXT: bnez t1, .LBB67_9
6905 ; RV64ZVE32F-NEXT: # %bb.1: # %else
6906 ; RV64ZVE32F-NEXT: andi a0, a3, 2
6907 ; RV64ZVE32F-NEXT: bnez a0, .LBB67_10
6908 ; RV64ZVE32F-NEXT: .LBB67_2: # %else2
6909 ; RV64ZVE32F-NEXT: andi a0, a3, 4
6910 ; RV64ZVE32F-NEXT: bnez a0, .LBB67_11
6911 ; RV64ZVE32F-NEXT: .LBB67_3: # %else4
6912 ; RV64ZVE32F-NEXT: andi a0, a3, 8
6913 ; RV64ZVE32F-NEXT: bnez a0, .LBB67_12
6914 ; RV64ZVE32F-NEXT: .LBB67_4: # %else6
6915 ; RV64ZVE32F-NEXT: andi a0, a3, 16
6916 ; RV64ZVE32F-NEXT: bnez a0, .LBB67_13
6917 ; RV64ZVE32F-NEXT: .LBB67_5: # %else8
6918 ; RV64ZVE32F-NEXT: andi a0, a3, 32
6919 ; RV64ZVE32F-NEXT: bnez a0, .LBB67_14
6920 ; RV64ZVE32F-NEXT: .LBB67_6: # %else10
6921 ; RV64ZVE32F-NEXT: andi a0, a3, 64
6922 ; RV64ZVE32F-NEXT: bnez a0, .LBB67_15
6923 ; RV64ZVE32F-NEXT: .LBB67_7: # %else12
6924 ; RV64ZVE32F-NEXT: andi a0, a3, -128
6925 ; RV64ZVE32F-NEXT: bnez a0, .LBB67_16
6926 ; RV64ZVE32F-NEXT: .LBB67_8: # %else14
6927 ; RV64ZVE32F-NEXT: ret
6928 ; RV64ZVE32F-NEXT: .LBB67_9: # %cond.store
6929 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
6930 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6931 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
6932 ; RV64ZVE32F-NEXT: andi a0, a3, 2
6933 ; RV64ZVE32F-NEXT: beqz a0, .LBB67_2
6934 ; RV64ZVE32F-NEXT: .LBB67_10: # %cond.store1
6935 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6936 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
6937 ; RV64ZVE32F-NEXT: vse32.v v10, (t0)
6938 ; RV64ZVE32F-NEXT: andi a0, a3, 4
6939 ; RV64ZVE32F-NEXT: beqz a0, .LBB67_3
6940 ; RV64ZVE32F-NEXT: .LBB67_11: # %cond.store3
6941 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6942 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2
6943 ; RV64ZVE32F-NEXT: vse32.v v10, (a7)
6944 ; RV64ZVE32F-NEXT: andi a0, a3, 8
6945 ; RV64ZVE32F-NEXT: beqz a0, .LBB67_4
6946 ; RV64ZVE32F-NEXT: .LBB67_12: # %cond.store5
6947 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6948 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
6949 ; RV64ZVE32F-NEXT: vse32.v v10, (a6)
6950 ; RV64ZVE32F-NEXT: andi a0, a3, 16
6951 ; RV64ZVE32F-NEXT: beqz a0, .LBB67_5
6952 ; RV64ZVE32F-NEXT: .LBB67_13: # %cond.store7
6953 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
6954 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
6955 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6956 ; RV64ZVE32F-NEXT: vse32.v v10, (a5)
6957 ; RV64ZVE32F-NEXT: andi a0, a3, 32
6958 ; RV64ZVE32F-NEXT: beqz a0, .LBB67_6
6959 ; RV64ZVE32F-NEXT: .LBB67_14: # %cond.store9
6960 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
6961 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5
6962 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6963 ; RV64ZVE32F-NEXT: vse32.v v10, (a4)
6964 ; RV64ZVE32F-NEXT: andi a0, a3, 64
6965 ; RV64ZVE32F-NEXT: beqz a0, .LBB67_7
6966 ; RV64ZVE32F-NEXT: .LBB67_15: # %cond.store11
6967 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
6968 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
6969 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6970 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
6971 ; RV64ZVE32F-NEXT: andi a0, a3, -128
6972 ; RV64ZVE32F-NEXT: beqz a0, .LBB67_8
6973 ; RV64ZVE32F-NEXT: .LBB67_16: # %cond.store13
6974 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
6975 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
6976 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6977 ; RV64ZVE32F-NEXT: vse32.v v8, (a1)
6978 ; RV64ZVE32F-NEXT: ret
6979 call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
6983 define void @mscatter_baseidx_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
6984 ; RV32-LABEL: mscatter_baseidx_v8i8_v8f32:
6986 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
6987 ; RV32-NEXT: vsext.vf4 v12, v10
6988 ; RV32-NEXT: vsll.vi v10, v12, 2
6989 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
6992 ; RV64-LABEL: mscatter_baseidx_v8i8_v8f32:
6994 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
6995 ; RV64-NEXT: vsext.vf8 v12, v10
6996 ; RV64-NEXT: vsll.vi v12, v12, 2
6997 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
6998 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
7001 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8_v8f32:
7002 ; RV64ZVE32F: # %bb.0:
7003 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
7004 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
7005 ; RV64ZVE32F-NEXT: andi a2, a1, 1
7006 ; RV64ZVE32F-NEXT: beqz a2, .LBB68_2
7007 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
7008 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7009 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7010 ; RV64ZVE32F-NEXT: add a2, a0, a2
7011 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7012 ; RV64ZVE32F-NEXT: vse32.v v8, (a2)
7013 ; RV64ZVE32F-NEXT: .LBB68_2: # %else
7014 ; RV64ZVE32F-NEXT: andi a2, a1, 2
7015 ; RV64ZVE32F-NEXT: beqz a2, .LBB68_4
7016 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
7017 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7018 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
7019 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
7020 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7021 ; RV64ZVE32F-NEXT: add a2, a0, a2
7022 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
7023 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
7024 ; RV64ZVE32F-NEXT: vse32.v v11, (a2)
7025 ; RV64ZVE32F-NEXT: .LBB68_4: # %else2
7026 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
7027 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
7028 ; RV64ZVE32F-NEXT: andi a2, a1, 4
7029 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
7030 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
7031 ; RV64ZVE32F-NEXT: bnez a2, .LBB68_12
7032 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
7033 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7034 ; RV64ZVE32F-NEXT: bnez a2, .LBB68_13
7035 ; RV64ZVE32F-NEXT: .LBB68_6: # %else6
7036 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7037 ; RV64ZVE32F-NEXT: bnez a2, .LBB68_14
7038 ; RV64ZVE32F-NEXT: .LBB68_7: # %else8
7039 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7040 ; RV64ZVE32F-NEXT: beqz a2, .LBB68_9
7041 ; RV64ZVE32F-NEXT: .LBB68_8: # %cond.store9
7042 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7043 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
7044 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7045 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7046 ; RV64ZVE32F-NEXT: add a2, a0, a2
7047 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7048 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
7049 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7050 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7051 ; RV64ZVE32F-NEXT: .LBB68_9: # %else10
7052 ; RV64ZVE32F-NEXT: andi a2, a1, 64
7053 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
7054 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
7055 ; RV64ZVE32F-NEXT: bnez a2, .LBB68_15
7056 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
7057 ; RV64ZVE32F-NEXT: andi a1, a1, -128
7058 ; RV64ZVE32F-NEXT: bnez a1, .LBB68_16
7059 ; RV64ZVE32F-NEXT: .LBB68_11: # %else14
7060 ; RV64ZVE32F-NEXT: ret
7061 ; RV64ZVE32F-NEXT: .LBB68_12: # %cond.store3
7062 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7063 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7064 ; RV64ZVE32F-NEXT: add a2, a0, a2
7065 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
7066 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
7067 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7068 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7069 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7070 ; RV64ZVE32F-NEXT: beqz a2, .LBB68_6
7071 ; RV64ZVE32F-NEXT: .LBB68_13: # %cond.store5
7072 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7073 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
7074 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7075 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7076 ; RV64ZVE32F-NEXT: add a2, a0, a2
7077 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
7078 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
7079 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
7080 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7081 ; RV64ZVE32F-NEXT: beqz a2, .LBB68_7
7082 ; RV64ZVE32F-NEXT: .LBB68_14: # %cond.store7
7083 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
7084 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
7085 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7086 ; RV64ZVE32F-NEXT: add a2, a0, a2
7087 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7088 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
7089 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7090 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7091 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7092 ; RV64ZVE32F-NEXT: bnez a2, .LBB68_8
7093 ; RV64ZVE32F-NEXT: j .LBB68_9
7094 ; RV64ZVE32F-NEXT: .LBB68_15: # %cond.store11
7095 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7096 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7097 ; RV64ZVE32F-NEXT: add a2, a0, a2
7098 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7099 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
7100 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7101 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7102 ; RV64ZVE32F-NEXT: andi a1, a1, -128
7103 ; RV64ZVE32F-NEXT: beqz a1, .LBB68_11
7104 ; RV64ZVE32F-NEXT: .LBB68_16: # %cond.store13
7105 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7106 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
7107 ; RV64ZVE32F-NEXT: vmv.x.s a1, v10
7108 ; RV64ZVE32F-NEXT: slli a1, a1, 2
7109 ; RV64ZVE32F-NEXT: add a0, a0, a1
7110 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7111 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
7112 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7113 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
7114 ; RV64ZVE32F-NEXT: ret
7115 %ptrs = getelementptr inbounds float, ptr %base, <8 x i8> %idxs
7116 call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
7120 define void @mscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
7121 ; RV32-LABEL: mscatter_baseidx_sext_v8i8_v8f32:
7123 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
7124 ; RV32-NEXT: vsext.vf4 v12, v10
7125 ; RV32-NEXT: vsll.vi v10, v12, 2
7126 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
7129 ; RV64-LABEL: mscatter_baseidx_sext_v8i8_v8f32:
7131 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
7132 ; RV64-NEXT: vsext.vf8 v12, v10
7133 ; RV64-NEXT: vsll.vi v12, v12, 2
7134 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
7135 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
7138 ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8f32:
7139 ; RV64ZVE32F: # %bb.0:
7140 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
7141 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
7142 ; RV64ZVE32F-NEXT: andi a2, a1, 1
7143 ; RV64ZVE32F-NEXT: beqz a2, .LBB69_2
7144 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
7145 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7146 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7147 ; RV64ZVE32F-NEXT: add a2, a0, a2
7148 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7149 ; RV64ZVE32F-NEXT: vse32.v v8, (a2)
7150 ; RV64ZVE32F-NEXT: .LBB69_2: # %else
7151 ; RV64ZVE32F-NEXT: andi a2, a1, 2
7152 ; RV64ZVE32F-NEXT: beqz a2, .LBB69_4
7153 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
7154 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7155 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
7156 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
7157 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7158 ; RV64ZVE32F-NEXT: add a2, a0, a2
7159 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
7160 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
7161 ; RV64ZVE32F-NEXT: vse32.v v11, (a2)
7162 ; RV64ZVE32F-NEXT: .LBB69_4: # %else2
7163 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
7164 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
7165 ; RV64ZVE32F-NEXT: andi a2, a1, 4
7166 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
7167 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
7168 ; RV64ZVE32F-NEXT: bnez a2, .LBB69_12
7169 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
7170 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7171 ; RV64ZVE32F-NEXT: bnez a2, .LBB69_13
7172 ; RV64ZVE32F-NEXT: .LBB69_6: # %else6
7173 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7174 ; RV64ZVE32F-NEXT: bnez a2, .LBB69_14
7175 ; RV64ZVE32F-NEXT: .LBB69_7: # %else8
7176 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7177 ; RV64ZVE32F-NEXT: beqz a2, .LBB69_9
7178 ; RV64ZVE32F-NEXT: .LBB69_8: # %cond.store9
7179 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7180 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
7181 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7182 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7183 ; RV64ZVE32F-NEXT: add a2, a0, a2
7184 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7185 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
7186 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7187 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7188 ; RV64ZVE32F-NEXT: .LBB69_9: # %else10
7189 ; RV64ZVE32F-NEXT: andi a2, a1, 64
7190 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
7191 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
7192 ; RV64ZVE32F-NEXT: bnez a2, .LBB69_15
7193 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
7194 ; RV64ZVE32F-NEXT: andi a1, a1, -128
7195 ; RV64ZVE32F-NEXT: bnez a1, .LBB69_16
7196 ; RV64ZVE32F-NEXT: .LBB69_11: # %else14
7197 ; RV64ZVE32F-NEXT: ret
7198 ; RV64ZVE32F-NEXT: .LBB69_12: # %cond.store3
7199 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7200 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7201 ; RV64ZVE32F-NEXT: add a2, a0, a2
7202 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
7203 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
7204 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7205 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7206 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7207 ; RV64ZVE32F-NEXT: beqz a2, .LBB69_6
7208 ; RV64ZVE32F-NEXT: .LBB69_13: # %cond.store5
7209 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7210 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
7211 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7212 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7213 ; RV64ZVE32F-NEXT: add a2, a0, a2
7214 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
7215 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
7216 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
7217 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7218 ; RV64ZVE32F-NEXT: beqz a2, .LBB69_7
7219 ; RV64ZVE32F-NEXT: .LBB69_14: # %cond.store7
7220 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
7221 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
7222 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7223 ; RV64ZVE32F-NEXT: add a2, a0, a2
7224 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7225 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
7226 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7227 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7228 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7229 ; RV64ZVE32F-NEXT: bnez a2, .LBB69_8
7230 ; RV64ZVE32F-NEXT: j .LBB69_9
7231 ; RV64ZVE32F-NEXT: .LBB69_15: # %cond.store11
7232 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7233 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7234 ; RV64ZVE32F-NEXT: add a2, a0, a2
7235 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7236 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
7237 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7238 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7239 ; RV64ZVE32F-NEXT: andi a1, a1, -128
7240 ; RV64ZVE32F-NEXT: beqz a1, .LBB69_11
7241 ; RV64ZVE32F-NEXT: .LBB69_16: # %cond.store13
7242 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7243 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
7244 ; RV64ZVE32F-NEXT: vmv.x.s a1, v10
7245 ; RV64ZVE32F-NEXT: slli a1, a1, 2
7246 ; RV64ZVE32F-NEXT: add a0, a0, a1
7247 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7248 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
7249 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7250 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
7251 ; RV64ZVE32F-NEXT: ret
7252 %eidxs = sext <8 x i8> %idxs to <8 x i32>
7253 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
7254 call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
7258 define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
7259 ; RV32-LABEL: mscatter_baseidx_zext_v8i8_v8f32:
7261 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
7262 ; RV32-NEXT: vzext.vf2 v11, v10
7263 ; RV32-NEXT: vsll.vi v10, v11, 2
7264 ; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
7265 ; RV32-NEXT: vsoxei16.v v8, (a0), v10, v0.t
7268 ; RV64-LABEL: mscatter_baseidx_zext_v8i8_v8f32:
7270 ; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
7271 ; RV64-NEXT: vzext.vf2 v11, v10
7272 ; RV64-NEXT: vsll.vi v10, v11, 2
7273 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
7274 ; RV64-NEXT: vsoxei16.v v8, (a0), v10, v0.t
7277 ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8f32:
7278 ; RV64ZVE32F: # %bb.0:
7279 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
7280 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
7281 ; RV64ZVE32F-NEXT: andi a2, a1, 1
7282 ; RV64ZVE32F-NEXT: beqz a2, .LBB70_2
7283 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
7284 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7285 ; RV64ZVE32F-NEXT: andi a2, a2, 255
7286 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7287 ; RV64ZVE32F-NEXT: add a2, a0, a2
7288 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7289 ; RV64ZVE32F-NEXT: vse32.v v8, (a2)
7290 ; RV64ZVE32F-NEXT: .LBB70_2: # %else
7291 ; RV64ZVE32F-NEXT: andi a2, a1, 2
7292 ; RV64ZVE32F-NEXT: beqz a2, .LBB70_4
7293 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
7294 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7295 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
7296 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
7297 ; RV64ZVE32F-NEXT: andi a2, a2, 255
7298 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7299 ; RV64ZVE32F-NEXT: add a2, a0, a2
7300 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
7301 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
7302 ; RV64ZVE32F-NEXT: vse32.v v11, (a2)
7303 ; RV64ZVE32F-NEXT: .LBB70_4: # %else2
7304 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
7305 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
7306 ; RV64ZVE32F-NEXT: andi a2, a1, 4
7307 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
7308 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
7309 ; RV64ZVE32F-NEXT: bnez a2, .LBB70_12
7310 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
7311 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7312 ; RV64ZVE32F-NEXT: bnez a2, .LBB70_13
7313 ; RV64ZVE32F-NEXT: .LBB70_6: # %else6
7314 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7315 ; RV64ZVE32F-NEXT: bnez a2, .LBB70_14
7316 ; RV64ZVE32F-NEXT: .LBB70_7: # %else8
7317 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7318 ; RV64ZVE32F-NEXT: beqz a2, .LBB70_9
7319 ; RV64ZVE32F-NEXT: .LBB70_8: # %cond.store9
7320 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7321 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
7322 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7323 ; RV64ZVE32F-NEXT: andi a2, a2, 255
7324 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7325 ; RV64ZVE32F-NEXT: add a2, a0, a2
7326 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7327 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
7328 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7329 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7330 ; RV64ZVE32F-NEXT: .LBB70_9: # %else10
7331 ; RV64ZVE32F-NEXT: andi a2, a1, 64
7332 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
7333 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
7334 ; RV64ZVE32F-NEXT: bnez a2, .LBB70_15
7335 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
7336 ; RV64ZVE32F-NEXT: andi a1, a1, -128
7337 ; RV64ZVE32F-NEXT: bnez a1, .LBB70_16
7338 ; RV64ZVE32F-NEXT: .LBB70_11: # %else14
7339 ; RV64ZVE32F-NEXT: ret
7340 ; RV64ZVE32F-NEXT: .LBB70_12: # %cond.store3
7341 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7342 ; RV64ZVE32F-NEXT: andi a2, a2, 255
7343 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7344 ; RV64ZVE32F-NEXT: add a2, a0, a2
7345 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
7346 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
7347 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7348 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7349 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7350 ; RV64ZVE32F-NEXT: beqz a2, .LBB70_6
7351 ; RV64ZVE32F-NEXT: .LBB70_13: # %cond.store5
7352 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7353 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
7354 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7355 ; RV64ZVE32F-NEXT: andi a2, a2, 255
7356 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7357 ; RV64ZVE32F-NEXT: add a2, a0, a2
7358 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
7359 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
7360 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
7361 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7362 ; RV64ZVE32F-NEXT: beqz a2, .LBB70_7
7363 ; RV64ZVE32F-NEXT: .LBB70_14: # %cond.store7
7364 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
7365 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
7366 ; RV64ZVE32F-NEXT: andi a2, a2, 255
7367 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7368 ; RV64ZVE32F-NEXT: add a2, a0, a2
7369 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7370 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
7371 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7372 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7373 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7374 ; RV64ZVE32F-NEXT: bnez a2, .LBB70_8
7375 ; RV64ZVE32F-NEXT: j .LBB70_9
7376 ; RV64ZVE32F-NEXT: .LBB70_15: # %cond.store11
7377 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7378 ; RV64ZVE32F-NEXT: andi a2, a2, 255
7379 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7380 ; RV64ZVE32F-NEXT: add a2, a0, a2
7381 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7382 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
7383 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7384 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7385 ; RV64ZVE32F-NEXT: andi a1, a1, -128
7386 ; RV64ZVE32F-NEXT: beqz a1, .LBB70_11
7387 ; RV64ZVE32F-NEXT: .LBB70_16: # %cond.store13
7388 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7389 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
7390 ; RV64ZVE32F-NEXT: vmv.x.s a1, v10
7391 ; RV64ZVE32F-NEXT: andi a1, a1, 255
7392 ; RV64ZVE32F-NEXT: slli a1, a1, 2
7393 ; RV64ZVE32F-NEXT: add a0, a0, a1
7394 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7395 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
7396 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7397 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
7398 ; RV64ZVE32F-NEXT: ret
7399 %eidxs = zext <8 x i8> %idxs to <8 x i32>
7400 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
7401 call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
7405 define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
7406 ; RV32-LABEL: mscatter_baseidx_v8i16_v8f32:
7408 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
7409 ; RV32-NEXT: vsext.vf2 v12, v10
7410 ; RV32-NEXT: vsll.vi v10, v12, 2
7411 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
7414 ; RV64-LABEL: mscatter_baseidx_v8i16_v8f32:
7416 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
7417 ; RV64-NEXT: vsext.vf4 v12, v10
7418 ; RV64-NEXT: vsll.vi v12, v12, 2
7419 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
7420 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
7423 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i16_v8f32:
7424 ; RV64ZVE32F: # %bb.0:
7425 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
7426 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
7427 ; RV64ZVE32F-NEXT: andi a2, a1, 1
7428 ; RV64ZVE32F-NEXT: beqz a2, .LBB71_2
7429 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
7430 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
7431 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7432 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7433 ; RV64ZVE32F-NEXT: add a2, a0, a2
7434 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7435 ; RV64ZVE32F-NEXT: vse32.v v8, (a2)
7436 ; RV64ZVE32F-NEXT: .LBB71_2: # %else
7437 ; RV64ZVE32F-NEXT: andi a2, a1, 2
7438 ; RV64ZVE32F-NEXT: beqz a2, .LBB71_4
7439 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
7440 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
7441 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
7442 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
7443 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7444 ; RV64ZVE32F-NEXT: add a2, a0, a2
7445 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
7446 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
7447 ; RV64ZVE32F-NEXT: vse32.v v11, (a2)
7448 ; RV64ZVE32F-NEXT: .LBB71_4: # %else2
7449 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
7450 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
7451 ; RV64ZVE32F-NEXT: andi a2, a1, 4
7452 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
7453 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
7454 ; RV64ZVE32F-NEXT: bnez a2, .LBB71_12
7455 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
7456 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7457 ; RV64ZVE32F-NEXT: bnez a2, .LBB71_13
7458 ; RV64ZVE32F-NEXT: .LBB71_6: # %else6
7459 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7460 ; RV64ZVE32F-NEXT: bnez a2, .LBB71_14
7461 ; RV64ZVE32F-NEXT: .LBB71_7: # %else8
7462 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7463 ; RV64ZVE32F-NEXT: beqz a2, .LBB71_9
7464 ; RV64ZVE32F-NEXT: .LBB71_8: # %cond.store9
7465 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
7466 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
7467 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7468 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7469 ; RV64ZVE32F-NEXT: add a2, a0, a2
7470 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7471 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
7472 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7473 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7474 ; RV64ZVE32F-NEXT: .LBB71_9: # %else10
7475 ; RV64ZVE32F-NEXT: andi a2, a1, 64
7476 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
7477 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
7478 ; RV64ZVE32F-NEXT: bnez a2, .LBB71_15
7479 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
7480 ; RV64ZVE32F-NEXT: andi a1, a1, -128
7481 ; RV64ZVE32F-NEXT: bnez a1, .LBB71_16
7482 ; RV64ZVE32F-NEXT: .LBB71_11: # %else14
7483 ; RV64ZVE32F-NEXT: ret
7484 ; RV64ZVE32F-NEXT: .LBB71_12: # %cond.store3
7485 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7486 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7487 ; RV64ZVE32F-NEXT: add a2, a0, a2
7488 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
7489 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
7490 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7491 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7492 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7493 ; RV64ZVE32F-NEXT: beqz a2, .LBB71_6
7494 ; RV64ZVE32F-NEXT: .LBB71_13: # %cond.store5
7495 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
7496 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
7497 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7498 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7499 ; RV64ZVE32F-NEXT: add a2, a0, a2
7500 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
7501 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
7502 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
7503 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7504 ; RV64ZVE32F-NEXT: beqz a2, .LBB71_7
7505 ; RV64ZVE32F-NEXT: .LBB71_14: # %cond.store7
7506 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
7507 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
7508 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7509 ; RV64ZVE32F-NEXT: add a2, a0, a2
7510 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7511 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
7512 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7513 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7514 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7515 ; RV64ZVE32F-NEXT: bnez a2, .LBB71_8
7516 ; RV64ZVE32F-NEXT: j .LBB71_9
7517 ; RV64ZVE32F-NEXT: .LBB71_15: # %cond.store11
7518 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7519 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7520 ; RV64ZVE32F-NEXT: add a2, a0, a2
7521 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7522 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
7523 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7524 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7525 ; RV64ZVE32F-NEXT: andi a1, a1, -128
7526 ; RV64ZVE32F-NEXT: beqz a1, .LBB71_11
7527 ; RV64ZVE32F-NEXT: .LBB71_16: # %cond.store13
7528 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
7529 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
7530 ; RV64ZVE32F-NEXT: vmv.x.s a1, v10
7531 ; RV64ZVE32F-NEXT: slli a1, a1, 2
7532 ; RV64ZVE32F-NEXT: add a0, a0, a1
7533 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7534 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
7535 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7536 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
7537 ; RV64ZVE32F-NEXT: ret
7538 %ptrs = getelementptr inbounds float, ptr %base, <8 x i16> %idxs
7539 call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
7543 define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
7544 ; RV32-LABEL: mscatter_baseidx_sext_v8i16_v8f32:
7546 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
7547 ; RV32-NEXT: vsext.vf2 v12, v10
7548 ; RV32-NEXT: vsll.vi v10, v12, 2
7549 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
7552 ; RV64-LABEL: mscatter_baseidx_sext_v8i16_v8f32:
7554 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
7555 ; RV64-NEXT: vsext.vf4 v12, v10
7556 ; RV64-NEXT: vsll.vi v12, v12, 2
7557 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
7558 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
7561 ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i16_v8f32:
7562 ; RV64ZVE32F: # %bb.0:
7563 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
7564 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
7565 ; RV64ZVE32F-NEXT: andi a2, a1, 1
7566 ; RV64ZVE32F-NEXT: beqz a2, .LBB72_2
7567 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
7568 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
7569 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7570 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7571 ; RV64ZVE32F-NEXT: add a2, a0, a2
7572 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7573 ; RV64ZVE32F-NEXT: vse32.v v8, (a2)
7574 ; RV64ZVE32F-NEXT: .LBB72_2: # %else
7575 ; RV64ZVE32F-NEXT: andi a2, a1, 2
7576 ; RV64ZVE32F-NEXT: beqz a2, .LBB72_4
7577 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
7578 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
7579 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
7580 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
7581 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7582 ; RV64ZVE32F-NEXT: add a2, a0, a2
7583 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
7584 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
7585 ; RV64ZVE32F-NEXT: vse32.v v11, (a2)
7586 ; RV64ZVE32F-NEXT: .LBB72_4: # %else2
7587 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
7588 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
7589 ; RV64ZVE32F-NEXT: andi a2, a1, 4
7590 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
7591 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
7592 ; RV64ZVE32F-NEXT: bnez a2, .LBB72_12
7593 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
7594 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7595 ; RV64ZVE32F-NEXT: bnez a2, .LBB72_13
7596 ; RV64ZVE32F-NEXT: .LBB72_6: # %else6
7597 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7598 ; RV64ZVE32F-NEXT: bnez a2, .LBB72_14
7599 ; RV64ZVE32F-NEXT: .LBB72_7: # %else8
7600 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7601 ; RV64ZVE32F-NEXT: beqz a2, .LBB72_9
7602 ; RV64ZVE32F-NEXT: .LBB72_8: # %cond.store9
7603 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
7604 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
7605 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7606 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7607 ; RV64ZVE32F-NEXT: add a2, a0, a2
7608 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7609 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
7610 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7611 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7612 ; RV64ZVE32F-NEXT: .LBB72_9: # %else10
7613 ; RV64ZVE32F-NEXT: andi a2, a1, 64
7614 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
7615 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
7616 ; RV64ZVE32F-NEXT: bnez a2, .LBB72_15
7617 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
7618 ; RV64ZVE32F-NEXT: andi a1, a1, -128
7619 ; RV64ZVE32F-NEXT: bnez a1, .LBB72_16
7620 ; RV64ZVE32F-NEXT: .LBB72_11: # %else14
7621 ; RV64ZVE32F-NEXT: ret
7622 ; RV64ZVE32F-NEXT: .LBB72_12: # %cond.store3
7623 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7624 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7625 ; RV64ZVE32F-NEXT: add a2, a0, a2
7626 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
7627 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
7628 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7629 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7630 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7631 ; RV64ZVE32F-NEXT: beqz a2, .LBB72_6
7632 ; RV64ZVE32F-NEXT: .LBB72_13: # %cond.store5
7633 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
7634 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
7635 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7636 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7637 ; RV64ZVE32F-NEXT: add a2, a0, a2
7638 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
7639 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
7640 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
7641 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7642 ; RV64ZVE32F-NEXT: beqz a2, .LBB72_7
7643 ; RV64ZVE32F-NEXT: .LBB72_14: # %cond.store7
7644 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
7645 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
7646 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7647 ; RV64ZVE32F-NEXT: add a2, a0, a2
7648 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7649 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
7650 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7651 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7652 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7653 ; RV64ZVE32F-NEXT: bnez a2, .LBB72_8
7654 ; RV64ZVE32F-NEXT: j .LBB72_9
7655 ; RV64ZVE32F-NEXT: .LBB72_15: # %cond.store11
7656 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7657 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7658 ; RV64ZVE32F-NEXT: add a2, a0, a2
7659 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7660 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
7661 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7662 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7663 ; RV64ZVE32F-NEXT: andi a1, a1, -128
7664 ; RV64ZVE32F-NEXT: beqz a1, .LBB72_11
7665 ; RV64ZVE32F-NEXT: .LBB72_16: # %cond.store13
7666 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
7667 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
7668 ; RV64ZVE32F-NEXT: vmv.x.s a1, v10
7669 ; RV64ZVE32F-NEXT: slli a1, a1, 2
7670 ; RV64ZVE32F-NEXT: add a0, a0, a1
7671 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7672 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
7673 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7674 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
7675 ; RV64ZVE32F-NEXT: ret
7676 %eidxs = sext <8 x i16> %idxs to <8 x i32>
7677 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
7678 call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
7682 define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
7683 ; RV32-LABEL: mscatter_baseidx_zext_v8i16_v8f32:
7685 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
7686 ; RV32-NEXT: vzext.vf2 v12, v10
7687 ; RV32-NEXT: vsll.vi v10, v12, 2
7688 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
7691 ; RV64-LABEL: mscatter_baseidx_zext_v8i16_v8f32:
7693 ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
7694 ; RV64-NEXT: vzext.vf2 v12, v10
7695 ; RV64-NEXT: vsll.vi v10, v12, 2
7696 ; RV64-NEXT: vsoxei32.v v8, (a0), v10, v0.t
7699 ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8f32:
7700 ; RV64ZVE32F: # %bb.0:
7701 ; RV64ZVE32F-NEXT: lui a1, 16
7702 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
7703 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
7704 ; RV64ZVE32F-NEXT: andi a3, a2, 1
7705 ; RV64ZVE32F-NEXT: addiw a1, a1, -1
7706 ; RV64ZVE32F-NEXT: beqz a3, .LBB73_2
7707 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
7708 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
7709 ; RV64ZVE32F-NEXT: vmv.x.s a3, v10
7710 ; RV64ZVE32F-NEXT: and a3, a3, a1
7711 ; RV64ZVE32F-NEXT: slli a3, a3, 2
7712 ; RV64ZVE32F-NEXT: add a3, a0, a3
7713 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7714 ; RV64ZVE32F-NEXT: vse32.v v8, (a3)
7715 ; RV64ZVE32F-NEXT: .LBB73_2: # %else
7716 ; RV64ZVE32F-NEXT: andi a3, a2, 2
7717 ; RV64ZVE32F-NEXT: beqz a3, .LBB73_4
7718 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
7719 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
7720 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
7721 ; RV64ZVE32F-NEXT: vmv.x.s a3, v11
7722 ; RV64ZVE32F-NEXT: and a3, a3, a1
7723 ; RV64ZVE32F-NEXT: slli a3, a3, 2
7724 ; RV64ZVE32F-NEXT: add a3, a0, a3
7725 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
7726 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
7727 ; RV64ZVE32F-NEXT: vse32.v v11, (a3)
7728 ; RV64ZVE32F-NEXT: .LBB73_4: # %else2
7729 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
7730 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
7731 ; RV64ZVE32F-NEXT: andi a3, a2, 4
7732 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
7733 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
7734 ; RV64ZVE32F-NEXT: bnez a3, .LBB73_12
7735 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
7736 ; RV64ZVE32F-NEXT: andi a3, a2, 8
7737 ; RV64ZVE32F-NEXT: bnez a3, .LBB73_13
7738 ; RV64ZVE32F-NEXT: .LBB73_6: # %else6
7739 ; RV64ZVE32F-NEXT: andi a3, a2, 16
7740 ; RV64ZVE32F-NEXT: bnez a3, .LBB73_14
7741 ; RV64ZVE32F-NEXT: .LBB73_7: # %else8
7742 ; RV64ZVE32F-NEXT: andi a3, a2, 32
7743 ; RV64ZVE32F-NEXT: beqz a3, .LBB73_9
7744 ; RV64ZVE32F-NEXT: .LBB73_8: # %cond.store9
7745 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
7746 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
7747 ; RV64ZVE32F-NEXT: vmv.x.s a3, v10
7748 ; RV64ZVE32F-NEXT: and a3, a3, a1
7749 ; RV64ZVE32F-NEXT: slli a3, a3, 2
7750 ; RV64ZVE32F-NEXT: add a3, a0, a3
7751 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7752 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
7753 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7754 ; RV64ZVE32F-NEXT: vse32.v v12, (a3)
7755 ; RV64ZVE32F-NEXT: .LBB73_9: # %else10
7756 ; RV64ZVE32F-NEXT: andi a3, a2, 64
7757 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
7758 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
7759 ; RV64ZVE32F-NEXT: bnez a3, .LBB73_15
7760 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
7761 ; RV64ZVE32F-NEXT: andi a2, a2, -128
7762 ; RV64ZVE32F-NEXT: bnez a2, .LBB73_16
7763 ; RV64ZVE32F-NEXT: .LBB73_11: # %else14
7764 ; RV64ZVE32F-NEXT: ret
7765 ; RV64ZVE32F-NEXT: .LBB73_12: # %cond.store3
7766 ; RV64ZVE32F-NEXT: vmv.x.s a3, v10
7767 ; RV64ZVE32F-NEXT: and a3, a3, a1
7768 ; RV64ZVE32F-NEXT: slli a3, a3, 2
7769 ; RV64ZVE32F-NEXT: add a3, a0, a3
7770 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
7771 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
7772 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7773 ; RV64ZVE32F-NEXT: vse32.v v12, (a3)
7774 ; RV64ZVE32F-NEXT: andi a3, a2, 8
7775 ; RV64ZVE32F-NEXT: beqz a3, .LBB73_6
7776 ; RV64ZVE32F-NEXT: .LBB73_13: # %cond.store5
7777 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
7778 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
7779 ; RV64ZVE32F-NEXT: vmv.x.s a3, v10
7780 ; RV64ZVE32F-NEXT: and a3, a3, a1
7781 ; RV64ZVE32F-NEXT: slli a3, a3, 2
7782 ; RV64ZVE32F-NEXT: add a3, a0, a3
7783 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
7784 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
7785 ; RV64ZVE32F-NEXT: vse32.v v10, (a3)
7786 ; RV64ZVE32F-NEXT: andi a3, a2, 16
7787 ; RV64ZVE32F-NEXT: beqz a3, .LBB73_7
7788 ; RV64ZVE32F-NEXT: .LBB73_14: # %cond.store7
7789 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
7790 ; RV64ZVE32F-NEXT: vmv.x.s a3, v11
7791 ; RV64ZVE32F-NEXT: and a3, a3, a1
7792 ; RV64ZVE32F-NEXT: slli a3, a3, 2
7793 ; RV64ZVE32F-NEXT: add a3, a0, a3
7794 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7795 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
7796 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7797 ; RV64ZVE32F-NEXT: vse32.v v12, (a3)
7798 ; RV64ZVE32F-NEXT: andi a3, a2, 32
7799 ; RV64ZVE32F-NEXT: bnez a3, .LBB73_8
7800 ; RV64ZVE32F-NEXT: j .LBB73_9
7801 ; RV64ZVE32F-NEXT: .LBB73_15: # %cond.store11
7802 ; RV64ZVE32F-NEXT: vmv.x.s a3, v10
7803 ; RV64ZVE32F-NEXT: and a3, a3, a1
7804 ; RV64ZVE32F-NEXT: slli a3, a3, 2
7805 ; RV64ZVE32F-NEXT: add a3, a0, a3
7806 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7807 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
7808 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7809 ; RV64ZVE32F-NEXT: vse32.v v12, (a3)
7810 ; RV64ZVE32F-NEXT: andi a2, a2, -128
7811 ; RV64ZVE32F-NEXT: beqz a2, .LBB73_11
7812 ; RV64ZVE32F-NEXT: .LBB73_16: # %cond.store13
7813 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
7814 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
7815 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7816 ; RV64ZVE32F-NEXT: and a1, a2, a1
7817 ; RV64ZVE32F-NEXT: slli a1, a1, 2
7818 ; RV64ZVE32F-NEXT: add a0, a0, a1
7819 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7820 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
7821 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7822 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
7823 ; RV64ZVE32F-NEXT: ret
7824 %eidxs = zext <8 x i16> %idxs to <8 x i32>
7825 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
7826 call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
7830 define void @mscatter_baseidx_v8f32(<8 x float> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m) {
7831 ; RV32-LABEL: mscatter_baseidx_v8f32:
7833 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
7834 ; RV32-NEXT: vsll.vi v10, v10, 2
7835 ; RV32-NEXT: vsoxei32.v v8, (a0), v10, v0.t
7838 ; RV64-LABEL: mscatter_baseidx_v8f32:
7840 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
7841 ; RV64-NEXT: vsext.vf2 v12, v10
7842 ; RV64-NEXT: vsll.vi v12, v12, 2
7843 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
7844 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
7847 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8f32:
7848 ; RV64ZVE32F: # %bb.0:
7849 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
7850 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
7851 ; RV64ZVE32F-NEXT: andi a2, a1, 1
7852 ; RV64ZVE32F-NEXT: beqz a2, .LBB74_2
7853 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
7854 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7855 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7856 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7857 ; RV64ZVE32F-NEXT: add a2, a0, a2
7858 ; RV64ZVE32F-NEXT: vse32.v v8, (a2)
7859 ; RV64ZVE32F-NEXT: .LBB74_2: # %else
7860 ; RV64ZVE32F-NEXT: andi a2, a1, 2
7861 ; RV64ZVE32F-NEXT: beqz a2, .LBB74_4
7862 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
7863 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7864 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 1
7865 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
7866 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7867 ; RV64ZVE32F-NEXT: add a2, a0, a2
7868 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1
7869 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7870 ; RV64ZVE32F-NEXT: .LBB74_4: # %else2
7871 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
7872 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 4
7873 ; RV64ZVE32F-NEXT: andi a2, a1, 4
7874 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
7875 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
7876 ; RV64ZVE32F-NEXT: bnez a2, .LBB74_12
7877 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
7878 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7879 ; RV64ZVE32F-NEXT: bnez a2, .LBB74_13
7880 ; RV64ZVE32F-NEXT: .LBB74_6: # %else6
7881 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7882 ; RV64ZVE32F-NEXT: bnez a2, .LBB74_14
7883 ; RV64ZVE32F-NEXT: .LBB74_7: # %else8
7884 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7885 ; RV64ZVE32F-NEXT: beqz a2, .LBB74_9
7886 ; RV64ZVE32F-NEXT: .LBB74_8: # %cond.store9
7887 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7888 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v12, 1
7889 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7890 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7891 ; RV64ZVE32F-NEXT: add a2, a0, a2
7892 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7893 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5
7894 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7895 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
7896 ; RV64ZVE32F-NEXT: .LBB74_9: # %else10
7897 ; RV64ZVE32F-NEXT: andi a2, a1, 64
7898 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
7899 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v12, 2
7900 ; RV64ZVE32F-NEXT: bnez a2, .LBB74_15
7901 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
7902 ; RV64ZVE32F-NEXT: andi a1, a1, -128
7903 ; RV64ZVE32F-NEXT: bnez a1, .LBB74_16
7904 ; RV64ZVE32F-NEXT: .LBB74_11: # %else14
7905 ; RV64ZVE32F-NEXT: ret
7906 ; RV64ZVE32F-NEXT: .LBB74_12: # %cond.store3
7907 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7908 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7909 ; RV64ZVE32F-NEXT: add a2, a0, a2
7910 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
7911 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7912 ; RV64ZVE32F-NEXT: vse32.v v11, (a2)
7913 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7914 ; RV64ZVE32F-NEXT: beqz a2, .LBB74_6
7915 ; RV64ZVE32F-NEXT: .LBB74_13: # %cond.store5
7916 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7917 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
7918 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7919 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7920 ; RV64ZVE32F-NEXT: add a2, a0, a2
7921 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
7922 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
7923 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7924 ; RV64ZVE32F-NEXT: beqz a2, .LBB74_7
7925 ; RV64ZVE32F-NEXT: .LBB74_14: # %cond.store7
7926 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7927 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
7928 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7929 ; RV64ZVE32F-NEXT: add a2, a0, a2
7930 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
7931 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7932 ; RV64ZVE32F-NEXT: vse32.v v10, (a2)
7933 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7934 ; RV64ZVE32F-NEXT: bnez a2, .LBB74_8
7935 ; RV64ZVE32F-NEXT: j .LBB74_9
7936 ; RV64ZVE32F-NEXT: .LBB74_15: # %cond.store11
7937 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7938 ; RV64ZVE32F-NEXT: slli a2, a2, 2
7939 ; RV64ZVE32F-NEXT: add a2, a0, a2
7940 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7941 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
7942 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7943 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
7944 ; RV64ZVE32F-NEXT: andi a1, a1, -128
7945 ; RV64ZVE32F-NEXT: beqz a1, .LBB74_11
7946 ; RV64ZVE32F-NEXT: .LBB74_16: # %cond.store13
7947 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7948 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
7949 ; RV64ZVE32F-NEXT: vmv.x.s a1, v10
7950 ; RV64ZVE32F-NEXT: slli a1, a1, 2
7951 ; RV64ZVE32F-NEXT: add a0, a0, a1
7952 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
7953 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
7954 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
7955 ; RV64ZVE32F-NEXT: vse32.v v8, (a0)
7956 ; RV64ZVE32F-NEXT: ret
7957 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %idxs
7958 call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, i32 4, <8 x i1> %m)
7962 declare void @llvm.masked.scatter.v1f64.v1p0(<1 x double>, <1 x ptr>, i32, <1 x i1>)
7964 define void @mscatter_v1f64(<1 x double> %val, <1 x ptr> %ptrs, <1 x i1> %m) {
7965 ; RV32V-LABEL: mscatter_v1f64:
7967 ; RV32V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
7968 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
7971 ; RV64-LABEL: mscatter_v1f64:
7973 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
7974 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
7977 ; RV32ZVE32F-LABEL: mscatter_v1f64:
7978 ; RV32ZVE32F: # %bb.0:
7979 ; RV32ZVE32F-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
7980 ; RV32ZVE32F-NEXT: vfirst.m a0, v0
7981 ; RV32ZVE32F-NEXT: bnez a0, .LBB75_2
7982 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.store
7983 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
7984 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
7985 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
7986 ; RV32ZVE32F-NEXT: .LBB75_2: # %else
7987 ; RV32ZVE32F-NEXT: ret
7989 ; RV64ZVE32F-LABEL: mscatter_v1f64:
7990 ; RV64ZVE32F: # %bb.0:
7991 ; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
7992 ; RV64ZVE32F-NEXT: vfirst.m a1, v0
7993 ; RV64ZVE32F-NEXT: bnez a1, .LBB75_2
7994 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
7995 ; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
7996 ; RV64ZVE32F-NEXT: .LBB75_2: # %else
7997 ; RV64ZVE32F-NEXT: ret
7998 call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> %val, <1 x ptr> %ptrs, i32 8, <1 x i1> %m)
8002 declare void @llvm.masked.scatter.v2f64.v2p0(<2 x double>, <2 x ptr>, i32, <2 x i1>)
8004 define void @mscatter_v2f64(<2 x double> %val, <2 x ptr> %ptrs, <2 x i1> %m) {
8005 ; RV32V-LABEL: mscatter_v2f64:
8007 ; RV32V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
8008 ; RV32V-NEXT: vsoxei32.v v8, (zero), v9, v0.t
8011 ; RV64-LABEL: mscatter_v2f64:
8013 ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
8014 ; RV64-NEXT: vsoxei64.v v8, (zero), v9, v0.t
8017 ; RV32ZVE32F-LABEL: mscatter_v2f64:
8018 ; RV32ZVE32F: # %bb.0:
8019 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
8020 ; RV32ZVE32F-NEXT: vmv.x.s a0, v0
8021 ; RV32ZVE32F-NEXT: andi a1, a0, 1
8022 ; RV32ZVE32F-NEXT: bnez a1, .LBB76_3
8023 ; RV32ZVE32F-NEXT: # %bb.1: # %else
8024 ; RV32ZVE32F-NEXT: andi a0, a0, 2
8025 ; RV32ZVE32F-NEXT: bnez a0, .LBB76_4
8026 ; RV32ZVE32F-NEXT: .LBB76_2: # %else2
8027 ; RV32ZVE32F-NEXT: ret
8028 ; RV32ZVE32F-NEXT: .LBB76_3: # %cond.store
8029 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
8030 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
8031 ; RV32ZVE32F-NEXT: fsd fa0, 0(a1)
8032 ; RV32ZVE32F-NEXT: andi a0, a0, 2
8033 ; RV32ZVE32F-NEXT: beqz a0, .LBB76_2
8034 ; RV32ZVE32F-NEXT: .LBB76_4: # %cond.store1
8035 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8036 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 1
8037 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
8038 ; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
8039 ; RV32ZVE32F-NEXT: ret
8041 ; RV64ZVE32F-LABEL: mscatter_v2f64:
8042 ; RV64ZVE32F: # %bb.0:
8043 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
8044 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
8045 ; RV64ZVE32F-NEXT: andi a3, a2, 1
8046 ; RV64ZVE32F-NEXT: bnez a3, .LBB76_3
8047 ; RV64ZVE32F-NEXT: # %bb.1: # %else
8048 ; RV64ZVE32F-NEXT: andi a2, a2, 2
8049 ; RV64ZVE32F-NEXT: bnez a2, .LBB76_4
8050 ; RV64ZVE32F-NEXT: .LBB76_2: # %else2
8051 ; RV64ZVE32F-NEXT: ret
8052 ; RV64ZVE32F-NEXT: .LBB76_3: # %cond.store
8053 ; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
8054 ; RV64ZVE32F-NEXT: andi a2, a2, 2
8055 ; RV64ZVE32F-NEXT: beqz a2, .LBB76_2
8056 ; RV64ZVE32F-NEXT: .LBB76_4: # %cond.store1
8057 ; RV64ZVE32F-NEXT: fsd fa1, 0(a1)
8058 ; RV64ZVE32F-NEXT: ret
8059 call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> %val, <2 x ptr> %ptrs, i32 8, <2 x i1> %m)
8063 declare void @llvm.masked.scatter.v4f64.v4p0(<4 x double>, <4 x ptr>, i32, <4 x i1>)
8065 define void @mscatter_v4f64(<4 x double> %val, <4 x ptr> %ptrs, <4 x i1> %m) {
8066 ; RV32V-LABEL: mscatter_v4f64:
8068 ; RV32V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
8069 ; RV32V-NEXT: vsoxei32.v v8, (zero), v10, v0.t
8072 ; RV64-LABEL: mscatter_v4f64:
8074 ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
8075 ; RV64-NEXT: vsoxei64.v v8, (zero), v10, v0.t
8078 ; RV32ZVE32F-LABEL: mscatter_v4f64:
8079 ; RV32ZVE32F: # %bb.0:
8080 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
8081 ; RV32ZVE32F-NEXT: vmv.x.s a0, v0
8082 ; RV32ZVE32F-NEXT: andi a1, a0, 1
8083 ; RV32ZVE32F-NEXT: bnez a1, .LBB77_5
8084 ; RV32ZVE32F-NEXT: # %bb.1: # %else
8085 ; RV32ZVE32F-NEXT: andi a1, a0, 2
8086 ; RV32ZVE32F-NEXT: bnez a1, .LBB77_6
8087 ; RV32ZVE32F-NEXT: .LBB77_2: # %else2
8088 ; RV32ZVE32F-NEXT: andi a1, a0, 4
8089 ; RV32ZVE32F-NEXT: bnez a1, .LBB77_7
8090 ; RV32ZVE32F-NEXT: .LBB77_3: # %else4
8091 ; RV32ZVE32F-NEXT: andi a0, a0, 8
8092 ; RV32ZVE32F-NEXT: bnez a0, .LBB77_8
8093 ; RV32ZVE32F-NEXT: .LBB77_4: # %else6
8094 ; RV32ZVE32F-NEXT: ret
8095 ; RV32ZVE32F-NEXT: .LBB77_5: # %cond.store
8096 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
8097 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
8098 ; RV32ZVE32F-NEXT: fsd fa0, 0(a1)
8099 ; RV32ZVE32F-NEXT: andi a1, a0, 2
8100 ; RV32ZVE32F-NEXT: beqz a1, .LBB77_2
8101 ; RV32ZVE32F-NEXT: .LBB77_6: # %cond.store1
8102 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8103 ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1
8104 ; RV32ZVE32F-NEXT: vmv.x.s a1, v9
8105 ; RV32ZVE32F-NEXT: fsd fa1, 0(a1)
8106 ; RV32ZVE32F-NEXT: andi a1, a0, 4
8107 ; RV32ZVE32F-NEXT: beqz a1, .LBB77_3
8108 ; RV32ZVE32F-NEXT: .LBB77_7: # %cond.store3
8109 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8110 ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2
8111 ; RV32ZVE32F-NEXT: vmv.x.s a1, v9
8112 ; RV32ZVE32F-NEXT: fsd fa2, 0(a1)
8113 ; RV32ZVE32F-NEXT: andi a0, a0, 8
8114 ; RV32ZVE32F-NEXT: beqz a0, .LBB77_4
8115 ; RV32ZVE32F-NEXT: .LBB77_8: # %cond.store5
8116 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8117 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3
8118 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
8119 ; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
8120 ; RV32ZVE32F-NEXT: ret
8122 ; RV64ZVE32F-LABEL: mscatter_v4f64:
8123 ; RV64ZVE32F: # %bb.0:
8124 ; RV64ZVE32F-NEXT: ld a1, 24(a0)
8125 ; RV64ZVE32F-NEXT: ld a2, 16(a0)
8126 ; RV64ZVE32F-NEXT: ld a4, 8(a0)
8127 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
8128 ; RV64ZVE32F-NEXT: vmv.x.s a3, v0
8129 ; RV64ZVE32F-NEXT: andi a5, a3, 1
8130 ; RV64ZVE32F-NEXT: bnez a5, .LBB77_5
8131 ; RV64ZVE32F-NEXT: # %bb.1: # %else
8132 ; RV64ZVE32F-NEXT: andi a0, a3, 2
8133 ; RV64ZVE32F-NEXT: bnez a0, .LBB77_6
8134 ; RV64ZVE32F-NEXT: .LBB77_2: # %else2
8135 ; RV64ZVE32F-NEXT: andi a0, a3, 4
8136 ; RV64ZVE32F-NEXT: bnez a0, .LBB77_7
8137 ; RV64ZVE32F-NEXT: .LBB77_3: # %else4
8138 ; RV64ZVE32F-NEXT: andi a3, a3, 8
8139 ; RV64ZVE32F-NEXT: bnez a3, .LBB77_8
8140 ; RV64ZVE32F-NEXT: .LBB77_4: # %else6
8141 ; RV64ZVE32F-NEXT: ret
8142 ; RV64ZVE32F-NEXT: .LBB77_5: # %cond.store
8143 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
8144 ; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
8145 ; RV64ZVE32F-NEXT: andi a0, a3, 2
8146 ; RV64ZVE32F-NEXT: beqz a0, .LBB77_2
8147 ; RV64ZVE32F-NEXT: .LBB77_6: # %cond.store1
8148 ; RV64ZVE32F-NEXT: fsd fa1, 0(a4)
8149 ; RV64ZVE32F-NEXT: andi a0, a3, 4
8150 ; RV64ZVE32F-NEXT: beqz a0, .LBB77_3
8151 ; RV64ZVE32F-NEXT: .LBB77_7: # %cond.store3
8152 ; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
8153 ; RV64ZVE32F-NEXT: andi a3, a3, 8
8154 ; RV64ZVE32F-NEXT: beqz a3, .LBB77_4
8155 ; RV64ZVE32F-NEXT: .LBB77_8: # %cond.store5
8156 ; RV64ZVE32F-NEXT: fsd fa3, 0(a1)
8157 ; RV64ZVE32F-NEXT: ret
8158 call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> %val, <4 x ptr> %ptrs, i32 8, <4 x i1> %m)
8162 define void @mscatter_truemask_v4f64(<4 x double> %val, <4 x ptr> %ptrs) {
8163 ; RV32V-LABEL: mscatter_truemask_v4f64:
8165 ; RV32V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
8166 ; RV32V-NEXT: vsoxei32.v v8, (zero), v10
8169 ; RV64-LABEL: mscatter_truemask_v4f64:
8171 ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
8172 ; RV64-NEXT: vsoxei64.v v8, (zero), v10
8175 ; RV32ZVE32F-LABEL: mscatter_truemask_v4f64:
8176 ; RV32ZVE32F: # %bb.0:
8177 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8178 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
8179 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
8180 ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1
8181 ; RV32ZVE32F-NEXT: vmv.x.s a0, v9
8182 ; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
8183 ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2
8184 ; RV32ZVE32F-NEXT: vmv.x.s a0, v9
8185 ; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
8186 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3
8187 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
8188 ; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
8189 ; RV32ZVE32F-NEXT: ret
8191 ; RV64ZVE32F-LABEL: mscatter_truemask_v4f64:
8192 ; RV64ZVE32F: # %bb.0:
8193 ; RV64ZVE32F-NEXT: ld a1, 0(a0)
8194 ; RV64ZVE32F-NEXT: ld a2, 8(a0)
8195 ; RV64ZVE32F-NEXT: ld a3, 16(a0)
8196 ; RV64ZVE32F-NEXT: ld a0, 24(a0)
8197 ; RV64ZVE32F-NEXT: fsd fa0, 0(a1)
8198 ; RV64ZVE32F-NEXT: fsd fa1, 0(a2)
8199 ; RV64ZVE32F-NEXT: fsd fa2, 0(a3)
8200 ; RV64ZVE32F-NEXT: fsd fa3, 0(a0)
8201 ; RV64ZVE32F-NEXT: ret
8202 call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> %val, <4 x ptr> %ptrs, i32 8, <4 x i1> splat (i1 1))
8206 define void @mscatter_falsemask_v4f64(<4 x double> %val, <4 x ptr> %ptrs) {
8207 ; CHECK-LABEL: mscatter_falsemask_v4f64:
8210 call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> %val, <4 x ptr> %ptrs, i32 8, <4 x i1> zeroinitializer)
8214 declare void @llvm.masked.scatter.v8f64.v8p0(<8 x double>, <8 x ptr>, i32, <8 x i1>)
8216 define void @mscatter_v8f64(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m) {
8217 ; RV32V-LABEL: mscatter_v8f64:
8219 ; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
8220 ; RV32V-NEXT: vsoxei32.v v8, (zero), v12, v0.t
8223 ; RV64-LABEL: mscatter_v8f64:
8225 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
8226 ; RV64-NEXT: vsoxei64.v v8, (zero), v12, v0.t
8229 ; RV32ZVE32F-LABEL: mscatter_v8f64:
8230 ; RV32ZVE32F: # %bb.0:
8231 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
8232 ; RV32ZVE32F-NEXT: vmv.x.s a0, v0
8233 ; RV32ZVE32F-NEXT: andi a1, a0, 1
8234 ; RV32ZVE32F-NEXT: bnez a1, .LBB80_9
8235 ; RV32ZVE32F-NEXT: # %bb.1: # %else
8236 ; RV32ZVE32F-NEXT: andi a1, a0, 2
8237 ; RV32ZVE32F-NEXT: bnez a1, .LBB80_10
8238 ; RV32ZVE32F-NEXT: .LBB80_2: # %else2
8239 ; RV32ZVE32F-NEXT: andi a1, a0, 4
8240 ; RV32ZVE32F-NEXT: bnez a1, .LBB80_11
8241 ; RV32ZVE32F-NEXT: .LBB80_3: # %else4
8242 ; RV32ZVE32F-NEXT: andi a1, a0, 8
8243 ; RV32ZVE32F-NEXT: bnez a1, .LBB80_12
8244 ; RV32ZVE32F-NEXT: .LBB80_4: # %else6
8245 ; RV32ZVE32F-NEXT: andi a1, a0, 16
8246 ; RV32ZVE32F-NEXT: bnez a1, .LBB80_13
8247 ; RV32ZVE32F-NEXT: .LBB80_5: # %else8
8248 ; RV32ZVE32F-NEXT: andi a1, a0, 32
8249 ; RV32ZVE32F-NEXT: bnez a1, .LBB80_14
8250 ; RV32ZVE32F-NEXT: .LBB80_6: # %else10
8251 ; RV32ZVE32F-NEXT: andi a1, a0, 64
8252 ; RV32ZVE32F-NEXT: bnez a1, .LBB80_15
8253 ; RV32ZVE32F-NEXT: .LBB80_7: # %else12
8254 ; RV32ZVE32F-NEXT: andi a0, a0, -128
8255 ; RV32ZVE32F-NEXT: bnez a0, .LBB80_16
8256 ; RV32ZVE32F-NEXT: .LBB80_8: # %else14
8257 ; RV32ZVE32F-NEXT: ret
8258 ; RV32ZVE32F-NEXT: .LBB80_9: # %cond.store
8259 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
8260 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
8261 ; RV32ZVE32F-NEXT: fsd fa0, 0(a1)
8262 ; RV32ZVE32F-NEXT: andi a1, a0, 2
8263 ; RV32ZVE32F-NEXT: beqz a1, .LBB80_2
8264 ; RV32ZVE32F-NEXT: .LBB80_10: # %cond.store1
8265 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8266 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
8267 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
8268 ; RV32ZVE32F-NEXT: fsd fa1, 0(a1)
8269 ; RV32ZVE32F-NEXT: andi a1, a0, 4
8270 ; RV32ZVE32F-NEXT: beqz a1, .LBB80_3
8271 ; RV32ZVE32F-NEXT: .LBB80_11: # %cond.store3
8272 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8273 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
8274 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
8275 ; RV32ZVE32F-NEXT: fsd fa2, 0(a1)
8276 ; RV32ZVE32F-NEXT: andi a1, a0, 8
8277 ; RV32ZVE32F-NEXT: beqz a1, .LBB80_4
8278 ; RV32ZVE32F-NEXT: .LBB80_12: # %cond.store5
8279 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8280 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
8281 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
8282 ; RV32ZVE32F-NEXT: fsd fa3, 0(a1)
8283 ; RV32ZVE32F-NEXT: andi a1, a0, 16
8284 ; RV32ZVE32F-NEXT: beqz a1, .LBB80_5
8285 ; RV32ZVE32F-NEXT: .LBB80_13: # %cond.store7
8286 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
8287 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
8288 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
8289 ; RV32ZVE32F-NEXT: fsd fa4, 0(a1)
8290 ; RV32ZVE32F-NEXT: andi a1, a0, 32
8291 ; RV32ZVE32F-NEXT: beqz a1, .LBB80_6
8292 ; RV32ZVE32F-NEXT: .LBB80_14: # %cond.store9
8293 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
8294 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
8295 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
8296 ; RV32ZVE32F-NEXT: fsd fa5, 0(a1)
8297 ; RV32ZVE32F-NEXT: andi a1, a0, 64
8298 ; RV32ZVE32F-NEXT: beqz a1, .LBB80_7
8299 ; RV32ZVE32F-NEXT: .LBB80_15: # %cond.store11
8300 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
8301 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
8302 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
8303 ; RV32ZVE32F-NEXT: fsd fa6, 0(a1)
8304 ; RV32ZVE32F-NEXT: andi a0, a0, -128
8305 ; RV32ZVE32F-NEXT: beqz a0, .LBB80_8
8306 ; RV32ZVE32F-NEXT: .LBB80_16: # %cond.store13
8307 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
8308 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
8309 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
8310 ; RV32ZVE32F-NEXT: fsd fa7, 0(a0)
8311 ; RV32ZVE32F-NEXT: ret
8313 ; RV64ZVE32F-LABEL: mscatter_v8f64:
8314 ; RV64ZVE32F: # %bb.0:
8315 ; RV64ZVE32F-NEXT: ld a1, 56(a0)
8316 ; RV64ZVE32F-NEXT: ld a2, 48(a0)
8317 ; RV64ZVE32F-NEXT: ld a4, 40(a0)
8318 ; RV64ZVE32F-NEXT: ld a5, 32(a0)
8319 ; RV64ZVE32F-NEXT: ld a6, 24(a0)
8320 ; RV64ZVE32F-NEXT: ld a7, 16(a0)
8321 ; RV64ZVE32F-NEXT: ld t0, 8(a0)
8322 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
8323 ; RV64ZVE32F-NEXT: vmv.x.s a3, v0
8324 ; RV64ZVE32F-NEXT: andi t1, a3, 1
8325 ; RV64ZVE32F-NEXT: bnez t1, .LBB80_9
8326 ; RV64ZVE32F-NEXT: # %bb.1: # %else
8327 ; RV64ZVE32F-NEXT: andi a0, a3, 2
8328 ; RV64ZVE32F-NEXT: bnez a0, .LBB80_10
8329 ; RV64ZVE32F-NEXT: .LBB80_2: # %else2
8330 ; RV64ZVE32F-NEXT: andi a0, a3, 4
8331 ; RV64ZVE32F-NEXT: bnez a0, .LBB80_11
8332 ; RV64ZVE32F-NEXT: .LBB80_3: # %else4
8333 ; RV64ZVE32F-NEXT: andi a0, a3, 8
8334 ; RV64ZVE32F-NEXT: bnez a0, .LBB80_12
8335 ; RV64ZVE32F-NEXT: .LBB80_4: # %else6
8336 ; RV64ZVE32F-NEXT: andi a0, a3, 16
8337 ; RV64ZVE32F-NEXT: bnez a0, .LBB80_13
8338 ; RV64ZVE32F-NEXT: .LBB80_5: # %else8
8339 ; RV64ZVE32F-NEXT: andi a0, a3, 32
8340 ; RV64ZVE32F-NEXT: bnez a0, .LBB80_14
8341 ; RV64ZVE32F-NEXT: .LBB80_6: # %else10
8342 ; RV64ZVE32F-NEXT: andi a0, a3, 64
8343 ; RV64ZVE32F-NEXT: bnez a0, .LBB80_15
8344 ; RV64ZVE32F-NEXT: .LBB80_7: # %else12
8345 ; RV64ZVE32F-NEXT: andi a0, a3, -128
8346 ; RV64ZVE32F-NEXT: bnez a0, .LBB80_16
8347 ; RV64ZVE32F-NEXT: .LBB80_8: # %else14
8348 ; RV64ZVE32F-NEXT: ret
8349 ; RV64ZVE32F-NEXT: .LBB80_9: # %cond.store
8350 ; RV64ZVE32F-NEXT: ld a0, 0(a0)
8351 ; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
8352 ; RV64ZVE32F-NEXT: andi a0, a3, 2
8353 ; RV64ZVE32F-NEXT: beqz a0, .LBB80_2
8354 ; RV64ZVE32F-NEXT: .LBB80_10: # %cond.store1
8355 ; RV64ZVE32F-NEXT: fsd fa1, 0(t0)
8356 ; RV64ZVE32F-NEXT: andi a0, a3, 4
8357 ; RV64ZVE32F-NEXT: beqz a0, .LBB80_3
8358 ; RV64ZVE32F-NEXT: .LBB80_11: # %cond.store3
8359 ; RV64ZVE32F-NEXT: fsd fa2, 0(a7)
8360 ; RV64ZVE32F-NEXT: andi a0, a3, 8
8361 ; RV64ZVE32F-NEXT: beqz a0, .LBB80_4
8362 ; RV64ZVE32F-NEXT: .LBB80_12: # %cond.store5
8363 ; RV64ZVE32F-NEXT: fsd fa3, 0(a6)
8364 ; RV64ZVE32F-NEXT: andi a0, a3, 16
8365 ; RV64ZVE32F-NEXT: beqz a0, .LBB80_5
8366 ; RV64ZVE32F-NEXT: .LBB80_13: # %cond.store7
8367 ; RV64ZVE32F-NEXT: fsd fa4, 0(a5)
8368 ; RV64ZVE32F-NEXT: andi a0, a3, 32
8369 ; RV64ZVE32F-NEXT: beqz a0, .LBB80_6
8370 ; RV64ZVE32F-NEXT: .LBB80_14: # %cond.store9
8371 ; RV64ZVE32F-NEXT: fsd fa5, 0(a4)
8372 ; RV64ZVE32F-NEXT: andi a0, a3, 64
8373 ; RV64ZVE32F-NEXT: beqz a0, .LBB80_7
8374 ; RV64ZVE32F-NEXT: .LBB80_15: # %cond.store11
8375 ; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
8376 ; RV64ZVE32F-NEXT: andi a0, a3, -128
8377 ; RV64ZVE32F-NEXT: beqz a0, .LBB80_8
8378 ; RV64ZVE32F-NEXT: .LBB80_16: # %cond.store13
8379 ; RV64ZVE32F-NEXT: fsd fa7, 0(a1)
8380 ; RV64ZVE32F-NEXT: ret
8381 call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
8385 define void @mscatter_baseidx_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
8386 ; RV32V-LABEL: mscatter_baseidx_v8i8_v8f64:
8388 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
8389 ; RV32V-NEXT: vsext.vf4 v14, v12
8390 ; RV32V-NEXT: vsll.vi v12, v14, 3
8391 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
8392 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
8395 ; RV64-LABEL: mscatter_baseidx_v8i8_v8f64:
8397 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
8398 ; RV64-NEXT: vsext.vf8 v16, v12
8399 ; RV64-NEXT: vsll.vi v12, v16, 3
8400 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
8403 ; RV32ZVE32F-LABEL: mscatter_baseidx_v8i8_v8f64:
8404 ; RV32ZVE32F: # %bb.0:
8405 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
8406 ; RV32ZVE32F-NEXT: vsext.vf4 v10, v8
8407 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
8408 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
8409 ; RV32ZVE32F-NEXT: vmv.x.s a1, v0
8410 ; RV32ZVE32F-NEXT: andi a2, a1, 1
8411 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
8412 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
8413 ; RV32ZVE32F-NEXT: bnez a2, .LBB81_9
8414 ; RV32ZVE32F-NEXT: # %bb.1: # %else
8415 ; RV32ZVE32F-NEXT: andi a0, a1, 2
8416 ; RV32ZVE32F-NEXT: bnez a0, .LBB81_10
8417 ; RV32ZVE32F-NEXT: .LBB81_2: # %else2
8418 ; RV32ZVE32F-NEXT: andi a0, a1, 4
8419 ; RV32ZVE32F-NEXT: bnez a0, .LBB81_11
8420 ; RV32ZVE32F-NEXT: .LBB81_3: # %else4
8421 ; RV32ZVE32F-NEXT: andi a0, a1, 8
8422 ; RV32ZVE32F-NEXT: bnez a0, .LBB81_12
8423 ; RV32ZVE32F-NEXT: .LBB81_4: # %else6
8424 ; RV32ZVE32F-NEXT: andi a0, a1, 16
8425 ; RV32ZVE32F-NEXT: bnez a0, .LBB81_13
8426 ; RV32ZVE32F-NEXT: .LBB81_5: # %else8
8427 ; RV32ZVE32F-NEXT: andi a0, a1, 32
8428 ; RV32ZVE32F-NEXT: bnez a0, .LBB81_14
8429 ; RV32ZVE32F-NEXT: .LBB81_6: # %else10
8430 ; RV32ZVE32F-NEXT: andi a0, a1, 64
8431 ; RV32ZVE32F-NEXT: bnez a0, .LBB81_15
8432 ; RV32ZVE32F-NEXT: .LBB81_7: # %else12
8433 ; RV32ZVE32F-NEXT: andi a0, a1, -128
8434 ; RV32ZVE32F-NEXT: bnez a0, .LBB81_16
8435 ; RV32ZVE32F-NEXT: .LBB81_8: # %else14
8436 ; RV32ZVE32F-NEXT: ret
8437 ; RV32ZVE32F-NEXT: .LBB81_9: # %cond.store
8438 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
8439 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
8440 ; RV32ZVE32F-NEXT: andi a0, a1, 2
8441 ; RV32ZVE32F-NEXT: beqz a0, .LBB81_2
8442 ; RV32ZVE32F-NEXT: .LBB81_10: # %cond.store1
8443 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8444 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
8445 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
8446 ; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
8447 ; RV32ZVE32F-NEXT: andi a0, a1, 4
8448 ; RV32ZVE32F-NEXT: beqz a0, .LBB81_3
8449 ; RV32ZVE32F-NEXT: .LBB81_11: # %cond.store3
8450 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8451 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
8452 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
8453 ; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
8454 ; RV32ZVE32F-NEXT: andi a0, a1, 8
8455 ; RV32ZVE32F-NEXT: beqz a0, .LBB81_4
8456 ; RV32ZVE32F-NEXT: .LBB81_12: # %cond.store5
8457 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8458 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
8459 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
8460 ; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
8461 ; RV32ZVE32F-NEXT: andi a0, a1, 16
8462 ; RV32ZVE32F-NEXT: beqz a0, .LBB81_5
8463 ; RV32ZVE32F-NEXT: .LBB81_13: # %cond.store7
8464 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
8465 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
8466 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
8467 ; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
8468 ; RV32ZVE32F-NEXT: andi a0, a1, 32
8469 ; RV32ZVE32F-NEXT: beqz a0, .LBB81_6
8470 ; RV32ZVE32F-NEXT: .LBB81_14: # %cond.store9
8471 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
8472 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
8473 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
8474 ; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
8475 ; RV32ZVE32F-NEXT: andi a0, a1, 64
8476 ; RV32ZVE32F-NEXT: beqz a0, .LBB81_7
8477 ; RV32ZVE32F-NEXT: .LBB81_15: # %cond.store11
8478 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
8479 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
8480 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
8481 ; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
8482 ; RV32ZVE32F-NEXT: andi a0, a1, -128
8483 ; RV32ZVE32F-NEXT: beqz a0, .LBB81_8
8484 ; RV32ZVE32F-NEXT: .LBB81_16: # %cond.store13
8485 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
8486 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
8487 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
8488 ; RV32ZVE32F-NEXT: fsd fa7, 0(a0)
8489 ; RV32ZVE32F-NEXT: ret
8491 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i8_v8f64:
8492 ; RV64ZVE32F: # %bb.0:
8493 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
8494 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
8495 ; RV64ZVE32F-NEXT: andi a2, a1, 1
8496 ; RV64ZVE32F-NEXT: beqz a2, .LBB81_2
8497 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
8498 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8499 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8500 ; RV64ZVE32F-NEXT: add a2, a0, a2
8501 ; RV64ZVE32F-NEXT: fsd fa0, 0(a2)
8502 ; RV64ZVE32F-NEXT: .LBB81_2: # %else
8503 ; RV64ZVE32F-NEXT: andi a2, a1, 2
8504 ; RV64ZVE32F-NEXT: beqz a2, .LBB81_4
8505 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
8506 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8507 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
8508 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
8509 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8510 ; RV64ZVE32F-NEXT: add a2, a0, a2
8511 ; RV64ZVE32F-NEXT: fsd fa1, 0(a2)
8512 ; RV64ZVE32F-NEXT: .LBB81_4: # %else2
8513 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
8514 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
8515 ; RV64ZVE32F-NEXT: andi a2, a1, 4
8516 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
8517 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
8518 ; RV64ZVE32F-NEXT: bnez a2, .LBB81_12
8519 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
8520 ; RV64ZVE32F-NEXT: andi a2, a1, 8
8521 ; RV64ZVE32F-NEXT: bnez a2, .LBB81_13
8522 ; RV64ZVE32F-NEXT: .LBB81_6: # %else6
8523 ; RV64ZVE32F-NEXT: andi a2, a1, 16
8524 ; RV64ZVE32F-NEXT: bnez a2, .LBB81_14
8525 ; RV64ZVE32F-NEXT: .LBB81_7: # %else8
8526 ; RV64ZVE32F-NEXT: andi a2, a1, 32
8527 ; RV64ZVE32F-NEXT: beqz a2, .LBB81_9
8528 ; RV64ZVE32F-NEXT: .LBB81_8: # %cond.store9
8529 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
8530 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8531 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8532 ; RV64ZVE32F-NEXT: add a2, a0, a2
8533 ; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
8534 ; RV64ZVE32F-NEXT: .LBB81_9: # %else10
8535 ; RV64ZVE32F-NEXT: andi a2, a1, 64
8536 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
8537 ; RV64ZVE32F-NEXT: bnez a2, .LBB81_15
8538 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
8539 ; RV64ZVE32F-NEXT: andi a1, a1, -128
8540 ; RV64ZVE32F-NEXT: bnez a1, .LBB81_16
8541 ; RV64ZVE32F-NEXT: .LBB81_11: # %else14
8542 ; RV64ZVE32F-NEXT: ret
8543 ; RV64ZVE32F-NEXT: .LBB81_12: # %cond.store3
8544 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8545 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8546 ; RV64ZVE32F-NEXT: add a2, a0, a2
8547 ; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
8548 ; RV64ZVE32F-NEXT: andi a2, a1, 8
8549 ; RV64ZVE32F-NEXT: beqz a2, .LBB81_6
8550 ; RV64ZVE32F-NEXT: .LBB81_13: # %cond.store5
8551 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
8552 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8553 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8554 ; RV64ZVE32F-NEXT: add a2, a0, a2
8555 ; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
8556 ; RV64ZVE32F-NEXT: andi a2, a1, 16
8557 ; RV64ZVE32F-NEXT: beqz a2, .LBB81_7
8558 ; RV64ZVE32F-NEXT: .LBB81_14: # %cond.store7
8559 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
8560 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8561 ; RV64ZVE32F-NEXT: add a2, a0, a2
8562 ; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
8563 ; RV64ZVE32F-NEXT: andi a2, a1, 32
8564 ; RV64ZVE32F-NEXT: bnez a2, .LBB81_8
8565 ; RV64ZVE32F-NEXT: j .LBB81_9
8566 ; RV64ZVE32F-NEXT: .LBB81_15: # %cond.store11
8567 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8568 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8569 ; RV64ZVE32F-NEXT: add a2, a0, a2
8570 ; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
8571 ; RV64ZVE32F-NEXT: andi a1, a1, -128
8572 ; RV64ZVE32F-NEXT: beqz a1, .LBB81_11
8573 ; RV64ZVE32F-NEXT: .LBB81_16: # %cond.store13
8574 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
8575 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
8576 ; RV64ZVE32F-NEXT: slli a1, a1, 3
8577 ; RV64ZVE32F-NEXT: add a0, a0, a1
8578 ; RV64ZVE32F-NEXT: fsd fa7, 0(a0)
8579 ; RV64ZVE32F-NEXT: ret
8580 %ptrs = getelementptr inbounds double, ptr %base, <8 x i8> %idxs
8581 call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
; Test: masked scatter of <8 x double> through pointers formed as
; base + sext(i8 index) * sizeof(double), i.e. a GEP over double with
; sign-extended 8-bit indices. Exercises vsoxei lowering on full V and the
; scalarized fallback on Zve32f (no 64-bit elements) for both RV32 and RV64.
; The CHECK lines below are autogenerated by update_llc_test_checks.py --
; regenerate them with that script instead of editing them by hand.
8585 define void @mscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
8586 ; RV32V-LABEL: mscatter_baseidx_sext_v8i8_v8f64:
8588 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
8589 ; RV32V-NEXT: vsext.vf4 v14, v12
8590 ; RV32V-NEXT: vsll.vi v12, v14, 3
8591 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
8592 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
8595 ; RV64-LABEL: mscatter_baseidx_sext_v8i8_v8f64:
8597 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
8598 ; RV64-NEXT: vsext.vf8 v16, v12
8599 ; RV64-NEXT: vsll.vi v12, v16, 3
8600 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
8603 ; RV32ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8f64:
8604 ; RV32ZVE32F: # %bb.0:
8605 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
8606 ; RV32ZVE32F-NEXT: vsext.vf4 v10, v8
8607 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
8608 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
8609 ; RV32ZVE32F-NEXT: vmv.x.s a1, v0
8610 ; RV32ZVE32F-NEXT: andi a2, a1, 1
8611 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
8612 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
8613 ; RV32ZVE32F-NEXT: bnez a2, .LBB82_9
8614 ; RV32ZVE32F-NEXT: # %bb.1: # %else
8615 ; RV32ZVE32F-NEXT: andi a0, a1, 2
8616 ; RV32ZVE32F-NEXT: bnez a0, .LBB82_10
8617 ; RV32ZVE32F-NEXT: .LBB82_2: # %else2
8618 ; RV32ZVE32F-NEXT: andi a0, a1, 4
8619 ; RV32ZVE32F-NEXT: bnez a0, .LBB82_11
8620 ; RV32ZVE32F-NEXT: .LBB82_3: # %else4
8621 ; RV32ZVE32F-NEXT: andi a0, a1, 8
8622 ; RV32ZVE32F-NEXT: bnez a0, .LBB82_12
8623 ; RV32ZVE32F-NEXT: .LBB82_4: # %else6
8624 ; RV32ZVE32F-NEXT: andi a0, a1, 16
8625 ; RV32ZVE32F-NEXT: bnez a0, .LBB82_13
8626 ; RV32ZVE32F-NEXT: .LBB82_5: # %else8
8627 ; RV32ZVE32F-NEXT: andi a0, a1, 32
8628 ; RV32ZVE32F-NEXT: bnez a0, .LBB82_14
8629 ; RV32ZVE32F-NEXT: .LBB82_6: # %else10
8630 ; RV32ZVE32F-NEXT: andi a0, a1, 64
8631 ; RV32ZVE32F-NEXT: bnez a0, .LBB82_15
8632 ; RV32ZVE32F-NEXT: .LBB82_7: # %else12
8633 ; RV32ZVE32F-NEXT: andi a0, a1, -128
8634 ; RV32ZVE32F-NEXT: bnez a0, .LBB82_16
8635 ; RV32ZVE32F-NEXT: .LBB82_8: # %else14
8636 ; RV32ZVE32F-NEXT: ret
8637 ; RV32ZVE32F-NEXT: .LBB82_9: # %cond.store
8638 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
8639 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
8640 ; RV32ZVE32F-NEXT: andi a0, a1, 2
8641 ; RV32ZVE32F-NEXT: beqz a0, .LBB82_2
8642 ; RV32ZVE32F-NEXT: .LBB82_10: # %cond.store1
8643 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8644 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
8645 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
8646 ; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
8647 ; RV32ZVE32F-NEXT: andi a0, a1, 4
8648 ; RV32ZVE32F-NEXT: beqz a0, .LBB82_3
8649 ; RV32ZVE32F-NEXT: .LBB82_11: # %cond.store3
8650 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8651 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
8652 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
8653 ; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
8654 ; RV32ZVE32F-NEXT: andi a0, a1, 8
8655 ; RV32ZVE32F-NEXT: beqz a0, .LBB82_4
8656 ; RV32ZVE32F-NEXT: .LBB82_12: # %cond.store5
8657 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8658 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
8659 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
8660 ; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
8661 ; RV32ZVE32F-NEXT: andi a0, a1, 16
8662 ; RV32ZVE32F-NEXT: beqz a0, .LBB82_5
8663 ; RV32ZVE32F-NEXT: .LBB82_13: # %cond.store7
8664 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
8665 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
8666 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
8667 ; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
8668 ; RV32ZVE32F-NEXT: andi a0, a1, 32
8669 ; RV32ZVE32F-NEXT: beqz a0, .LBB82_6
8670 ; RV32ZVE32F-NEXT: .LBB82_14: # %cond.store9
8671 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
8672 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
8673 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
8674 ; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
8675 ; RV32ZVE32F-NEXT: andi a0, a1, 64
8676 ; RV32ZVE32F-NEXT: beqz a0, .LBB82_7
8677 ; RV32ZVE32F-NEXT: .LBB82_15: # %cond.store11
8678 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
8679 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
8680 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
8681 ; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
8682 ; RV32ZVE32F-NEXT: andi a0, a1, -128
8683 ; RV32ZVE32F-NEXT: beqz a0, .LBB82_8
8684 ; RV32ZVE32F-NEXT: .LBB82_16: # %cond.store13
8685 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
8686 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
8687 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
8688 ; RV32ZVE32F-NEXT: fsd fa7, 0(a0)
8689 ; RV32ZVE32F-NEXT: ret
8691 ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i8_v8f64:
8692 ; RV64ZVE32F: # %bb.0:
8693 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
8694 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
8695 ; RV64ZVE32F-NEXT: andi a2, a1, 1
8696 ; RV64ZVE32F-NEXT: beqz a2, .LBB82_2
8697 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
8698 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8699 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8700 ; RV64ZVE32F-NEXT: add a2, a0, a2
8701 ; RV64ZVE32F-NEXT: fsd fa0, 0(a2)
8702 ; RV64ZVE32F-NEXT: .LBB82_2: # %else
8703 ; RV64ZVE32F-NEXT: andi a2, a1, 2
8704 ; RV64ZVE32F-NEXT: beqz a2, .LBB82_4
8705 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
8706 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8707 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
8708 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
8709 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8710 ; RV64ZVE32F-NEXT: add a2, a0, a2
8711 ; RV64ZVE32F-NEXT: fsd fa1, 0(a2)
8712 ; RV64ZVE32F-NEXT: .LBB82_4: # %else2
8713 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
8714 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
8715 ; RV64ZVE32F-NEXT: andi a2, a1, 4
8716 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
8717 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
8718 ; RV64ZVE32F-NEXT: bnez a2, .LBB82_12
8719 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
8720 ; RV64ZVE32F-NEXT: andi a2, a1, 8
8721 ; RV64ZVE32F-NEXT: bnez a2, .LBB82_13
8722 ; RV64ZVE32F-NEXT: .LBB82_6: # %else6
8723 ; RV64ZVE32F-NEXT: andi a2, a1, 16
8724 ; RV64ZVE32F-NEXT: bnez a2, .LBB82_14
8725 ; RV64ZVE32F-NEXT: .LBB82_7: # %else8
8726 ; RV64ZVE32F-NEXT: andi a2, a1, 32
8727 ; RV64ZVE32F-NEXT: beqz a2, .LBB82_9
8728 ; RV64ZVE32F-NEXT: .LBB82_8: # %cond.store9
8729 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
8730 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8731 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8732 ; RV64ZVE32F-NEXT: add a2, a0, a2
8733 ; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
8734 ; RV64ZVE32F-NEXT: .LBB82_9: # %else10
8735 ; RV64ZVE32F-NEXT: andi a2, a1, 64
8736 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
8737 ; RV64ZVE32F-NEXT: bnez a2, .LBB82_15
8738 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
8739 ; RV64ZVE32F-NEXT: andi a1, a1, -128
8740 ; RV64ZVE32F-NEXT: bnez a1, .LBB82_16
8741 ; RV64ZVE32F-NEXT: .LBB82_11: # %else14
8742 ; RV64ZVE32F-NEXT: ret
8743 ; RV64ZVE32F-NEXT: .LBB82_12: # %cond.store3
8744 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8745 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8746 ; RV64ZVE32F-NEXT: add a2, a0, a2
8747 ; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
8748 ; RV64ZVE32F-NEXT: andi a2, a1, 8
8749 ; RV64ZVE32F-NEXT: beqz a2, .LBB82_6
8750 ; RV64ZVE32F-NEXT: .LBB82_13: # %cond.store5
8751 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
8752 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8753 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8754 ; RV64ZVE32F-NEXT: add a2, a0, a2
8755 ; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
8756 ; RV64ZVE32F-NEXT: andi a2, a1, 16
8757 ; RV64ZVE32F-NEXT: beqz a2, .LBB82_7
8758 ; RV64ZVE32F-NEXT: .LBB82_14: # %cond.store7
8759 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
8760 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8761 ; RV64ZVE32F-NEXT: add a2, a0, a2
8762 ; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
8763 ; RV64ZVE32F-NEXT: andi a2, a1, 32
8764 ; RV64ZVE32F-NEXT: bnez a2, .LBB82_8
8765 ; RV64ZVE32F-NEXT: j .LBB82_9
8766 ; RV64ZVE32F-NEXT: .LBB82_15: # %cond.store11
8767 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8768 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8769 ; RV64ZVE32F-NEXT: add a2, a0, a2
8770 ; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
8771 ; RV64ZVE32F-NEXT: andi a1, a1, -128
8772 ; RV64ZVE32F-NEXT: beqz a1, .LBB82_11
8773 ; RV64ZVE32F-NEXT: .LBB82_16: # %cond.store13
8774 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
8775 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
8776 ; RV64ZVE32F-NEXT: slli a1, a1, 3
8777 ; RV64ZVE32F-NEXT: add a0, a0, a1
8778 ; RV64ZVE32F-NEXT: fsd fa7, 0(a0)
8779 ; RV64ZVE32F-NEXT: ret
; IR under test: widen the i8 indices by explicit sext, then scatter with
; element alignment 8 under mask %m.
8780 %eidxs = sext <8 x i8> %idxs to <8 x i64>
8781 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
8782 call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
; Test: masked scatter of <8 x double> through pointers formed as
; base + zext(i8 index) * sizeof(double). The zero-extension lets the V
; backends use narrower (16-bit) index vectors for vsoxei, and makes the
; RV64 Zve32f scalar fallback mask each extracted index with "andi ... 255".
; The CHECK lines below are autogenerated by update_llc_test_checks.py --
; regenerate them with that script instead of editing them by hand.
8786 define void @mscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m) {
8787 ; RV32V-LABEL: mscatter_baseidx_zext_v8i8_v8f64:
8789 ; RV32V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
8790 ; RV32V-NEXT: vzext.vf2 v13, v12
8791 ; RV32V-NEXT: vsll.vi v12, v13, 3
8792 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
8793 ; RV32V-NEXT: vsoxei16.v v8, (a0), v12, v0.t
8796 ; RV64-LABEL: mscatter_baseidx_zext_v8i8_v8f64:
8798 ; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
8799 ; RV64-NEXT: vzext.vf2 v13, v12
8800 ; RV64-NEXT: vsll.vi v12, v13, 3
8801 ; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, ma
8802 ; RV64-NEXT: vsoxei16.v v8, (a0), v12, v0.t
8805 ; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8f64:
8806 ; RV32ZVE32F: # %bb.0:
8807 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
8808 ; RV32ZVE32F-NEXT: vzext.vf4 v10, v8
8809 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
8810 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
8811 ; RV32ZVE32F-NEXT: vmv.x.s a1, v0
8812 ; RV32ZVE32F-NEXT: andi a2, a1, 1
8813 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
8814 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
8815 ; RV32ZVE32F-NEXT: bnez a2, .LBB83_9
8816 ; RV32ZVE32F-NEXT: # %bb.1: # %else
8817 ; RV32ZVE32F-NEXT: andi a0, a1, 2
8818 ; RV32ZVE32F-NEXT: bnez a0, .LBB83_10
8819 ; RV32ZVE32F-NEXT: .LBB83_2: # %else2
8820 ; RV32ZVE32F-NEXT: andi a0, a1, 4
8821 ; RV32ZVE32F-NEXT: bnez a0, .LBB83_11
8822 ; RV32ZVE32F-NEXT: .LBB83_3: # %else4
8823 ; RV32ZVE32F-NEXT: andi a0, a1, 8
8824 ; RV32ZVE32F-NEXT: bnez a0, .LBB83_12
8825 ; RV32ZVE32F-NEXT: .LBB83_4: # %else6
8826 ; RV32ZVE32F-NEXT: andi a0, a1, 16
8827 ; RV32ZVE32F-NEXT: bnez a0, .LBB83_13
8828 ; RV32ZVE32F-NEXT: .LBB83_5: # %else8
8829 ; RV32ZVE32F-NEXT: andi a0, a1, 32
8830 ; RV32ZVE32F-NEXT: bnez a0, .LBB83_14
8831 ; RV32ZVE32F-NEXT: .LBB83_6: # %else10
8832 ; RV32ZVE32F-NEXT: andi a0, a1, 64
8833 ; RV32ZVE32F-NEXT: bnez a0, .LBB83_15
8834 ; RV32ZVE32F-NEXT: .LBB83_7: # %else12
8835 ; RV32ZVE32F-NEXT: andi a0, a1, -128
8836 ; RV32ZVE32F-NEXT: bnez a0, .LBB83_16
8837 ; RV32ZVE32F-NEXT: .LBB83_8: # %else14
8838 ; RV32ZVE32F-NEXT: ret
8839 ; RV32ZVE32F-NEXT: .LBB83_9: # %cond.store
8840 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
8841 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
8842 ; RV32ZVE32F-NEXT: andi a0, a1, 2
8843 ; RV32ZVE32F-NEXT: beqz a0, .LBB83_2
8844 ; RV32ZVE32F-NEXT: .LBB83_10: # %cond.store1
8845 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8846 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
8847 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
8848 ; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
8849 ; RV32ZVE32F-NEXT: andi a0, a1, 4
8850 ; RV32ZVE32F-NEXT: beqz a0, .LBB83_3
8851 ; RV32ZVE32F-NEXT: .LBB83_11: # %cond.store3
8852 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8853 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
8854 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
8855 ; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
8856 ; RV32ZVE32F-NEXT: andi a0, a1, 8
8857 ; RV32ZVE32F-NEXT: beqz a0, .LBB83_4
8858 ; RV32ZVE32F-NEXT: .LBB83_12: # %cond.store5
8859 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8860 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
8861 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
8862 ; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
8863 ; RV32ZVE32F-NEXT: andi a0, a1, 16
8864 ; RV32ZVE32F-NEXT: beqz a0, .LBB83_5
8865 ; RV32ZVE32F-NEXT: .LBB83_13: # %cond.store7
8866 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
8867 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
8868 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
8869 ; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
8870 ; RV32ZVE32F-NEXT: andi a0, a1, 32
8871 ; RV32ZVE32F-NEXT: beqz a0, .LBB83_6
8872 ; RV32ZVE32F-NEXT: .LBB83_14: # %cond.store9
8873 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
8874 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
8875 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
8876 ; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
8877 ; RV32ZVE32F-NEXT: andi a0, a1, 64
8878 ; RV32ZVE32F-NEXT: beqz a0, .LBB83_7
8879 ; RV32ZVE32F-NEXT: .LBB83_15: # %cond.store11
8880 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
8881 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
8882 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
8883 ; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
8884 ; RV32ZVE32F-NEXT: andi a0, a1, -128
8885 ; RV32ZVE32F-NEXT: beqz a0, .LBB83_8
8886 ; RV32ZVE32F-NEXT: .LBB83_16: # %cond.store13
8887 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
8888 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
8889 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
8890 ; RV32ZVE32F-NEXT: fsd fa7, 0(a0)
8891 ; RV32ZVE32F-NEXT: ret
8893 ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8f64:
8894 ; RV64ZVE32F: # %bb.0:
8895 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
8896 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
8897 ; RV64ZVE32F-NEXT: andi a2, a1, 1
8898 ; RV64ZVE32F-NEXT: beqz a2, .LBB83_2
8899 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
8900 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8901 ; RV64ZVE32F-NEXT: andi a2, a2, 255
8902 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8903 ; RV64ZVE32F-NEXT: add a2, a0, a2
8904 ; RV64ZVE32F-NEXT: fsd fa0, 0(a2)
8905 ; RV64ZVE32F-NEXT: .LBB83_2: # %else
8906 ; RV64ZVE32F-NEXT: andi a2, a1, 2
8907 ; RV64ZVE32F-NEXT: beqz a2, .LBB83_4
8908 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
8909 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8910 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
8911 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
8912 ; RV64ZVE32F-NEXT: andi a2, a2, 255
8913 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8914 ; RV64ZVE32F-NEXT: add a2, a0, a2
8915 ; RV64ZVE32F-NEXT: fsd fa1, 0(a2)
8916 ; RV64ZVE32F-NEXT: .LBB83_4: # %else2
8917 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
8918 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
8919 ; RV64ZVE32F-NEXT: andi a2, a1, 4
8920 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
8921 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
8922 ; RV64ZVE32F-NEXT: bnez a2, .LBB83_12
8923 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
8924 ; RV64ZVE32F-NEXT: andi a2, a1, 8
8925 ; RV64ZVE32F-NEXT: bnez a2, .LBB83_13
8926 ; RV64ZVE32F-NEXT: .LBB83_6: # %else6
8927 ; RV64ZVE32F-NEXT: andi a2, a1, 16
8928 ; RV64ZVE32F-NEXT: bnez a2, .LBB83_14
8929 ; RV64ZVE32F-NEXT: .LBB83_7: # %else8
8930 ; RV64ZVE32F-NEXT: andi a2, a1, 32
8931 ; RV64ZVE32F-NEXT: beqz a2, .LBB83_9
8932 ; RV64ZVE32F-NEXT: .LBB83_8: # %cond.store9
8933 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
8934 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8935 ; RV64ZVE32F-NEXT: andi a2, a2, 255
8936 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8937 ; RV64ZVE32F-NEXT: add a2, a0, a2
8938 ; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
8939 ; RV64ZVE32F-NEXT: .LBB83_9: # %else10
8940 ; RV64ZVE32F-NEXT: andi a2, a1, 64
8941 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
8942 ; RV64ZVE32F-NEXT: bnez a2, .LBB83_15
8943 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
8944 ; RV64ZVE32F-NEXT: andi a1, a1, -128
8945 ; RV64ZVE32F-NEXT: bnez a1, .LBB83_16
8946 ; RV64ZVE32F-NEXT: .LBB83_11: # %else14
8947 ; RV64ZVE32F-NEXT: ret
8948 ; RV64ZVE32F-NEXT: .LBB83_12: # %cond.store3
8949 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8950 ; RV64ZVE32F-NEXT: andi a2, a2, 255
8951 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8952 ; RV64ZVE32F-NEXT: add a2, a0, a2
8953 ; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
8954 ; RV64ZVE32F-NEXT: andi a2, a1, 8
8955 ; RV64ZVE32F-NEXT: beqz a2, .LBB83_6
8956 ; RV64ZVE32F-NEXT: .LBB83_13: # %cond.store5
8957 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
8958 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8959 ; RV64ZVE32F-NEXT: andi a2, a2, 255
8960 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8961 ; RV64ZVE32F-NEXT: add a2, a0, a2
8962 ; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
8963 ; RV64ZVE32F-NEXT: andi a2, a1, 16
8964 ; RV64ZVE32F-NEXT: beqz a2, .LBB83_7
8965 ; RV64ZVE32F-NEXT: .LBB83_14: # %cond.store7
8966 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
8967 ; RV64ZVE32F-NEXT: andi a2, a2, 255
8968 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8969 ; RV64ZVE32F-NEXT: add a2, a0, a2
8970 ; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
8971 ; RV64ZVE32F-NEXT: andi a2, a1, 32
8972 ; RV64ZVE32F-NEXT: bnez a2, .LBB83_8
8973 ; RV64ZVE32F-NEXT: j .LBB83_9
8974 ; RV64ZVE32F-NEXT: .LBB83_15: # %cond.store11
8975 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8976 ; RV64ZVE32F-NEXT: andi a2, a2, 255
8977 ; RV64ZVE32F-NEXT: slli a2, a2, 3
8978 ; RV64ZVE32F-NEXT: add a2, a0, a2
8979 ; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
8980 ; RV64ZVE32F-NEXT: andi a1, a1, -128
8981 ; RV64ZVE32F-NEXT: beqz a1, .LBB83_11
8982 ; RV64ZVE32F-NEXT: .LBB83_16: # %cond.store13
8983 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
8984 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
8985 ; RV64ZVE32F-NEXT: andi a1, a1, 255
8986 ; RV64ZVE32F-NEXT: slli a1, a1, 3
8987 ; RV64ZVE32F-NEXT: add a0, a0, a1
8988 ; RV64ZVE32F-NEXT: fsd fa7, 0(a0)
8989 ; RV64ZVE32F-NEXT: ret
; IR under test: widen the i8 indices by explicit zext, then scatter with
; element alignment 8 under mask %m.
8990 %eidxs = zext <8 x i8> %idxs to <8 x i64>
8991 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
8992 call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
; Test: masked scatter of <8 x double> where the GEP takes the <8 x i16>
; index vector directly (no explicit extension in the IR); the backends
; sign-extend the i16 indices themselves before scaling by sizeof(double).
; The CHECK lines below are autogenerated by update_llc_test_checks.py --
; regenerate them with that script instead of editing them by hand.
8996 define void @mscatter_baseidx_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
8997 ; RV32V-LABEL: mscatter_baseidx_v8i16_v8f64:
8999 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
9000 ; RV32V-NEXT: vsext.vf2 v14, v12
9001 ; RV32V-NEXT: vsll.vi v12, v14, 3
9002 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
9003 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
9006 ; RV64-LABEL: mscatter_baseidx_v8i16_v8f64:
9008 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
9009 ; RV64-NEXT: vsext.vf4 v16, v12
9010 ; RV64-NEXT: vsll.vi v12, v16, 3
9011 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
9014 ; RV32ZVE32F-LABEL: mscatter_baseidx_v8i16_v8f64:
9015 ; RV32ZVE32F: # %bb.0:
9016 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
9017 ; RV32ZVE32F-NEXT: vsext.vf2 v10, v8
9018 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
9019 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
9020 ; RV32ZVE32F-NEXT: vmv.x.s a1, v0
9021 ; RV32ZVE32F-NEXT: andi a2, a1, 1
9022 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
9023 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
9024 ; RV32ZVE32F-NEXT: bnez a2, .LBB84_9
9025 ; RV32ZVE32F-NEXT: # %bb.1: # %else
9026 ; RV32ZVE32F-NEXT: andi a0, a1, 2
9027 ; RV32ZVE32F-NEXT: bnez a0, .LBB84_10
9028 ; RV32ZVE32F-NEXT: .LBB84_2: # %else2
9029 ; RV32ZVE32F-NEXT: andi a0, a1, 4
9030 ; RV32ZVE32F-NEXT: bnez a0, .LBB84_11
9031 ; RV32ZVE32F-NEXT: .LBB84_3: # %else4
9032 ; RV32ZVE32F-NEXT: andi a0, a1, 8
9033 ; RV32ZVE32F-NEXT: bnez a0, .LBB84_12
9034 ; RV32ZVE32F-NEXT: .LBB84_4: # %else6
9035 ; RV32ZVE32F-NEXT: andi a0, a1, 16
9036 ; RV32ZVE32F-NEXT: bnez a0, .LBB84_13
9037 ; RV32ZVE32F-NEXT: .LBB84_5: # %else8
9038 ; RV32ZVE32F-NEXT: andi a0, a1, 32
9039 ; RV32ZVE32F-NEXT: bnez a0, .LBB84_14
9040 ; RV32ZVE32F-NEXT: .LBB84_6: # %else10
9041 ; RV32ZVE32F-NEXT: andi a0, a1, 64
9042 ; RV32ZVE32F-NEXT: bnez a0, .LBB84_15
9043 ; RV32ZVE32F-NEXT: .LBB84_7: # %else12
9044 ; RV32ZVE32F-NEXT: andi a0, a1, -128
9045 ; RV32ZVE32F-NEXT: bnez a0, .LBB84_16
9046 ; RV32ZVE32F-NEXT: .LBB84_8: # %else14
9047 ; RV32ZVE32F-NEXT: ret
9048 ; RV32ZVE32F-NEXT: .LBB84_9: # %cond.store
9049 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
9050 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
9051 ; RV32ZVE32F-NEXT: andi a0, a1, 2
9052 ; RV32ZVE32F-NEXT: beqz a0, .LBB84_2
9053 ; RV32ZVE32F-NEXT: .LBB84_10: # %cond.store1
9054 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9055 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
9056 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9057 ; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
9058 ; RV32ZVE32F-NEXT: andi a0, a1, 4
9059 ; RV32ZVE32F-NEXT: beqz a0, .LBB84_3
9060 ; RV32ZVE32F-NEXT: .LBB84_11: # %cond.store3
9061 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9062 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
9063 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9064 ; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
9065 ; RV32ZVE32F-NEXT: andi a0, a1, 8
9066 ; RV32ZVE32F-NEXT: beqz a0, .LBB84_4
9067 ; RV32ZVE32F-NEXT: .LBB84_12: # %cond.store5
9068 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9069 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
9070 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9071 ; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
9072 ; RV32ZVE32F-NEXT: andi a0, a1, 16
9073 ; RV32ZVE32F-NEXT: beqz a0, .LBB84_5
9074 ; RV32ZVE32F-NEXT: .LBB84_13: # %cond.store7
9075 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9076 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
9077 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9078 ; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
9079 ; RV32ZVE32F-NEXT: andi a0, a1, 32
9080 ; RV32ZVE32F-NEXT: beqz a0, .LBB84_6
9081 ; RV32ZVE32F-NEXT: .LBB84_14: # %cond.store9
9082 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9083 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
9084 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9085 ; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
9086 ; RV32ZVE32F-NEXT: andi a0, a1, 64
9087 ; RV32ZVE32F-NEXT: beqz a0, .LBB84_7
9088 ; RV32ZVE32F-NEXT: .LBB84_15: # %cond.store11
9089 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9090 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
9091 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9092 ; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
9093 ; RV32ZVE32F-NEXT: andi a0, a1, -128
9094 ; RV32ZVE32F-NEXT: beqz a0, .LBB84_8
9095 ; RV32ZVE32F-NEXT: .LBB84_16: # %cond.store13
9096 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9097 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
9098 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
9099 ; RV32ZVE32F-NEXT: fsd fa7, 0(a0)
9100 ; RV32ZVE32F-NEXT: ret
9102 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i16_v8f64:
9103 ; RV64ZVE32F: # %bb.0:
9104 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
9105 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
9106 ; RV64ZVE32F-NEXT: andi a2, a1, 1
9107 ; RV64ZVE32F-NEXT: beqz a2, .LBB84_2
9108 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
9109 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
9110 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9111 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9112 ; RV64ZVE32F-NEXT: add a2, a0, a2
9113 ; RV64ZVE32F-NEXT: fsd fa0, 0(a2)
9114 ; RV64ZVE32F-NEXT: .LBB84_2: # %else
9115 ; RV64ZVE32F-NEXT: andi a2, a1, 2
9116 ; RV64ZVE32F-NEXT: beqz a2, .LBB84_4
9117 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
9118 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
9119 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
9120 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
9121 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9122 ; RV64ZVE32F-NEXT: add a2, a0, a2
9123 ; RV64ZVE32F-NEXT: fsd fa1, 0(a2)
9124 ; RV64ZVE32F-NEXT: .LBB84_4: # %else2
9125 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
9126 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
9127 ; RV64ZVE32F-NEXT: andi a2, a1, 4
9128 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
9129 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
9130 ; RV64ZVE32F-NEXT: bnez a2, .LBB84_12
9131 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
9132 ; RV64ZVE32F-NEXT: andi a2, a1, 8
9133 ; RV64ZVE32F-NEXT: bnez a2, .LBB84_13
9134 ; RV64ZVE32F-NEXT: .LBB84_6: # %else6
9135 ; RV64ZVE32F-NEXT: andi a2, a1, 16
9136 ; RV64ZVE32F-NEXT: bnez a2, .LBB84_14
9137 ; RV64ZVE32F-NEXT: .LBB84_7: # %else8
9138 ; RV64ZVE32F-NEXT: andi a2, a1, 32
9139 ; RV64ZVE32F-NEXT: beqz a2, .LBB84_9
9140 ; RV64ZVE32F-NEXT: .LBB84_8: # %cond.store9
9141 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
9142 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9143 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9144 ; RV64ZVE32F-NEXT: add a2, a0, a2
9145 ; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
9146 ; RV64ZVE32F-NEXT: .LBB84_9: # %else10
9147 ; RV64ZVE32F-NEXT: andi a2, a1, 64
9148 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
9149 ; RV64ZVE32F-NEXT: bnez a2, .LBB84_15
9150 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
9151 ; RV64ZVE32F-NEXT: andi a1, a1, -128
9152 ; RV64ZVE32F-NEXT: bnez a1, .LBB84_16
9153 ; RV64ZVE32F-NEXT: .LBB84_11: # %else14
9154 ; RV64ZVE32F-NEXT: ret
9155 ; RV64ZVE32F-NEXT: .LBB84_12: # %cond.store3
9156 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9157 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9158 ; RV64ZVE32F-NEXT: add a2, a0, a2
9159 ; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
9160 ; RV64ZVE32F-NEXT: andi a2, a1, 8
9161 ; RV64ZVE32F-NEXT: beqz a2, .LBB84_6
9162 ; RV64ZVE32F-NEXT: .LBB84_13: # %cond.store5
9163 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
9164 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9165 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9166 ; RV64ZVE32F-NEXT: add a2, a0, a2
9167 ; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
9168 ; RV64ZVE32F-NEXT: andi a2, a1, 16
9169 ; RV64ZVE32F-NEXT: beqz a2, .LBB84_7
9170 ; RV64ZVE32F-NEXT: .LBB84_14: # %cond.store7
9171 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
9172 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9173 ; RV64ZVE32F-NEXT: add a2, a0, a2
9174 ; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
9175 ; RV64ZVE32F-NEXT: andi a2, a1, 32
9176 ; RV64ZVE32F-NEXT: bnez a2, .LBB84_8
9177 ; RV64ZVE32F-NEXT: j .LBB84_9
9178 ; RV64ZVE32F-NEXT: .LBB84_15: # %cond.store11
9179 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9180 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9181 ; RV64ZVE32F-NEXT: add a2, a0, a2
9182 ; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
9183 ; RV64ZVE32F-NEXT: andi a1, a1, -128
9184 ; RV64ZVE32F-NEXT: beqz a1, .LBB84_11
9185 ; RV64ZVE32F-NEXT: .LBB84_16: # %cond.store13
9186 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
9187 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
9188 ; RV64ZVE32F-NEXT: slli a1, a1, 3
9189 ; RV64ZVE32F-NEXT: add a0, a0, a1
9190 ; RV64ZVE32F-NEXT: fsd fa7, 0(a0)
9191 ; RV64ZVE32F-NEXT: ret
; IR under test: GEP directly on the i16 index vector, then scatter with
; element alignment 8 under mask %m.
9192 %ptrs = getelementptr inbounds double, ptr %base, <8 x i16> %idxs
9193 call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
9197 define void @mscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
9198 ; RV32V-LABEL: mscatter_baseidx_sext_v8i16_v8f64:
9200 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
9201 ; RV32V-NEXT: vsext.vf2 v14, v12
9202 ; RV32V-NEXT: vsll.vi v12, v14, 3
9203 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
9204 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
9207 ; RV64-LABEL: mscatter_baseidx_sext_v8i16_v8f64:
9209 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
9210 ; RV64-NEXT: vsext.vf4 v16, v12
9211 ; RV64-NEXT: vsll.vi v12, v16, 3
9212 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
9215 ; RV32ZVE32F-LABEL: mscatter_baseidx_sext_v8i16_v8f64:
9216 ; RV32ZVE32F: # %bb.0:
9217 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
9218 ; RV32ZVE32F-NEXT: vsext.vf2 v10, v8
9219 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
9220 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
9221 ; RV32ZVE32F-NEXT: vmv.x.s a1, v0
9222 ; RV32ZVE32F-NEXT: andi a2, a1, 1
9223 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
9224 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
9225 ; RV32ZVE32F-NEXT: bnez a2, .LBB85_9
9226 ; RV32ZVE32F-NEXT: # %bb.1: # %else
9227 ; RV32ZVE32F-NEXT: andi a0, a1, 2
9228 ; RV32ZVE32F-NEXT: bnez a0, .LBB85_10
9229 ; RV32ZVE32F-NEXT: .LBB85_2: # %else2
9230 ; RV32ZVE32F-NEXT: andi a0, a1, 4
9231 ; RV32ZVE32F-NEXT: bnez a0, .LBB85_11
9232 ; RV32ZVE32F-NEXT: .LBB85_3: # %else4
9233 ; RV32ZVE32F-NEXT: andi a0, a1, 8
9234 ; RV32ZVE32F-NEXT: bnez a0, .LBB85_12
9235 ; RV32ZVE32F-NEXT: .LBB85_4: # %else6
9236 ; RV32ZVE32F-NEXT: andi a0, a1, 16
9237 ; RV32ZVE32F-NEXT: bnez a0, .LBB85_13
9238 ; RV32ZVE32F-NEXT: .LBB85_5: # %else8
9239 ; RV32ZVE32F-NEXT: andi a0, a1, 32
9240 ; RV32ZVE32F-NEXT: bnez a0, .LBB85_14
9241 ; RV32ZVE32F-NEXT: .LBB85_6: # %else10
9242 ; RV32ZVE32F-NEXT: andi a0, a1, 64
9243 ; RV32ZVE32F-NEXT: bnez a0, .LBB85_15
9244 ; RV32ZVE32F-NEXT: .LBB85_7: # %else12
9245 ; RV32ZVE32F-NEXT: andi a0, a1, -128
9246 ; RV32ZVE32F-NEXT: bnez a0, .LBB85_16
9247 ; RV32ZVE32F-NEXT: .LBB85_8: # %else14
9248 ; RV32ZVE32F-NEXT: ret
9249 ; RV32ZVE32F-NEXT: .LBB85_9: # %cond.store
9250 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
9251 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
9252 ; RV32ZVE32F-NEXT: andi a0, a1, 2
9253 ; RV32ZVE32F-NEXT: beqz a0, .LBB85_2
9254 ; RV32ZVE32F-NEXT: .LBB85_10: # %cond.store1
9255 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9256 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
9257 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9258 ; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
9259 ; RV32ZVE32F-NEXT: andi a0, a1, 4
9260 ; RV32ZVE32F-NEXT: beqz a0, .LBB85_3
9261 ; RV32ZVE32F-NEXT: .LBB85_11: # %cond.store3
9262 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9263 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
9264 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9265 ; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
9266 ; RV32ZVE32F-NEXT: andi a0, a1, 8
9267 ; RV32ZVE32F-NEXT: beqz a0, .LBB85_4
9268 ; RV32ZVE32F-NEXT: .LBB85_12: # %cond.store5
9269 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9270 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
9271 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9272 ; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
9273 ; RV32ZVE32F-NEXT: andi a0, a1, 16
9274 ; RV32ZVE32F-NEXT: beqz a0, .LBB85_5
9275 ; RV32ZVE32F-NEXT: .LBB85_13: # %cond.store7
9276 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9277 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
9278 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9279 ; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
9280 ; RV32ZVE32F-NEXT: andi a0, a1, 32
9281 ; RV32ZVE32F-NEXT: beqz a0, .LBB85_6
9282 ; RV32ZVE32F-NEXT: .LBB85_14: # %cond.store9
9283 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9284 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
9285 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9286 ; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
9287 ; RV32ZVE32F-NEXT: andi a0, a1, 64
9288 ; RV32ZVE32F-NEXT: beqz a0, .LBB85_7
9289 ; RV32ZVE32F-NEXT: .LBB85_15: # %cond.store11
9290 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9291 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
9292 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9293 ; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
9294 ; RV32ZVE32F-NEXT: andi a0, a1, -128
9295 ; RV32ZVE32F-NEXT: beqz a0, .LBB85_8
9296 ; RV32ZVE32F-NEXT: .LBB85_16: # %cond.store13
9297 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9298 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
9299 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
9300 ; RV32ZVE32F-NEXT: fsd fa7, 0(a0)
9301 ; RV32ZVE32F-NEXT: ret
9303 ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i16_v8f64:
9304 ; RV64ZVE32F: # %bb.0:
9305 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
9306 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
9307 ; RV64ZVE32F-NEXT: andi a2, a1, 1
9308 ; RV64ZVE32F-NEXT: beqz a2, .LBB85_2
9309 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
9310 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
9311 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9312 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9313 ; RV64ZVE32F-NEXT: add a2, a0, a2
9314 ; RV64ZVE32F-NEXT: fsd fa0, 0(a2)
9315 ; RV64ZVE32F-NEXT: .LBB85_2: # %else
9316 ; RV64ZVE32F-NEXT: andi a2, a1, 2
9317 ; RV64ZVE32F-NEXT: beqz a2, .LBB85_4
9318 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
9319 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
9320 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
9321 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
9322 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9323 ; RV64ZVE32F-NEXT: add a2, a0, a2
9324 ; RV64ZVE32F-NEXT: fsd fa1, 0(a2)
9325 ; RV64ZVE32F-NEXT: .LBB85_4: # %else2
9326 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
9327 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
9328 ; RV64ZVE32F-NEXT: andi a2, a1, 4
9329 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
9330 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
9331 ; RV64ZVE32F-NEXT: bnez a2, .LBB85_12
9332 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
9333 ; RV64ZVE32F-NEXT: andi a2, a1, 8
9334 ; RV64ZVE32F-NEXT: bnez a2, .LBB85_13
9335 ; RV64ZVE32F-NEXT: .LBB85_6: # %else6
9336 ; RV64ZVE32F-NEXT: andi a2, a1, 16
9337 ; RV64ZVE32F-NEXT: bnez a2, .LBB85_14
9338 ; RV64ZVE32F-NEXT: .LBB85_7: # %else8
9339 ; RV64ZVE32F-NEXT: andi a2, a1, 32
9340 ; RV64ZVE32F-NEXT: beqz a2, .LBB85_9
9341 ; RV64ZVE32F-NEXT: .LBB85_8: # %cond.store9
9342 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
9343 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9344 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9345 ; RV64ZVE32F-NEXT: add a2, a0, a2
9346 ; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
9347 ; RV64ZVE32F-NEXT: .LBB85_9: # %else10
9348 ; RV64ZVE32F-NEXT: andi a2, a1, 64
9349 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
9350 ; RV64ZVE32F-NEXT: bnez a2, .LBB85_15
9351 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
9352 ; RV64ZVE32F-NEXT: andi a1, a1, -128
9353 ; RV64ZVE32F-NEXT: bnez a1, .LBB85_16
9354 ; RV64ZVE32F-NEXT: .LBB85_11: # %else14
9355 ; RV64ZVE32F-NEXT: ret
9356 ; RV64ZVE32F-NEXT: .LBB85_12: # %cond.store3
9357 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9358 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9359 ; RV64ZVE32F-NEXT: add a2, a0, a2
9360 ; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
9361 ; RV64ZVE32F-NEXT: andi a2, a1, 8
9362 ; RV64ZVE32F-NEXT: beqz a2, .LBB85_6
9363 ; RV64ZVE32F-NEXT: .LBB85_13: # %cond.store5
9364 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
9365 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9366 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9367 ; RV64ZVE32F-NEXT: add a2, a0, a2
9368 ; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
9369 ; RV64ZVE32F-NEXT: andi a2, a1, 16
9370 ; RV64ZVE32F-NEXT: beqz a2, .LBB85_7
9371 ; RV64ZVE32F-NEXT: .LBB85_14: # %cond.store7
9372 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
9373 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9374 ; RV64ZVE32F-NEXT: add a2, a0, a2
9375 ; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
9376 ; RV64ZVE32F-NEXT: andi a2, a1, 32
9377 ; RV64ZVE32F-NEXT: bnez a2, .LBB85_8
9378 ; RV64ZVE32F-NEXT: j .LBB85_9
9379 ; RV64ZVE32F-NEXT: .LBB85_15: # %cond.store11
9380 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9381 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9382 ; RV64ZVE32F-NEXT: add a2, a0, a2
9383 ; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
9384 ; RV64ZVE32F-NEXT: andi a1, a1, -128
9385 ; RV64ZVE32F-NEXT: beqz a1, .LBB85_11
9386 ; RV64ZVE32F-NEXT: .LBB85_16: # %cond.store13
9387 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
9388 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
9389 ; RV64ZVE32F-NEXT: slli a1, a1, 3
9390 ; RV64ZVE32F-NEXT: add a0, a0, a1
9391 ; RV64ZVE32F-NEXT: fsd fa7, 0(a0)
9392 ; RV64ZVE32F-NEXT: ret
9393 %eidxs = sext <8 x i16> %idxs to <8 x i64>
9394 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
9395 call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
; Scatter of <8 x double> through pointers built by zero-extending <8 x i16>
; indices.  The zext proves the scaled offsets fit in 32 bits, so both RV32V
; and RV64 widen with vzext.vf2 and use a 32-bit indexed store (vsoxei32.v)
; instead of a 64-bit one.  RV32ZVE32F/RV64ZVE32F have no 64-bit vector
; elements and scalarize: RV64ZVE32F masks each extracted index with 0xffff
; (lui 16 / addiw -1 / and) and emits one conditional fsd per mask bit in a2.
; NOTE(review): check lines are autogenerated — regenerate with
; utils/update_llc_test_checks.py rather than editing by hand.
9399 define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m) {
9400 ; RV32V-LABEL: mscatter_baseidx_zext_v8i16_v8f64:
9402 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
9403 ; RV32V-NEXT: vzext.vf2 v14, v12
9404 ; RV32V-NEXT: vsll.vi v12, v14, 3
9405 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
9406 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
9409 ; RV64-LABEL: mscatter_baseidx_zext_v8i16_v8f64:
9411 ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
9412 ; RV64-NEXT: vzext.vf2 v14, v12
9413 ; RV64-NEXT: vsll.vi v12, v14, 3
9414 ; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, ma
9415 ; RV64-NEXT: vsoxei32.v v8, (a0), v12, v0.t
9418 ; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8f64:
9419 ; RV32ZVE32F: # %bb.0:
9420 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
9421 ; RV32ZVE32F-NEXT: vzext.vf2 v10, v8
9422 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
9423 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
9424 ; RV32ZVE32F-NEXT: vmv.x.s a1, v0
9425 ; RV32ZVE32F-NEXT: andi a2, a1, 1
9426 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
9427 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
9428 ; RV32ZVE32F-NEXT: bnez a2, .LBB86_9
9429 ; RV32ZVE32F-NEXT: # %bb.1: # %else
9430 ; RV32ZVE32F-NEXT: andi a0, a1, 2
9431 ; RV32ZVE32F-NEXT: bnez a0, .LBB86_10
9432 ; RV32ZVE32F-NEXT: .LBB86_2: # %else2
9433 ; RV32ZVE32F-NEXT: andi a0, a1, 4
9434 ; RV32ZVE32F-NEXT: bnez a0, .LBB86_11
9435 ; RV32ZVE32F-NEXT: .LBB86_3: # %else4
9436 ; RV32ZVE32F-NEXT: andi a0, a1, 8
9437 ; RV32ZVE32F-NEXT: bnez a0, .LBB86_12
9438 ; RV32ZVE32F-NEXT: .LBB86_4: # %else6
9439 ; RV32ZVE32F-NEXT: andi a0, a1, 16
9440 ; RV32ZVE32F-NEXT: bnez a0, .LBB86_13
9441 ; RV32ZVE32F-NEXT: .LBB86_5: # %else8
9442 ; RV32ZVE32F-NEXT: andi a0, a1, 32
9443 ; RV32ZVE32F-NEXT: bnez a0, .LBB86_14
9444 ; RV32ZVE32F-NEXT: .LBB86_6: # %else10
9445 ; RV32ZVE32F-NEXT: andi a0, a1, 64
9446 ; RV32ZVE32F-NEXT: bnez a0, .LBB86_15
9447 ; RV32ZVE32F-NEXT: .LBB86_7: # %else12
9448 ; RV32ZVE32F-NEXT: andi a0, a1, -128
9449 ; RV32ZVE32F-NEXT: bnez a0, .LBB86_16
9450 ; RV32ZVE32F-NEXT: .LBB86_8: # %else14
9451 ; RV32ZVE32F-NEXT: ret
9452 ; RV32ZVE32F-NEXT: .LBB86_9: # %cond.store
9453 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
9454 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
9455 ; RV32ZVE32F-NEXT: andi a0, a1, 2
9456 ; RV32ZVE32F-NEXT: beqz a0, .LBB86_2
9457 ; RV32ZVE32F-NEXT: .LBB86_10: # %cond.store1
9458 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9459 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
9460 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9461 ; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
9462 ; RV32ZVE32F-NEXT: andi a0, a1, 4
9463 ; RV32ZVE32F-NEXT: beqz a0, .LBB86_3
9464 ; RV32ZVE32F-NEXT: .LBB86_11: # %cond.store3
9465 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9466 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
9467 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9468 ; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
9469 ; RV32ZVE32F-NEXT: andi a0, a1, 8
9470 ; RV32ZVE32F-NEXT: beqz a0, .LBB86_4
9471 ; RV32ZVE32F-NEXT: .LBB86_12: # %cond.store5
9472 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9473 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
9474 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9475 ; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
9476 ; RV32ZVE32F-NEXT: andi a0, a1, 16
9477 ; RV32ZVE32F-NEXT: beqz a0, .LBB86_5
9478 ; RV32ZVE32F-NEXT: .LBB86_13: # %cond.store7
9479 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9480 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
9481 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9482 ; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
9483 ; RV32ZVE32F-NEXT: andi a0, a1, 32
9484 ; RV32ZVE32F-NEXT: beqz a0, .LBB86_6
9485 ; RV32ZVE32F-NEXT: .LBB86_14: # %cond.store9
9486 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9487 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
9488 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9489 ; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
9490 ; RV32ZVE32F-NEXT: andi a0, a1, 64
9491 ; RV32ZVE32F-NEXT: beqz a0, .LBB86_7
9492 ; RV32ZVE32F-NEXT: .LBB86_15: # %cond.store11
9493 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9494 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
9495 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9496 ; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
9497 ; RV32ZVE32F-NEXT: andi a0, a1, -128
9498 ; RV32ZVE32F-NEXT: beqz a0, .LBB86_8
9499 ; RV32ZVE32F-NEXT: .LBB86_16: # %cond.store13
9500 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9501 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
9502 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
9503 ; RV32ZVE32F-NEXT: fsd fa7, 0(a0)
9504 ; RV32ZVE32F-NEXT: ret
9506 ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8f64:
9507 ; RV64ZVE32F: # %bb.0:
9508 ; RV64ZVE32F-NEXT: lui a1, 16
9509 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
9510 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
9511 ; RV64ZVE32F-NEXT: andi a3, a2, 1
9512 ; RV64ZVE32F-NEXT: addiw a1, a1, -1
9513 ; RV64ZVE32F-NEXT: beqz a3, .LBB86_2
9514 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
9515 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
9516 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
9517 ; RV64ZVE32F-NEXT: and a3, a3, a1
9518 ; RV64ZVE32F-NEXT: slli a3, a3, 3
9519 ; RV64ZVE32F-NEXT: add a3, a0, a3
9520 ; RV64ZVE32F-NEXT: fsd fa0, 0(a3)
9521 ; RV64ZVE32F-NEXT: .LBB86_2: # %else
9522 ; RV64ZVE32F-NEXT: andi a3, a2, 2
9523 ; RV64ZVE32F-NEXT: beqz a3, .LBB86_4
9524 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
9525 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
9526 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
9527 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9
9528 ; RV64ZVE32F-NEXT: and a3, a3, a1
9529 ; RV64ZVE32F-NEXT: slli a3, a3, 3
9530 ; RV64ZVE32F-NEXT: add a3, a0, a3
9531 ; RV64ZVE32F-NEXT: fsd fa1, 0(a3)
9532 ; RV64ZVE32F-NEXT: .LBB86_4: # %else2
9533 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
9534 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
9535 ; RV64ZVE32F-NEXT: andi a3, a2, 4
9536 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
9537 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
9538 ; RV64ZVE32F-NEXT: bnez a3, .LBB86_12
9539 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
9540 ; RV64ZVE32F-NEXT: andi a3, a2, 8
9541 ; RV64ZVE32F-NEXT: bnez a3, .LBB86_13
9542 ; RV64ZVE32F-NEXT: .LBB86_6: # %else6
9543 ; RV64ZVE32F-NEXT: andi a3, a2, 16
9544 ; RV64ZVE32F-NEXT: bnez a3, .LBB86_14
9545 ; RV64ZVE32F-NEXT: .LBB86_7: # %else8
9546 ; RV64ZVE32F-NEXT: andi a3, a2, 32
9547 ; RV64ZVE32F-NEXT: beqz a3, .LBB86_9
9548 ; RV64ZVE32F-NEXT: .LBB86_8: # %cond.store9
9549 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
9550 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
9551 ; RV64ZVE32F-NEXT: and a3, a3, a1
9552 ; RV64ZVE32F-NEXT: slli a3, a3, 3
9553 ; RV64ZVE32F-NEXT: add a3, a0, a3
9554 ; RV64ZVE32F-NEXT: fsd fa5, 0(a3)
9555 ; RV64ZVE32F-NEXT: .LBB86_9: # %else10
9556 ; RV64ZVE32F-NEXT: andi a3, a2, 64
9557 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
9558 ; RV64ZVE32F-NEXT: bnez a3, .LBB86_15
9559 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
9560 ; RV64ZVE32F-NEXT: andi a2, a2, -128
9561 ; RV64ZVE32F-NEXT: bnez a2, .LBB86_16
9562 ; RV64ZVE32F-NEXT: .LBB86_11: # %else14
9563 ; RV64ZVE32F-NEXT: ret
9564 ; RV64ZVE32F-NEXT: .LBB86_12: # %cond.store3
9565 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
9566 ; RV64ZVE32F-NEXT: and a3, a3, a1
9567 ; RV64ZVE32F-NEXT: slli a3, a3, 3
9568 ; RV64ZVE32F-NEXT: add a3, a0, a3
9569 ; RV64ZVE32F-NEXT: fsd fa2, 0(a3)
9570 ; RV64ZVE32F-NEXT: andi a3, a2, 8
9571 ; RV64ZVE32F-NEXT: beqz a3, .LBB86_6
9572 ; RV64ZVE32F-NEXT: .LBB86_13: # %cond.store5
9573 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
9574 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
9575 ; RV64ZVE32F-NEXT: and a3, a3, a1
9576 ; RV64ZVE32F-NEXT: slli a3, a3, 3
9577 ; RV64ZVE32F-NEXT: add a3, a0, a3
9578 ; RV64ZVE32F-NEXT: fsd fa3, 0(a3)
9579 ; RV64ZVE32F-NEXT: andi a3, a2, 16
9580 ; RV64ZVE32F-NEXT: beqz a3, .LBB86_7
9581 ; RV64ZVE32F-NEXT: .LBB86_14: # %cond.store7
9582 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9
9583 ; RV64ZVE32F-NEXT: and a3, a3, a1
9584 ; RV64ZVE32F-NEXT: slli a3, a3, 3
9585 ; RV64ZVE32F-NEXT: add a3, a0, a3
9586 ; RV64ZVE32F-NEXT: fsd fa4, 0(a3)
9587 ; RV64ZVE32F-NEXT: andi a3, a2, 32
9588 ; RV64ZVE32F-NEXT: bnez a3, .LBB86_8
9589 ; RV64ZVE32F-NEXT: j .LBB86_9
9590 ; RV64ZVE32F-NEXT: .LBB86_15: # %cond.store11
9591 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
9592 ; RV64ZVE32F-NEXT: and a3, a3, a1
9593 ; RV64ZVE32F-NEXT: slli a3, a3, 3
9594 ; RV64ZVE32F-NEXT: add a3, a0, a3
9595 ; RV64ZVE32F-NEXT: fsd fa6, 0(a3)
9596 ; RV64ZVE32F-NEXT: andi a2, a2, -128
9597 ; RV64ZVE32F-NEXT: beqz a2, .LBB86_11
9598 ; RV64ZVE32F-NEXT: .LBB86_16: # %cond.store13
9599 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
9600 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9601 ; RV64ZVE32F-NEXT: and a1, a2, a1
9602 ; RV64ZVE32F-NEXT: slli a1, a1, 3
9603 ; RV64ZVE32F-NEXT: add a0, a0, a1
9604 ; RV64ZVE32F-NEXT: fsd fa7, 0(a0)
9605 ; RV64ZVE32F-NEXT: ret
9606 %eidxs = zext <8 x i16> %idxs to <8 x i64>
9607 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
9608 call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
; Scatter of <8 x double> with raw <8 x i32> GEP indices.  RV32V can use the
; 32-bit indices directly as offsets (vsll.vi + vsoxei32.v); RV64 must first
; sign-extend them to 64 bits (vsext.vf2 + vsoxei64.v).  RV64ZVE32F has no
; 64-bit vector elements, so it scalarizes: the mask lives in a1 and each set
; bit triggers a scalar fsd through the corresponding extracted index.
; NOTE(review): check lines are autogenerated — regenerate with
; utils/update_llc_test_checks.py rather than editing by hand.
9612 define void @mscatter_baseidx_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m) {
9613 ; RV32V-LABEL: mscatter_baseidx_v8i32_v8f64:
9615 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
9616 ; RV32V-NEXT: vsll.vi v12, v12, 3
9617 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
9618 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
9621 ; RV64-LABEL: mscatter_baseidx_v8i32_v8f64:
9623 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
9624 ; RV64-NEXT: vsext.vf2 v16, v12
9625 ; RV64-NEXT: vsll.vi v12, v16, 3
9626 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
9629 ; RV32ZVE32F-LABEL: mscatter_baseidx_v8i32_v8f64:
9630 ; RV32ZVE32F: # %bb.0:
9631 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
9632 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
9633 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
9634 ; RV32ZVE32F-NEXT: vmv.x.s a1, v0
9635 ; RV32ZVE32F-NEXT: andi a2, a1, 1
9636 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
9637 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
9638 ; RV32ZVE32F-NEXT: bnez a2, .LBB87_9
9639 ; RV32ZVE32F-NEXT: # %bb.1: # %else
9640 ; RV32ZVE32F-NEXT: andi a0, a1, 2
9641 ; RV32ZVE32F-NEXT: bnez a0, .LBB87_10
9642 ; RV32ZVE32F-NEXT: .LBB87_2: # %else2
9643 ; RV32ZVE32F-NEXT: andi a0, a1, 4
9644 ; RV32ZVE32F-NEXT: bnez a0, .LBB87_11
9645 ; RV32ZVE32F-NEXT: .LBB87_3: # %else4
9646 ; RV32ZVE32F-NEXT: andi a0, a1, 8
9647 ; RV32ZVE32F-NEXT: bnez a0, .LBB87_12
9648 ; RV32ZVE32F-NEXT: .LBB87_4: # %else6
9649 ; RV32ZVE32F-NEXT: andi a0, a1, 16
9650 ; RV32ZVE32F-NEXT: bnez a0, .LBB87_13
9651 ; RV32ZVE32F-NEXT: .LBB87_5: # %else8
9652 ; RV32ZVE32F-NEXT: andi a0, a1, 32
9653 ; RV32ZVE32F-NEXT: bnez a0, .LBB87_14
9654 ; RV32ZVE32F-NEXT: .LBB87_6: # %else10
9655 ; RV32ZVE32F-NEXT: andi a0, a1, 64
9656 ; RV32ZVE32F-NEXT: bnez a0, .LBB87_15
9657 ; RV32ZVE32F-NEXT: .LBB87_7: # %else12
9658 ; RV32ZVE32F-NEXT: andi a0, a1, -128
9659 ; RV32ZVE32F-NEXT: bnez a0, .LBB87_16
9660 ; RV32ZVE32F-NEXT: .LBB87_8: # %else14
9661 ; RV32ZVE32F-NEXT: ret
9662 ; RV32ZVE32F-NEXT: .LBB87_9: # %cond.store
9663 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
9664 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
9665 ; RV32ZVE32F-NEXT: andi a0, a1, 2
9666 ; RV32ZVE32F-NEXT: beqz a0, .LBB87_2
9667 ; RV32ZVE32F-NEXT: .LBB87_10: # %cond.store1
9668 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9669 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
9670 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9671 ; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
9672 ; RV32ZVE32F-NEXT: andi a0, a1, 4
9673 ; RV32ZVE32F-NEXT: beqz a0, .LBB87_3
9674 ; RV32ZVE32F-NEXT: .LBB87_11: # %cond.store3
9675 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9676 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
9677 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9678 ; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
9679 ; RV32ZVE32F-NEXT: andi a0, a1, 8
9680 ; RV32ZVE32F-NEXT: beqz a0, .LBB87_4
9681 ; RV32ZVE32F-NEXT: .LBB87_12: # %cond.store5
9682 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9683 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
9684 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9685 ; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
9686 ; RV32ZVE32F-NEXT: andi a0, a1, 16
9687 ; RV32ZVE32F-NEXT: beqz a0, .LBB87_5
9688 ; RV32ZVE32F-NEXT: .LBB87_13: # %cond.store7
9689 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9690 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
9691 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9692 ; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
9693 ; RV32ZVE32F-NEXT: andi a0, a1, 32
9694 ; RV32ZVE32F-NEXT: beqz a0, .LBB87_6
9695 ; RV32ZVE32F-NEXT: .LBB87_14: # %cond.store9
9696 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9697 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
9698 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9699 ; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
9700 ; RV32ZVE32F-NEXT: andi a0, a1, 64
9701 ; RV32ZVE32F-NEXT: beqz a0, .LBB87_7
9702 ; RV32ZVE32F-NEXT: .LBB87_15: # %cond.store11
9703 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9704 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
9705 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9706 ; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
9707 ; RV32ZVE32F-NEXT: andi a0, a1, -128
9708 ; RV32ZVE32F-NEXT: beqz a0, .LBB87_8
9709 ; RV32ZVE32F-NEXT: .LBB87_16: # %cond.store13
9710 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9711 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
9712 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
9713 ; RV32ZVE32F-NEXT: fsd fa7, 0(a0)
9714 ; RV32ZVE32F-NEXT: ret
9716 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8i32_v8f64:
9717 ; RV64ZVE32F: # %bb.0:
9718 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
9719 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
9720 ; RV64ZVE32F-NEXT: andi a2, a1, 1
9721 ; RV64ZVE32F-NEXT: beqz a2, .LBB87_2
9722 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
9723 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
9724 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9725 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9726 ; RV64ZVE32F-NEXT: add a2, a0, a2
9727 ; RV64ZVE32F-NEXT: fsd fa0, 0(a2)
9728 ; RV64ZVE32F-NEXT: .LBB87_2: # %else
9729 ; RV64ZVE32F-NEXT: andi a2, a1, 2
9730 ; RV64ZVE32F-NEXT: beqz a2, .LBB87_4
9731 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
9732 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9733 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
9734 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
9735 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9736 ; RV64ZVE32F-NEXT: add a2, a0, a2
9737 ; RV64ZVE32F-NEXT: fsd fa1, 0(a2)
9738 ; RV64ZVE32F-NEXT: .LBB87_4: # %else2
9739 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
9740 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
9741 ; RV64ZVE32F-NEXT: andi a2, a1, 4
9742 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
9743 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
9744 ; RV64ZVE32F-NEXT: bnez a2, .LBB87_12
9745 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
9746 ; RV64ZVE32F-NEXT: andi a2, a1, 8
9747 ; RV64ZVE32F-NEXT: bnez a2, .LBB87_13
9748 ; RV64ZVE32F-NEXT: .LBB87_6: # %else6
9749 ; RV64ZVE32F-NEXT: andi a2, a1, 16
9750 ; RV64ZVE32F-NEXT: bnez a2, .LBB87_14
9751 ; RV64ZVE32F-NEXT: .LBB87_7: # %else8
9752 ; RV64ZVE32F-NEXT: andi a2, a1, 32
9753 ; RV64ZVE32F-NEXT: beqz a2, .LBB87_9
9754 ; RV64ZVE32F-NEXT: .LBB87_8: # %cond.store9
9755 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
9756 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9757 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9758 ; RV64ZVE32F-NEXT: add a2, a0, a2
9759 ; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
9760 ; RV64ZVE32F-NEXT: .LBB87_9: # %else10
9761 ; RV64ZVE32F-NEXT: andi a2, a1, 64
9762 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
9763 ; RV64ZVE32F-NEXT: bnez a2, .LBB87_15
9764 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
9765 ; RV64ZVE32F-NEXT: andi a1, a1, -128
9766 ; RV64ZVE32F-NEXT: bnez a1, .LBB87_16
9767 ; RV64ZVE32F-NEXT: .LBB87_11: # %else14
9768 ; RV64ZVE32F-NEXT: ret
9769 ; RV64ZVE32F-NEXT: .LBB87_12: # %cond.store3
9770 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9771 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9772 ; RV64ZVE32F-NEXT: add a2, a0, a2
9773 ; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
9774 ; RV64ZVE32F-NEXT: andi a2, a1, 8
9775 ; RV64ZVE32F-NEXT: beqz a2, .LBB87_6
9776 ; RV64ZVE32F-NEXT: .LBB87_13: # %cond.store5
9777 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
9778 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9779 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9780 ; RV64ZVE32F-NEXT: add a2, a0, a2
9781 ; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
9782 ; RV64ZVE32F-NEXT: andi a2, a1, 16
9783 ; RV64ZVE32F-NEXT: beqz a2, .LBB87_7
9784 ; RV64ZVE32F-NEXT: .LBB87_14: # %cond.store7
9785 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
9786 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9787 ; RV64ZVE32F-NEXT: add a2, a0, a2
9788 ; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
9789 ; RV64ZVE32F-NEXT: andi a2, a1, 32
9790 ; RV64ZVE32F-NEXT: bnez a2, .LBB87_8
9791 ; RV64ZVE32F-NEXT: j .LBB87_9
9792 ; RV64ZVE32F-NEXT: .LBB87_15: # %cond.store11
9793 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9794 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9795 ; RV64ZVE32F-NEXT: add a2, a0, a2
9796 ; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
9797 ; RV64ZVE32F-NEXT: andi a1, a1, -128
9798 ; RV64ZVE32F-NEXT: beqz a1, .LBB87_11
9799 ; RV64ZVE32F-NEXT: .LBB87_16: # %cond.store13
9800 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
9801 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
9802 ; RV64ZVE32F-NEXT: slli a1, a1, 3
9803 ; RV64ZVE32F-NEXT: add a0, a0, a1
9804 ; RV64ZVE32F-NEXT: fsd fa7, 0(a0)
9805 ; RV64ZVE32F-NEXT: ret
9806 %ptrs = getelementptr inbounds double, ptr %base, <8 x i32> %idxs
9807 call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
; Same scatter as mscatter_baseidx_v8i32_v8f64 but with an explicit sext of
; the <8 x i32> indices to i64.  The generated code is identical to the
; raw-index case on every configuration (compare the check bodies), since the
; index extension performed there is already a sign extension (vsext.vf2).
; NOTE(review): check lines are autogenerated — regenerate with
; utils/update_llc_test_checks.py rather than editing by hand.
9811 define void @mscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m) {
9812 ; RV32V-LABEL: mscatter_baseidx_sext_v8i32_v8f64:
9814 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
9815 ; RV32V-NEXT: vsll.vi v12, v12, 3
9816 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
9817 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
9820 ; RV64-LABEL: mscatter_baseidx_sext_v8i32_v8f64:
9822 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
9823 ; RV64-NEXT: vsext.vf2 v16, v12
9824 ; RV64-NEXT: vsll.vi v12, v16, 3
9825 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
9828 ; RV32ZVE32F-LABEL: mscatter_baseidx_sext_v8i32_v8f64:
9829 ; RV32ZVE32F: # %bb.0:
9830 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
9831 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
9832 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
9833 ; RV32ZVE32F-NEXT: vmv.x.s a1, v0
9834 ; RV32ZVE32F-NEXT: andi a2, a1, 1
9835 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
9836 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
9837 ; RV32ZVE32F-NEXT: bnez a2, .LBB88_9
9838 ; RV32ZVE32F-NEXT: # %bb.1: # %else
9839 ; RV32ZVE32F-NEXT: andi a0, a1, 2
9840 ; RV32ZVE32F-NEXT: bnez a0, .LBB88_10
9841 ; RV32ZVE32F-NEXT: .LBB88_2: # %else2
9842 ; RV32ZVE32F-NEXT: andi a0, a1, 4
9843 ; RV32ZVE32F-NEXT: bnez a0, .LBB88_11
9844 ; RV32ZVE32F-NEXT: .LBB88_3: # %else4
9845 ; RV32ZVE32F-NEXT: andi a0, a1, 8
9846 ; RV32ZVE32F-NEXT: bnez a0, .LBB88_12
9847 ; RV32ZVE32F-NEXT: .LBB88_4: # %else6
9848 ; RV32ZVE32F-NEXT: andi a0, a1, 16
9849 ; RV32ZVE32F-NEXT: bnez a0, .LBB88_13
9850 ; RV32ZVE32F-NEXT: .LBB88_5: # %else8
9851 ; RV32ZVE32F-NEXT: andi a0, a1, 32
9852 ; RV32ZVE32F-NEXT: bnez a0, .LBB88_14
9853 ; RV32ZVE32F-NEXT: .LBB88_6: # %else10
9854 ; RV32ZVE32F-NEXT: andi a0, a1, 64
9855 ; RV32ZVE32F-NEXT: bnez a0, .LBB88_15
9856 ; RV32ZVE32F-NEXT: .LBB88_7: # %else12
9857 ; RV32ZVE32F-NEXT: andi a0, a1, -128
9858 ; RV32ZVE32F-NEXT: bnez a0, .LBB88_16
9859 ; RV32ZVE32F-NEXT: .LBB88_8: # %else14
9860 ; RV32ZVE32F-NEXT: ret
9861 ; RV32ZVE32F-NEXT: .LBB88_9: # %cond.store
9862 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
9863 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
9864 ; RV32ZVE32F-NEXT: andi a0, a1, 2
9865 ; RV32ZVE32F-NEXT: beqz a0, .LBB88_2
9866 ; RV32ZVE32F-NEXT: .LBB88_10: # %cond.store1
9867 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9868 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
9869 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9870 ; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
9871 ; RV32ZVE32F-NEXT: andi a0, a1, 4
9872 ; RV32ZVE32F-NEXT: beqz a0, .LBB88_3
9873 ; RV32ZVE32F-NEXT: .LBB88_11: # %cond.store3
9874 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9875 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
9876 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9877 ; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
9878 ; RV32ZVE32F-NEXT: andi a0, a1, 8
9879 ; RV32ZVE32F-NEXT: beqz a0, .LBB88_4
9880 ; RV32ZVE32F-NEXT: .LBB88_12: # %cond.store5
9881 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9882 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
9883 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9884 ; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
9885 ; RV32ZVE32F-NEXT: andi a0, a1, 16
9886 ; RV32ZVE32F-NEXT: beqz a0, .LBB88_5
9887 ; RV32ZVE32F-NEXT: .LBB88_13: # %cond.store7
9888 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9889 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
9890 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9891 ; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
9892 ; RV32ZVE32F-NEXT: andi a0, a1, 32
9893 ; RV32ZVE32F-NEXT: beqz a0, .LBB88_6
9894 ; RV32ZVE32F-NEXT: .LBB88_14: # %cond.store9
9895 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9896 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
9897 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9898 ; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
9899 ; RV32ZVE32F-NEXT: andi a0, a1, 64
9900 ; RV32ZVE32F-NEXT: beqz a0, .LBB88_7
9901 ; RV32ZVE32F-NEXT: .LBB88_15: # %cond.store11
9902 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9903 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
9904 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
9905 ; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
9906 ; RV32ZVE32F-NEXT: andi a0, a1, -128
9907 ; RV32ZVE32F-NEXT: beqz a0, .LBB88_8
9908 ; RV32ZVE32F-NEXT: .LBB88_16: # %cond.store13
9909 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9910 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
9911 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
9912 ; RV32ZVE32F-NEXT: fsd fa7, 0(a0)
9913 ; RV32ZVE32F-NEXT: ret
9915 ; RV64ZVE32F-LABEL: mscatter_baseidx_sext_v8i32_v8f64:
9916 ; RV64ZVE32F: # %bb.0:
9917 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
9918 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
9919 ; RV64ZVE32F-NEXT: andi a2, a1, 1
9920 ; RV64ZVE32F-NEXT: beqz a2, .LBB88_2
9921 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
9922 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
9923 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9924 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9925 ; RV64ZVE32F-NEXT: add a2, a0, a2
9926 ; RV64ZVE32F-NEXT: fsd fa0, 0(a2)
9927 ; RV64ZVE32F-NEXT: .LBB88_2: # %else
9928 ; RV64ZVE32F-NEXT: andi a2, a1, 2
9929 ; RV64ZVE32F-NEXT: beqz a2, .LBB88_4
9930 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
9931 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9932 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
9933 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
9934 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9935 ; RV64ZVE32F-NEXT: add a2, a0, a2
9936 ; RV64ZVE32F-NEXT: fsd fa1, 0(a2)
9937 ; RV64ZVE32F-NEXT: .LBB88_4: # %else2
9938 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
9939 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
9940 ; RV64ZVE32F-NEXT: andi a2, a1, 4
9941 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
9942 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
9943 ; RV64ZVE32F-NEXT: bnez a2, .LBB88_12
9944 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
9945 ; RV64ZVE32F-NEXT: andi a2, a1, 8
9946 ; RV64ZVE32F-NEXT: bnez a2, .LBB88_13
9947 ; RV64ZVE32F-NEXT: .LBB88_6: # %else6
9948 ; RV64ZVE32F-NEXT: andi a2, a1, 16
9949 ; RV64ZVE32F-NEXT: bnez a2, .LBB88_14
9950 ; RV64ZVE32F-NEXT: .LBB88_7: # %else8
9951 ; RV64ZVE32F-NEXT: andi a2, a1, 32
9952 ; RV64ZVE32F-NEXT: beqz a2, .LBB88_9
9953 ; RV64ZVE32F-NEXT: .LBB88_8: # %cond.store9
9954 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
9955 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9956 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9957 ; RV64ZVE32F-NEXT: add a2, a0, a2
9958 ; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
9959 ; RV64ZVE32F-NEXT: .LBB88_9: # %else10
9960 ; RV64ZVE32F-NEXT: andi a2, a1, 64
9961 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
9962 ; RV64ZVE32F-NEXT: bnez a2, .LBB88_15
9963 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
9964 ; RV64ZVE32F-NEXT: andi a1, a1, -128
9965 ; RV64ZVE32F-NEXT: bnez a1, .LBB88_16
9966 ; RV64ZVE32F-NEXT: .LBB88_11: # %else14
9967 ; RV64ZVE32F-NEXT: ret
9968 ; RV64ZVE32F-NEXT: .LBB88_12: # %cond.store3
9969 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9970 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9971 ; RV64ZVE32F-NEXT: add a2, a0, a2
9972 ; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
9973 ; RV64ZVE32F-NEXT: andi a2, a1, 8
9974 ; RV64ZVE32F-NEXT: beqz a2, .LBB88_6
9975 ; RV64ZVE32F-NEXT: .LBB88_13: # %cond.store5
9976 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
9977 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9978 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9979 ; RV64ZVE32F-NEXT: add a2, a0, a2
9980 ; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
9981 ; RV64ZVE32F-NEXT: andi a2, a1, 16
9982 ; RV64ZVE32F-NEXT: beqz a2, .LBB88_7
9983 ; RV64ZVE32F-NEXT: .LBB88_14: # %cond.store7
9984 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
9985 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9986 ; RV64ZVE32F-NEXT: add a2, a0, a2
9987 ; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
9988 ; RV64ZVE32F-NEXT: andi a2, a1, 32
9989 ; RV64ZVE32F-NEXT: bnez a2, .LBB88_8
9990 ; RV64ZVE32F-NEXT: j .LBB88_9
9991 ; RV64ZVE32F-NEXT: .LBB88_15: # %cond.store11
9992 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9993 ; RV64ZVE32F-NEXT: slli a2, a2, 3
9994 ; RV64ZVE32F-NEXT: add a2, a0, a2
9995 ; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
9996 ; RV64ZVE32F-NEXT: andi a1, a1, -128
9997 ; RV64ZVE32F-NEXT: beqz a1, .LBB88_11
9998 ; RV64ZVE32F-NEXT: .LBB88_16: # %cond.store13
9999 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
10000 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
10001 ; RV64ZVE32F-NEXT: slli a1, a1, 3
10002 ; RV64ZVE32F-NEXT: add a0, a0, a1
10003 ; RV64ZVE32F-NEXT: fsd fa7, 0(a0)
10004 ; RV64ZVE32F-NEXT: ret
10005 %eidxs = sext <8 x i32> %idxs to <8 x i64>
10006 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
10007 call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
10011 define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m) {
10012 ; RV32V-LABEL: mscatter_baseidx_zext_v8i32_v8f64:
10014 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
10015 ; RV32V-NEXT: vsll.vi v12, v12, 3
10016 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
10017 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
10020 ; RV64-LABEL: mscatter_baseidx_zext_v8i32_v8f64:
10022 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
10023 ; RV64-NEXT: vzext.vf2 v16, v12
10024 ; RV64-NEXT: vsll.vi v12, v16, 3
10025 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
10028 ; RV32ZVE32F-LABEL: mscatter_baseidx_zext_v8i32_v8f64:
10029 ; RV32ZVE32F: # %bb.0:
10030 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
10031 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
10032 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
10033 ; RV32ZVE32F-NEXT: vmv.x.s a1, v0
10034 ; RV32ZVE32F-NEXT: andi a2, a1, 1
10035 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
10036 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
10037 ; RV32ZVE32F-NEXT: bnez a2, .LBB89_9
10038 ; RV32ZVE32F-NEXT: # %bb.1: # %else
10039 ; RV32ZVE32F-NEXT: andi a0, a1, 2
10040 ; RV32ZVE32F-NEXT: bnez a0, .LBB89_10
10041 ; RV32ZVE32F-NEXT: .LBB89_2: # %else2
10042 ; RV32ZVE32F-NEXT: andi a0, a1, 4
10043 ; RV32ZVE32F-NEXT: bnez a0, .LBB89_11
10044 ; RV32ZVE32F-NEXT: .LBB89_3: # %else4
10045 ; RV32ZVE32F-NEXT: andi a0, a1, 8
10046 ; RV32ZVE32F-NEXT: bnez a0, .LBB89_12
10047 ; RV32ZVE32F-NEXT: .LBB89_4: # %else6
10048 ; RV32ZVE32F-NEXT: andi a0, a1, 16
10049 ; RV32ZVE32F-NEXT: bnez a0, .LBB89_13
10050 ; RV32ZVE32F-NEXT: .LBB89_5: # %else8
10051 ; RV32ZVE32F-NEXT: andi a0, a1, 32
10052 ; RV32ZVE32F-NEXT: bnez a0, .LBB89_14
10053 ; RV32ZVE32F-NEXT: .LBB89_6: # %else10
10054 ; RV32ZVE32F-NEXT: andi a0, a1, 64
10055 ; RV32ZVE32F-NEXT: bnez a0, .LBB89_15
10056 ; RV32ZVE32F-NEXT: .LBB89_7: # %else12
10057 ; RV32ZVE32F-NEXT: andi a0, a1, -128
10058 ; RV32ZVE32F-NEXT: bnez a0, .LBB89_16
10059 ; RV32ZVE32F-NEXT: .LBB89_8: # %else14
10060 ; RV32ZVE32F-NEXT: ret
10061 ; RV32ZVE32F-NEXT: .LBB89_9: # %cond.store
10062 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
10063 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
10064 ; RV32ZVE32F-NEXT: andi a0, a1, 2
10065 ; RV32ZVE32F-NEXT: beqz a0, .LBB89_2
10066 ; RV32ZVE32F-NEXT: .LBB89_10: # %cond.store1
10067 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
10068 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
10069 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
10070 ; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
10071 ; RV32ZVE32F-NEXT: andi a0, a1, 4
10072 ; RV32ZVE32F-NEXT: beqz a0, .LBB89_3
10073 ; RV32ZVE32F-NEXT: .LBB89_11: # %cond.store3
10074 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
10075 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
10076 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
10077 ; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
10078 ; RV32ZVE32F-NEXT: andi a0, a1, 8
10079 ; RV32ZVE32F-NEXT: beqz a0, .LBB89_4
10080 ; RV32ZVE32F-NEXT: .LBB89_12: # %cond.store5
10081 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
10082 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
10083 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
10084 ; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
10085 ; RV32ZVE32F-NEXT: andi a0, a1, 16
10086 ; RV32ZVE32F-NEXT: beqz a0, .LBB89_5
10087 ; RV32ZVE32F-NEXT: .LBB89_13: # %cond.store7
10088 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
10089 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
10090 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
10091 ; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
10092 ; RV32ZVE32F-NEXT: andi a0, a1, 32
10093 ; RV32ZVE32F-NEXT: beqz a0, .LBB89_6
10094 ; RV32ZVE32F-NEXT: .LBB89_14: # %cond.store9
10095 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
10096 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
10097 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
10098 ; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
10099 ; RV32ZVE32F-NEXT: andi a0, a1, 64
10100 ; RV32ZVE32F-NEXT: beqz a0, .LBB89_7
10101 ; RV32ZVE32F-NEXT: .LBB89_15: # %cond.store11
10102 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
10103 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
10104 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
10105 ; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
10106 ; RV32ZVE32F-NEXT: andi a0, a1, -128
10107 ; RV32ZVE32F-NEXT: beqz a0, .LBB89_8
10108 ; RV32ZVE32F-NEXT: .LBB89_16: # %cond.store13
10109 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
10110 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
10111 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
10112 ; RV32ZVE32F-NEXT: fsd fa7, 0(a0)
10113 ; RV32ZVE32F-NEXT: ret
10115 ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i32_v8f64:
10116 ; RV64ZVE32F: # %bb.0:
10117 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10118 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
10119 ; RV64ZVE32F-NEXT: andi a2, a1, 1
10120 ; RV64ZVE32F-NEXT: beqz a2, .LBB89_2
10121 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
10122 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma
10123 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
10124 ; RV64ZVE32F-NEXT: slli a2, a2, 32
10125 ; RV64ZVE32F-NEXT: srli a2, a2, 29
10126 ; RV64ZVE32F-NEXT: add a2, a0, a2
10127 ; RV64ZVE32F-NEXT: fsd fa0, 0(a2)
10128 ; RV64ZVE32F-NEXT: .LBB89_2: # %else
10129 ; RV64ZVE32F-NEXT: andi a2, a1, 2
10130 ; RV64ZVE32F-NEXT: beqz a2, .LBB89_4
10131 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
10132 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
10133 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
10134 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
10135 ; RV64ZVE32F-NEXT: slli a2, a2, 32
10136 ; RV64ZVE32F-NEXT: srli a2, a2, 29
10137 ; RV64ZVE32F-NEXT: add a2, a0, a2
10138 ; RV64ZVE32F-NEXT: fsd fa1, 0(a2)
10139 ; RV64ZVE32F-NEXT: .LBB89_4: # %else2
10140 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
10141 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
10142 ; RV64ZVE32F-NEXT: andi a2, a1, 4
10143 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
10144 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
10145 ; RV64ZVE32F-NEXT: bnez a2, .LBB89_12
10146 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
10147 ; RV64ZVE32F-NEXT: andi a2, a1, 8
10148 ; RV64ZVE32F-NEXT: bnez a2, .LBB89_13
10149 ; RV64ZVE32F-NEXT: .LBB89_6: # %else6
10150 ; RV64ZVE32F-NEXT: andi a2, a1, 16
10151 ; RV64ZVE32F-NEXT: bnez a2, .LBB89_14
10152 ; RV64ZVE32F-NEXT: .LBB89_7: # %else8
10153 ; RV64ZVE32F-NEXT: andi a2, a1, 32
10154 ; RV64ZVE32F-NEXT: beqz a2, .LBB89_9
10155 ; RV64ZVE32F-NEXT: .LBB89_8: # %cond.store9
10156 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
10157 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
10158 ; RV64ZVE32F-NEXT: slli a2, a2, 32
10159 ; RV64ZVE32F-NEXT: srli a2, a2, 29
10160 ; RV64ZVE32F-NEXT: add a2, a0, a2
10161 ; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
10162 ; RV64ZVE32F-NEXT: .LBB89_9: # %else10
10163 ; RV64ZVE32F-NEXT: andi a2, a1, 64
10164 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
10165 ; RV64ZVE32F-NEXT: bnez a2, .LBB89_15
10166 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
10167 ; RV64ZVE32F-NEXT: andi a1, a1, -128
10168 ; RV64ZVE32F-NEXT: bnez a1, .LBB89_16
10169 ; RV64ZVE32F-NEXT: .LBB89_11: # %else14
10170 ; RV64ZVE32F-NEXT: ret
10171 ; RV64ZVE32F-NEXT: .LBB89_12: # %cond.store3
10172 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
10173 ; RV64ZVE32F-NEXT: slli a2, a2, 32
10174 ; RV64ZVE32F-NEXT: srli a2, a2, 29
10175 ; RV64ZVE32F-NEXT: add a2, a0, a2
10176 ; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
10177 ; RV64ZVE32F-NEXT: andi a2, a1, 8
10178 ; RV64ZVE32F-NEXT: beqz a2, .LBB89_6
10179 ; RV64ZVE32F-NEXT: .LBB89_13: # %cond.store5
10180 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
10181 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
10182 ; RV64ZVE32F-NEXT: slli a2, a2, 32
10183 ; RV64ZVE32F-NEXT: srli a2, a2, 29
10184 ; RV64ZVE32F-NEXT: add a2, a0, a2
10185 ; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
10186 ; RV64ZVE32F-NEXT: andi a2, a1, 16
10187 ; RV64ZVE32F-NEXT: beqz a2, .LBB89_7
10188 ; RV64ZVE32F-NEXT: .LBB89_14: # %cond.store7
10189 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
10190 ; RV64ZVE32F-NEXT: slli a2, a2, 32
10191 ; RV64ZVE32F-NEXT: srli a2, a2, 29
10192 ; RV64ZVE32F-NEXT: add a2, a0, a2
10193 ; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
10194 ; RV64ZVE32F-NEXT: andi a2, a1, 32
10195 ; RV64ZVE32F-NEXT: bnez a2, .LBB89_8
10196 ; RV64ZVE32F-NEXT: j .LBB89_9
10197 ; RV64ZVE32F-NEXT: .LBB89_15: # %cond.store11
10198 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
10199 ; RV64ZVE32F-NEXT: slli a2, a2, 32
10200 ; RV64ZVE32F-NEXT: srli a2, a2, 29
10201 ; RV64ZVE32F-NEXT: add a2, a0, a2
10202 ; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
10203 ; RV64ZVE32F-NEXT: andi a1, a1, -128
10204 ; RV64ZVE32F-NEXT: beqz a1, .LBB89_11
10205 ; RV64ZVE32F-NEXT: .LBB89_16: # %cond.store13
10206 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
10207 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
10208 ; RV64ZVE32F-NEXT: slli a1, a1, 32
10209 ; RV64ZVE32F-NEXT: srli a1, a1, 29
10210 ; RV64ZVE32F-NEXT: add a0, a0, a1
10211 ; RV64ZVE32F-NEXT: fsd fa7, 0(a0)
10212 ; RV64ZVE32F-NEXT: ret
10213 %eidxs = zext <8 x i32> %idxs to <8 x i64>
10214 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
10215 call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
10219 define void @mscatter_baseidx_v8f64(<8 x double> %val, ptr %base, <8 x i64> %idxs, <8 x i1> %m) {
10220 ; RV32V-LABEL: mscatter_baseidx_v8f64:
10222 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
10223 ; RV32V-NEXT: vnsrl.wi v16, v12, 0
10224 ; RV32V-NEXT: vsll.vi v12, v16, 3
10225 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
10226 ; RV32V-NEXT: vsoxei32.v v8, (a0), v12, v0.t
10229 ; RV64-LABEL: mscatter_baseidx_v8f64:
10231 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
10232 ; RV64-NEXT: vsll.vi v12, v12, 3
10233 ; RV64-NEXT: vsoxei64.v v8, (a0), v12, v0.t
10236 ; RV32ZVE32F-LABEL: mscatter_baseidx_v8f64:
10237 ; RV32ZVE32F: # %bb.0:
10238 ; RV32ZVE32F-NEXT: lw a2, 56(a1)
10239 ; RV32ZVE32F-NEXT: lw a3, 48(a1)
10240 ; RV32ZVE32F-NEXT: lw a4, 40(a1)
10241 ; RV32ZVE32F-NEXT: lw a5, 8(a1)
10242 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
10243 ; RV32ZVE32F-NEXT: vlse32.v v8, (a1), zero
10244 ; RV32ZVE32F-NEXT: lw a6, 16(a1)
10245 ; RV32ZVE32F-NEXT: lw a7, 24(a1)
10246 ; RV32ZVE32F-NEXT: lw a1, 32(a1)
10247 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a5
10248 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a6
10249 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a7
10250 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a1
10251 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a4
10252 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a3
10253 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a2
10254 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
10255 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
10256 ; RV32ZVE32F-NEXT: vmv.x.s a1, v0
10257 ; RV32ZVE32F-NEXT: andi a2, a1, 1
10258 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
10259 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0
10260 ; RV32ZVE32F-NEXT: bnez a2, .LBB90_9
10261 ; RV32ZVE32F-NEXT: # %bb.1: # %else
10262 ; RV32ZVE32F-NEXT: andi a0, a1, 2
10263 ; RV32ZVE32F-NEXT: bnez a0, .LBB90_10
10264 ; RV32ZVE32F-NEXT: .LBB90_2: # %else2
10265 ; RV32ZVE32F-NEXT: andi a0, a1, 4
10266 ; RV32ZVE32F-NEXT: bnez a0, .LBB90_11
10267 ; RV32ZVE32F-NEXT: .LBB90_3: # %else4
10268 ; RV32ZVE32F-NEXT: andi a0, a1, 8
10269 ; RV32ZVE32F-NEXT: bnez a0, .LBB90_12
10270 ; RV32ZVE32F-NEXT: .LBB90_4: # %else6
10271 ; RV32ZVE32F-NEXT: andi a0, a1, 16
10272 ; RV32ZVE32F-NEXT: bnez a0, .LBB90_13
10273 ; RV32ZVE32F-NEXT: .LBB90_5: # %else8
10274 ; RV32ZVE32F-NEXT: andi a0, a1, 32
10275 ; RV32ZVE32F-NEXT: bnez a0, .LBB90_14
10276 ; RV32ZVE32F-NEXT: .LBB90_6: # %else10
10277 ; RV32ZVE32F-NEXT: andi a0, a1, 64
10278 ; RV32ZVE32F-NEXT: bnez a0, .LBB90_15
10279 ; RV32ZVE32F-NEXT: .LBB90_7: # %else12
10280 ; RV32ZVE32F-NEXT: andi a0, a1, -128
10281 ; RV32ZVE32F-NEXT: bnez a0, .LBB90_16
10282 ; RV32ZVE32F-NEXT: .LBB90_8: # %else14
10283 ; RV32ZVE32F-NEXT: ret
10284 ; RV32ZVE32F-NEXT: .LBB90_9: # %cond.store
10285 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
10286 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
10287 ; RV32ZVE32F-NEXT: andi a0, a1, 2
10288 ; RV32ZVE32F-NEXT: beqz a0, .LBB90_2
10289 ; RV32ZVE32F-NEXT: .LBB90_10: # %cond.store1
10290 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
10291 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
10292 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
10293 ; RV32ZVE32F-NEXT: fsd fa1, 0(a0)
10294 ; RV32ZVE32F-NEXT: andi a0, a1, 4
10295 ; RV32ZVE32F-NEXT: beqz a0, .LBB90_3
10296 ; RV32ZVE32F-NEXT: .LBB90_11: # %cond.store3
10297 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
10298 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
10299 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
10300 ; RV32ZVE32F-NEXT: fsd fa2, 0(a0)
10301 ; RV32ZVE32F-NEXT: andi a0, a1, 8
10302 ; RV32ZVE32F-NEXT: beqz a0, .LBB90_4
10303 ; RV32ZVE32F-NEXT: .LBB90_12: # %cond.store5
10304 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
10305 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
10306 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
10307 ; RV32ZVE32F-NEXT: fsd fa3, 0(a0)
10308 ; RV32ZVE32F-NEXT: andi a0, a1, 16
10309 ; RV32ZVE32F-NEXT: beqz a0, .LBB90_5
10310 ; RV32ZVE32F-NEXT: .LBB90_13: # %cond.store7
10311 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
10312 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
10313 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
10314 ; RV32ZVE32F-NEXT: fsd fa4, 0(a0)
10315 ; RV32ZVE32F-NEXT: andi a0, a1, 32
10316 ; RV32ZVE32F-NEXT: beqz a0, .LBB90_6
10317 ; RV32ZVE32F-NEXT: .LBB90_14: # %cond.store9
10318 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
10319 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
10320 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
10321 ; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
10322 ; RV32ZVE32F-NEXT: andi a0, a1, 64
10323 ; RV32ZVE32F-NEXT: beqz a0, .LBB90_7
10324 ; RV32ZVE32F-NEXT: .LBB90_15: # %cond.store11
10325 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
10326 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
10327 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
10328 ; RV32ZVE32F-NEXT: fsd fa6, 0(a0)
10329 ; RV32ZVE32F-NEXT: andi a0, a1, -128
10330 ; RV32ZVE32F-NEXT: beqz a0, .LBB90_8
10331 ; RV32ZVE32F-NEXT: .LBB90_16: # %cond.store13
10332 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
10333 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
10334 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
10335 ; RV32ZVE32F-NEXT: fsd fa7, 0(a0)
10336 ; RV32ZVE32F-NEXT: ret
10338 ; RV64ZVE32F-LABEL: mscatter_baseidx_v8f64:
10339 ; RV64ZVE32F: # %bb.0:
10340 ; RV64ZVE32F-NEXT: ld t1, 8(a1)
10341 ; RV64ZVE32F-NEXT: ld t0, 16(a1)
10342 ; RV64ZVE32F-NEXT: ld a7, 24(a1)
10343 ; RV64ZVE32F-NEXT: ld a6, 32(a1)
10344 ; RV64ZVE32F-NEXT: ld a5, 40(a1)
10345 ; RV64ZVE32F-NEXT: ld a4, 48(a1)
10346 ; RV64ZVE32F-NEXT: ld a2, 56(a1)
10347 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10348 ; RV64ZVE32F-NEXT: vmv.x.s a3, v0
10349 ; RV64ZVE32F-NEXT: andi t2, a3, 1
10350 ; RV64ZVE32F-NEXT: bnez t2, .LBB90_9
10351 ; RV64ZVE32F-NEXT: # %bb.1: # %else
10352 ; RV64ZVE32F-NEXT: andi a1, a3, 2
10353 ; RV64ZVE32F-NEXT: bnez a1, .LBB90_10
10354 ; RV64ZVE32F-NEXT: .LBB90_2: # %else2
10355 ; RV64ZVE32F-NEXT: andi a1, a3, 4
10356 ; RV64ZVE32F-NEXT: bnez a1, .LBB90_11
10357 ; RV64ZVE32F-NEXT: .LBB90_3: # %else4
10358 ; RV64ZVE32F-NEXT: andi a1, a3, 8
10359 ; RV64ZVE32F-NEXT: bnez a1, .LBB90_12
10360 ; RV64ZVE32F-NEXT: .LBB90_4: # %else6
10361 ; RV64ZVE32F-NEXT: andi a1, a3, 16
10362 ; RV64ZVE32F-NEXT: bnez a1, .LBB90_13
10363 ; RV64ZVE32F-NEXT: .LBB90_5: # %else8
10364 ; RV64ZVE32F-NEXT: andi a1, a3, 32
10365 ; RV64ZVE32F-NEXT: bnez a1, .LBB90_14
10366 ; RV64ZVE32F-NEXT: .LBB90_6: # %else10
10367 ; RV64ZVE32F-NEXT: andi a1, a3, 64
10368 ; RV64ZVE32F-NEXT: bnez a1, .LBB90_15
10369 ; RV64ZVE32F-NEXT: .LBB90_7: # %else12
10370 ; RV64ZVE32F-NEXT: andi a1, a3, -128
10371 ; RV64ZVE32F-NEXT: bnez a1, .LBB90_16
10372 ; RV64ZVE32F-NEXT: .LBB90_8: # %else14
10373 ; RV64ZVE32F-NEXT: ret
10374 ; RV64ZVE32F-NEXT: .LBB90_9: # %cond.store
10375 ; RV64ZVE32F-NEXT: ld a1, 0(a1)
10376 ; RV64ZVE32F-NEXT: slli a1, a1, 3
10377 ; RV64ZVE32F-NEXT: add a1, a0, a1
10378 ; RV64ZVE32F-NEXT: fsd fa0, 0(a1)
10379 ; RV64ZVE32F-NEXT: andi a1, a3, 2
10380 ; RV64ZVE32F-NEXT: beqz a1, .LBB90_2
10381 ; RV64ZVE32F-NEXT: .LBB90_10: # %cond.store1
10382 ; RV64ZVE32F-NEXT: slli t1, t1, 3
10383 ; RV64ZVE32F-NEXT: add t1, a0, t1
10384 ; RV64ZVE32F-NEXT: fsd fa1, 0(t1)
10385 ; RV64ZVE32F-NEXT: andi a1, a3, 4
10386 ; RV64ZVE32F-NEXT: beqz a1, .LBB90_3
10387 ; RV64ZVE32F-NEXT: .LBB90_11: # %cond.store3
10388 ; RV64ZVE32F-NEXT: slli t0, t0, 3
10389 ; RV64ZVE32F-NEXT: add t0, a0, t0
10390 ; RV64ZVE32F-NEXT: fsd fa2, 0(t0)
10391 ; RV64ZVE32F-NEXT: andi a1, a3, 8
10392 ; RV64ZVE32F-NEXT: beqz a1, .LBB90_4
10393 ; RV64ZVE32F-NEXT: .LBB90_12: # %cond.store5
10394 ; RV64ZVE32F-NEXT: slli a7, a7, 3
10395 ; RV64ZVE32F-NEXT: add a7, a0, a7
10396 ; RV64ZVE32F-NEXT: fsd fa3, 0(a7)
10397 ; RV64ZVE32F-NEXT: andi a1, a3, 16
10398 ; RV64ZVE32F-NEXT: beqz a1, .LBB90_5
10399 ; RV64ZVE32F-NEXT: .LBB90_13: # %cond.store7
10400 ; RV64ZVE32F-NEXT: slli a6, a6, 3
10401 ; RV64ZVE32F-NEXT: add a6, a0, a6
10402 ; RV64ZVE32F-NEXT: fsd fa4, 0(a6)
10403 ; RV64ZVE32F-NEXT: andi a1, a3, 32
10404 ; RV64ZVE32F-NEXT: beqz a1, .LBB90_6
10405 ; RV64ZVE32F-NEXT: .LBB90_14: # %cond.store9
10406 ; RV64ZVE32F-NEXT: slli a5, a5, 3
10407 ; RV64ZVE32F-NEXT: add a5, a0, a5
10408 ; RV64ZVE32F-NEXT: fsd fa5, 0(a5)
10409 ; RV64ZVE32F-NEXT: andi a1, a3, 64
10410 ; RV64ZVE32F-NEXT: beqz a1, .LBB90_7
10411 ; RV64ZVE32F-NEXT: .LBB90_15: # %cond.store11
10412 ; RV64ZVE32F-NEXT: slli a4, a4, 3
10413 ; RV64ZVE32F-NEXT: add a4, a0, a4
10414 ; RV64ZVE32F-NEXT: fsd fa6, 0(a4)
10415 ; RV64ZVE32F-NEXT: andi a1, a3, -128
10416 ; RV64ZVE32F-NEXT: beqz a1, .LBB90_8
10417 ; RV64ZVE32F-NEXT: .LBB90_16: # %cond.store13
10418 ; RV64ZVE32F-NEXT: slli a2, a2, 3
10419 ; RV64ZVE32F-NEXT: add a0, a0, a2
10420 ; RV64ZVE32F-NEXT: fsd fa7, 0(a0)
10421 ; RV64ZVE32F-NEXT: ret
10422 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %idxs
10423 call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, i32 8, <8 x i1> %m)
10427 declare void @llvm.masked.scatter.v16i8.v16p0(<16 x i8>, <16 x ptr>, i32, <16 x i1>)
10429 define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs, <16 x i1> %m) {
10430 ; RV32-LABEL: mscatter_baseidx_v16i8:
10432 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
10433 ; RV32-NEXT: vsext.vf4 v12, v9
10434 ; RV32-NEXT: vsetvli zero, zero, e8, m1, ta, ma
10435 ; RV32-NEXT: vsoxei32.v v8, (a0), v12, v0.t
10438 ; RV64-LABEL: mscatter_baseidx_v16i8:
10440 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
10441 ; RV64-NEXT: vsext.vf8 v16, v9
10442 ; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, ma
10443 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
10446 ; RV64ZVE32F-LABEL: mscatter_baseidx_v16i8:
10447 ; RV64ZVE32F: # %bb.0:
10448 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
10449 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
10450 ; RV64ZVE32F-NEXT: andi a2, a1, 1
10451 ; RV64ZVE32F-NEXT: beqz a2, .LBB91_2
10452 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
10453 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10454 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
10455 ; RV64ZVE32F-NEXT: add a2, a0, a2
10456 ; RV64ZVE32F-NEXT: vse8.v v8, (a2)
10457 ; RV64ZVE32F-NEXT: .LBB91_2: # %else
10458 ; RV64ZVE32F-NEXT: andi a2, a1, 2
10459 ; RV64ZVE32F-NEXT: beqz a2, .LBB91_4
10460 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
10461 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10462 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
10463 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
10464 ; RV64ZVE32F-NEXT: add a2, a0, a2
10465 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10466 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
10467 ; RV64ZVE32F-NEXT: vse8.v v10, (a2)
10468 ; RV64ZVE32F-NEXT: .LBB91_4: # %else2
10469 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
10470 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
10471 ; RV64ZVE32F-NEXT: andi a2, a1, 4
10472 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
10473 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 2
10474 ; RV64ZVE32F-NEXT: bnez a2, .LBB91_25
10475 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
10476 ; RV64ZVE32F-NEXT: andi a2, a1, 8
10477 ; RV64ZVE32F-NEXT: bnez a2, .LBB91_26
10478 ; RV64ZVE32F-NEXT: .LBB91_6: # %else6
10479 ; RV64ZVE32F-NEXT: andi a2, a1, 16
10480 ; RV64ZVE32F-NEXT: beqz a2, .LBB91_8
10481 ; RV64ZVE32F-NEXT: .LBB91_7: # %cond.store7
10482 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10483 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
10484 ; RV64ZVE32F-NEXT: add a2, a0, a2
10485 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 4
10486 ; RV64ZVE32F-NEXT: vse8.v v11, (a2)
10487 ; RV64ZVE32F-NEXT: .LBB91_8: # %else8
10488 ; RV64ZVE32F-NEXT: andi a2, a1, 32
10489 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma
10490 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 8
10491 ; RV64ZVE32F-NEXT: beqz a2, .LBB91_10
10492 ; RV64ZVE32F-NEXT: # %bb.9: # %cond.store9
10493 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10494 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
10495 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
10496 ; RV64ZVE32F-NEXT: add a2, a0, a2
10497 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10498 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 5
10499 ; RV64ZVE32F-NEXT: vse8.v v11, (a2)
10500 ; RV64ZVE32F-NEXT: .LBB91_10: # %else10
10501 ; RV64ZVE32F-NEXT: andi a2, a1, 64
10502 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
10503 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
10504 ; RV64ZVE32F-NEXT: bnez a2, .LBB91_27
10505 ; RV64ZVE32F-NEXT: # %bb.11: # %else12
10506 ; RV64ZVE32F-NEXT: andi a2, a1, 128
10507 ; RV64ZVE32F-NEXT: bnez a2, .LBB91_28
10508 ; RV64ZVE32F-NEXT: .LBB91_12: # %else14
10509 ; RV64ZVE32F-NEXT: andi a2, a1, 256
10510 ; RV64ZVE32F-NEXT: bnez a2, .LBB91_29
10511 ; RV64ZVE32F-NEXT: .LBB91_13: # %else16
10512 ; RV64ZVE32F-NEXT: andi a2, a1, 512
10513 ; RV64ZVE32F-NEXT: beqz a2, .LBB91_15
10514 ; RV64ZVE32F-NEXT: .LBB91_14: # %cond.store17
10515 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10516 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
10517 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
10518 ; RV64ZVE32F-NEXT: add a2, a0, a2
10519 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10520 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 9
10521 ; RV64ZVE32F-NEXT: vse8.v v10, (a2)
10522 ; RV64ZVE32F-NEXT: .LBB91_15: # %else18
10523 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
10524 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
10525 ; RV64ZVE32F-NEXT: andi a2, a1, 1024
10526 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
10527 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
10528 ; RV64ZVE32F-NEXT: bnez a2, .LBB91_30
10529 ; RV64ZVE32F-NEXT: # %bb.16: # %else20
10530 ; RV64ZVE32F-NEXT: slli a2, a1, 52
10531 ; RV64ZVE32F-NEXT: bltz a2, .LBB91_31
10532 ; RV64ZVE32F-NEXT: .LBB91_17: # %else22
10533 ; RV64ZVE32F-NEXT: slli a2, a1, 51
10534 ; RV64ZVE32F-NEXT: bltz a2, .LBB91_32
10535 ; RV64ZVE32F-NEXT: .LBB91_18: # %else24
10536 ; RV64ZVE32F-NEXT: slli a2, a1, 50
10537 ; RV64ZVE32F-NEXT: bgez a2, .LBB91_20
10538 ; RV64ZVE32F-NEXT: .LBB91_19: # %cond.store25
10539 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10540 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 1
10541 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
10542 ; RV64ZVE32F-NEXT: add a2, a0, a2
10543 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10544 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 13
10545 ; RV64ZVE32F-NEXT: vse8.v v9, (a2)
10546 ; RV64ZVE32F-NEXT: .LBB91_20: # %else26
10547 ; RV64ZVE32F-NEXT: slli a2, a1, 49
10548 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
10549 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2
10550 ; RV64ZVE32F-NEXT: bgez a2, .LBB91_22
10551 ; RV64ZVE32F-NEXT: # %bb.21: # %cond.store27
10552 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
10553 ; RV64ZVE32F-NEXT: add a2, a0, a2
10554 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10555 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 14
10556 ; RV64ZVE32F-NEXT: vse8.v v10, (a2)
10557 ; RV64ZVE32F-NEXT: .LBB91_22: # %else28
10558 ; RV64ZVE32F-NEXT: lui a2, 1048568
10559 ; RV64ZVE32F-NEXT: and a1, a1, a2
10560 ; RV64ZVE32F-NEXT: beqz a1, .LBB91_24
10561 ; RV64ZVE32F-NEXT: # %bb.23: # %cond.store29
10562 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10563 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
10564 ; RV64ZVE32F-NEXT: vmv.x.s a1, v9
10565 ; RV64ZVE32F-NEXT: add a0, a0, a1
10566 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10567 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 15
10568 ; RV64ZVE32F-NEXT: vse8.v v8, (a0)
10569 ; RV64ZVE32F-NEXT: .LBB91_24: # %else30
10570 ; RV64ZVE32F-NEXT: ret
10571 ; RV64ZVE32F-NEXT: .LBB91_25: # %cond.store3
10572 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
10573 ; RV64ZVE32F-NEXT: add a2, a0, a2
10574 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10575 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
10576 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
10577 ; RV64ZVE32F-NEXT: andi a2, a1, 8
10578 ; RV64ZVE32F-NEXT: beqz a2, .LBB91_6
10579 ; RV64ZVE32F-NEXT: .LBB91_26: # %cond.store5
10580 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10581 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
10582 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
10583 ; RV64ZVE32F-NEXT: add a2, a0, a2
10584 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10585 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 3
10586 ; RV64ZVE32F-NEXT: vse8.v v11, (a2)
10587 ; RV64ZVE32F-NEXT: andi a2, a1, 16
10588 ; RV64ZVE32F-NEXT: bnez a2, .LBB91_7
10589 ; RV64ZVE32F-NEXT: j .LBB91_8
10590 ; RV64ZVE32F-NEXT: .LBB91_27: # %cond.store11
10591 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
10592 ; RV64ZVE32F-NEXT: add a2, a0, a2
10593 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10594 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 6
10595 ; RV64ZVE32F-NEXT: vse8.v v11, (a2)
10596 ; RV64ZVE32F-NEXT: andi a2, a1, 128
10597 ; RV64ZVE32F-NEXT: beqz a2, .LBB91_12
10598 ; RV64ZVE32F-NEXT: .LBB91_28: # %cond.store13
10599 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10600 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
10601 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
10602 ; RV64ZVE32F-NEXT: add a2, a0, a2
10603 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10604 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 7
10605 ; RV64ZVE32F-NEXT: vse8.v v10, (a2)
10606 ; RV64ZVE32F-NEXT: andi a2, a1, 256
10607 ; RV64ZVE32F-NEXT: beqz a2, .LBB91_13
10608 ; RV64ZVE32F-NEXT: .LBB91_29: # %cond.store15
10609 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10610 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
10611 ; RV64ZVE32F-NEXT: add a2, a0, a2
10612 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 8
10613 ; RV64ZVE32F-NEXT: vse8.v v10, (a2)
10614 ; RV64ZVE32F-NEXT: andi a2, a1, 512
10615 ; RV64ZVE32F-NEXT: bnez a2, .LBB91_14
10616 ; RV64ZVE32F-NEXT: j .LBB91_15
10617 ; RV64ZVE32F-NEXT: .LBB91_30: # %cond.store19
10618 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
10619 ; RV64ZVE32F-NEXT: add a2, a0, a2
10620 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10621 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 10
10622 ; RV64ZVE32F-NEXT: vse8.v v11, (a2)
10623 ; RV64ZVE32F-NEXT: slli a2, a1, 52
10624 ; RV64ZVE32F-NEXT: bgez a2, .LBB91_17
10625 ; RV64ZVE32F-NEXT: .LBB91_31: # %cond.store21
10626 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10627 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
10628 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
10629 ; RV64ZVE32F-NEXT: add a2, a0, a2
10630 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10631 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 11
10632 ; RV64ZVE32F-NEXT: vse8.v v9, (a2)
10633 ; RV64ZVE32F-NEXT: slli a2, a1, 51
10634 ; RV64ZVE32F-NEXT: bgez a2, .LBB91_18
10635 ; RV64ZVE32F-NEXT: .LBB91_32: # %cond.store23
10636 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10637 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
10638 ; RV64ZVE32F-NEXT: add a2, a0, a2
10639 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 12
10640 ; RV64ZVE32F-NEXT: vse8.v v9, (a2)
10641 ; RV64ZVE32F-NEXT: slli a2, a1, 50
10642 ; RV64ZVE32F-NEXT: bltz a2, .LBB91_19
10643 ; RV64ZVE32F-NEXT: j .LBB91_20
10644 %ptrs = getelementptr inbounds i8, ptr %base, <16 x i8> %idxs
10645 call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> %val, <16 x ptr> %ptrs, i32 1, <16 x i1> %m)
10649 declare void @llvm.masked.scatter.v32i8.v32p0(<32 x i8>, <32 x ptr>, i32, <32 x i1>)
10651 define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, <32 x i1> %m) {
10652 ; RV32-LABEL: mscatter_baseidx_v32i8:
10654 ; RV32-NEXT: li a1, 32
10655 ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
10656 ; RV32-NEXT: vsext.vf4 v16, v10
10657 ; RV32-NEXT: vsetvli zero, zero, e8, m2, ta, ma
10658 ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
10661 ; RV64-LABEL: mscatter_baseidx_v32i8:
10663 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
10664 ; RV64-NEXT: vsext.vf8 v16, v10
10665 ; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, ma
10666 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
10667 ; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma
10668 ; RV64-NEXT: vslidedown.vi v8, v8, 16
10669 ; RV64-NEXT: vslidedown.vi v10, v10, 16
10670 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
10671 ; RV64-NEXT: vslidedown.vi v0, v0, 2
10672 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
10673 ; RV64-NEXT: vsext.vf8 v16, v10
10674 ; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, ma
10675 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
10678 ; RV64ZVE32F-LABEL: mscatter_baseidx_v32i8:
10679 ; RV64ZVE32F: # %bb.0:
10680 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
10681 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
10682 ; RV64ZVE32F-NEXT: andi a2, a1, 1
10683 ; RV64ZVE32F-NEXT: beqz a2, .LBB92_2
10684 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
10685 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10686 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
10687 ; RV64ZVE32F-NEXT: add a2, a0, a2
10688 ; RV64ZVE32F-NEXT: vse8.v v8, (a2)
10689 ; RV64ZVE32F-NEXT: .LBB92_2: # %else
10690 ; RV64ZVE32F-NEXT: andi a2, a1, 2
10691 ; RV64ZVE32F-NEXT: beqz a2, .LBB92_4
10692 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
10693 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10694 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 1
10695 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
10696 ; RV64ZVE32F-NEXT: add a2, a0, a2
10697 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10698 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1
10699 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
10700 ; RV64ZVE32F-NEXT: .LBB92_4: # %else2
10701 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
10702 ; RV64ZVE32F-NEXT: vslidedown.vi v13, v10, 4
10703 ; RV64ZVE32F-NEXT: andi a2, a1, 4
10704 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
10705 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 2
10706 ; RV64ZVE32F-NEXT: bnez a2, .LBB92_49
10707 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
10708 ; RV64ZVE32F-NEXT: andi a2, a1, 8
10709 ; RV64ZVE32F-NEXT: bnez a2, .LBB92_50
10710 ; RV64ZVE32F-NEXT: .LBB92_6: # %else6
10711 ; RV64ZVE32F-NEXT: andi a2, a1, 16
10712 ; RV64ZVE32F-NEXT: beqz a2, .LBB92_8
10713 ; RV64ZVE32F-NEXT: .LBB92_7: # %cond.store7
10714 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10715 ; RV64ZVE32F-NEXT: vmv.x.s a2, v13
10716 ; RV64ZVE32F-NEXT: add a2, a0, a2
10717 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
10718 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
10719 ; RV64ZVE32F-NEXT: .LBB92_8: # %else8
10720 ; RV64ZVE32F-NEXT: andi a2, a1, 32
10721 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma
10722 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 8
10723 ; RV64ZVE32F-NEXT: beqz a2, .LBB92_10
10724 ; RV64ZVE32F-NEXT: # %bb.9: # %cond.store9
10725 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10726 ; RV64ZVE32F-NEXT: vslidedown.vi v14, v13, 1
10727 ; RV64ZVE32F-NEXT: vmv.x.s a2, v14
10728 ; RV64ZVE32F-NEXT: add a2, a0, a2
10729 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10730 ; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 5
10731 ; RV64ZVE32F-NEXT: vse8.v v14, (a2)
10732 ; RV64ZVE32F-NEXT: .LBB92_10: # %else10
10733 ; RV64ZVE32F-NEXT: andi a2, a1, 64
10734 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
10735 ; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 2
10736 ; RV64ZVE32F-NEXT: bnez a2, .LBB92_51
10737 ; RV64ZVE32F-NEXT: # %bb.11: # %else12
10738 ; RV64ZVE32F-NEXT: andi a2, a1, 128
10739 ; RV64ZVE32F-NEXT: bnez a2, .LBB92_52
10740 ; RV64ZVE32F-NEXT: .LBB92_12: # %else14
10741 ; RV64ZVE32F-NEXT: andi a2, a1, 256
10742 ; RV64ZVE32F-NEXT: bnez a2, .LBB92_53
10743 ; RV64ZVE32F-NEXT: .LBB92_13: # %else16
10744 ; RV64ZVE32F-NEXT: andi a2, a1, 512
10745 ; RV64ZVE32F-NEXT: beqz a2, .LBB92_15
10746 ; RV64ZVE32F-NEXT: .LBB92_14: # %cond.store17
10747 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10748 ; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 1
10749 ; RV64ZVE32F-NEXT: vmv.x.s a2, v13
10750 ; RV64ZVE32F-NEXT: add a2, a0, a2
10751 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10752 ; RV64ZVE32F-NEXT: vslidedown.vi v13, v8, 9
10753 ; RV64ZVE32F-NEXT: vse8.v v13, (a2)
10754 ; RV64ZVE32F-NEXT: .LBB92_15: # %else18
10755 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
10756 ; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 4
10757 ; RV64ZVE32F-NEXT: andi a2, a1, 1024
10758 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
10759 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 2
10760 ; RV64ZVE32F-NEXT: beqz a2, .LBB92_17
10761 ; RV64ZVE32F-NEXT: # %bb.16: # %cond.store19
10762 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
10763 ; RV64ZVE32F-NEXT: add a2, a0, a2
10764 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10765 ; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 10
10766 ; RV64ZVE32F-NEXT: vse8.v v14, (a2)
10767 ; RV64ZVE32F-NEXT: .LBB92_17: # %else20
10768 ; RV64ZVE32F-NEXT: slli a2, a1, 52
10769 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_19
10770 ; RV64ZVE32F-NEXT: # %bb.18: # %cond.store21
10771 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10772 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
10773 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
10774 ; RV64ZVE32F-NEXT: add a2, a0, a2
10775 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10776 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 11
10777 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
10778 ; RV64ZVE32F-NEXT: .LBB92_19: # %else22
10779 ; RV64ZVE32F-NEXT: slli a2, a1, 51
10780 ; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m2, ta, ma
10781 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 16
10782 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_21
10783 ; RV64ZVE32F-NEXT: # %bb.20: # %cond.store23
10784 ; RV64ZVE32F-NEXT: vmv.x.s a2, v13
10785 ; RV64ZVE32F-NEXT: add a2, a0, a2
10786 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10787 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 12
10788 ; RV64ZVE32F-NEXT: vse8.v v11, (a2)
10789 ; RV64ZVE32F-NEXT: .LBB92_21: # %else24
10790 ; RV64ZVE32F-NEXT: slli a2, a1, 50
10791 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_23
10792 ; RV64ZVE32F-NEXT: # %bb.22: # %cond.store25
10793 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10794 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v13, 1
10795 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
10796 ; RV64ZVE32F-NEXT: add a2, a0, a2
10797 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10798 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 13
10799 ; RV64ZVE32F-NEXT: vse8.v v11, (a2)
10800 ; RV64ZVE32F-NEXT: .LBB92_23: # %else26
10801 ; RV64ZVE32F-NEXT: slli a2, a1, 49
10802 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
10803 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v13, 2
10804 ; RV64ZVE32F-NEXT: bltz a2, .LBB92_54
10805 ; RV64ZVE32F-NEXT: # %bb.24: # %else28
10806 ; RV64ZVE32F-NEXT: slli a2, a1, 48
10807 ; RV64ZVE32F-NEXT: bltz a2, .LBB92_55
10808 ; RV64ZVE32F-NEXT: .LBB92_25: # %else30
10809 ; RV64ZVE32F-NEXT: slli a2, a1, 47
10810 ; RV64ZVE32F-NEXT: bltz a2, .LBB92_56
10811 ; RV64ZVE32F-NEXT: .LBB92_26: # %else32
10812 ; RV64ZVE32F-NEXT: slli a2, a1, 46
10813 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_28
10814 ; RV64ZVE32F-NEXT: .LBB92_27: # %cond.store33
10815 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10816 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
10817 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
10818 ; RV64ZVE32F-NEXT: add a2, a0, a2
10819 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
10820 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 17
10821 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10822 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
10823 ; RV64ZVE32F-NEXT: .LBB92_28: # %else34
10824 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
10825 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
10826 ; RV64ZVE32F-NEXT: slli a2, a1, 45
10827 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
10828 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 2
10829 ; RV64ZVE32F-NEXT: bltz a2, .LBB92_57
10830 ; RV64ZVE32F-NEXT: # %bb.29: # %else36
10831 ; RV64ZVE32F-NEXT: slli a2, a1, 44
10832 ; RV64ZVE32F-NEXT: bltz a2, .LBB92_58
10833 ; RV64ZVE32F-NEXT: .LBB92_30: # %else38
10834 ; RV64ZVE32F-NEXT: slli a2, a1, 43
10835 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_32
10836 ; RV64ZVE32F-NEXT: .LBB92_31: # %cond.store39
10837 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
10838 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
10839 ; RV64ZVE32F-NEXT: add a2, a0, a2
10840 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 20
10841 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10842 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
10843 ; RV64ZVE32F-NEXT: .LBB92_32: # %else40
10844 ; RV64ZVE32F-NEXT: slli a2, a1, 42
10845 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma
10846 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 8
10847 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_34
10848 ; RV64ZVE32F-NEXT: # %bb.33: # %cond.store41
10849 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10850 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v11, 1
10851 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
10852 ; RV64ZVE32F-NEXT: add a2, a0, a2
10853 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
10854 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 21
10855 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10856 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
10857 ; RV64ZVE32F-NEXT: .LBB92_34: # %else42
10858 ; RV64ZVE32F-NEXT: slli a2, a1, 41
10859 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
10860 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 2
10861 ; RV64ZVE32F-NEXT: bltz a2, .LBB92_59
10862 ; RV64ZVE32F-NEXT: # %bb.35: # %else44
10863 ; RV64ZVE32F-NEXT: slli a2, a1, 40
10864 ; RV64ZVE32F-NEXT: bltz a2, .LBB92_60
10865 ; RV64ZVE32F-NEXT: .LBB92_36: # %else46
10866 ; RV64ZVE32F-NEXT: slli a2, a1, 39
10867 ; RV64ZVE32F-NEXT: bltz a2, .LBB92_61
10868 ; RV64ZVE32F-NEXT: .LBB92_37: # %else48
10869 ; RV64ZVE32F-NEXT: slli a2, a1, 38
10870 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_39
10871 ; RV64ZVE32F-NEXT: .LBB92_38: # %cond.store49
10872 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10873 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
10874 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
10875 ; RV64ZVE32F-NEXT: add a2, a0, a2
10876 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
10877 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 25
10878 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10879 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
10880 ; RV64ZVE32F-NEXT: .LBB92_39: # %else50
10881 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
10882 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
10883 ; RV64ZVE32F-NEXT: slli a2, a1, 37
10884 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
10885 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
10886 ; RV64ZVE32F-NEXT: bltz a2, .LBB92_62
10887 ; RV64ZVE32F-NEXT: # %bb.40: # %else52
10888 ; RV64ZVE32F-NEXT: slli a2, a1, 36
10889 ; RV64ZVE32F-NEXT: bltz a2, .LBB92_63
10890 ; RV64ZVE32F-NEXT: .LBB92_41: # %else54
10891 ; RV64ZVE32F-NEXT: slli a2, a1, 35
10892 ; RV64ZVE32F-NEXT: bltz a2, .LBB92_64
10893 ; RV64ZVE32F-NEXT: .LBB92_42: # %else56
10894 ; RV64ZVE32F-NEXT: slli a2, a1, 34
10895 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_44
10896 ; RV64ZVE32F-NEXT: .LBB92_43: # %cond.store57
10897 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10898 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
10899 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
10900 ; RV64ZVE32F-NEXT: add a2, a0, a2
10901 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
10902 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 29
10903 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10904 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
10905 ; RV64ZVE32F-NEXT: .LBB92_44: # %else58
10906 ; RV64ZVE32F-NEXT: slli a2, a1, 33
10907 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
10908 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
10909 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_46
10910 ; RV64ZVE32F-NEXT: # %bb.45: # %cond.store59
10911 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
10912 ; RV64ZVE32F-NEXT: add a2, a0, a2
10913 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
10914 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 30
10915 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10916 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
10917 ; RV64ZVE32F-NEXT: .LBB92_46: # %else60
10918 ; RV64ZVE32F-NEXT: lui a2, 524288
10919 ; RV64ZVE32F-NEXT: and a1, a1, a2
10920 ; RV64ZVE32F-NEXT: beqz a1, .LBB92_48
10921 ; RV64ZVE32F-NEXT: # %bb.47: # %cond.store61
10922 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10923 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
10924 ; RV64ZVE32F-NEXT: vmv.x.s a1, v10
10925 ; RV64ZVE32F-NEXT: add a0, a0, a1
10926 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
10927 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 31
10928 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10929 ; RV64ZVE32F-NEXT: vse8.v v8, (a0)
10930 ; RV64ZVE32F-NEXT: .LBB92_48: # %else62
10931 ; RV64ZVE32F-NEXT: ret
10932 ; RV64ZVE32F-NEXT: .LBB92_49: # %cond.store3
10933 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
10934 ; RV64ZVE32F-NEXT: add a2, a0, a2
10935 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10936 ; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 2
10937 ; RV64ZVE32F-NEXT: vse8.v v14, (a2)
10938 ; RV64ZVE32F-NEXT: andi a2, a1, 8
10939 ; RV64ZVE32F-NEXT: beqz a2, .LBB92_6
10940 ; RV64ZVE32F-NEXT: .LBB92_50: # %cond.store5
10941 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10942 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
10943 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
10944 ; RV64ZVE32F-NEXT: add a2, a0, a2
10945 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10946 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 3
10947 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
10948 ; RV64ZVE32F-NEXT: andi a2, a1, 16
10949 ; RV64ZVE32F-NEXT: bnez a2, .LBB92_7
10950 ; RV64ZVE32F-NEXT: j .LBB92_8
10951 ; RV64ZVE32F-NEXT: .LBB92_51: # %cond.store11
10952 ; RV64ZVE32F-NEXT: vmv.x.s a2, v13
10953 ; RV64ZVE32F-NEXT: add a2, a0, a2
10954 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10955 ; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 6
10956 ; RV64ZVE32F-NEXT: vse8.v v14, (a2)
10957 ; RV64ZVE32F-NEXT: andi a2, a1, 128
10958 ; RV64ZVE32F-NEXT: beqz a2, .LBB92_12
10959 ; RV64ZVE32F-NEXT: .LBB92_52: # %cond.store13
10960 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10961 ; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1
10962 ; RV64ZVE32F-NEXT: vmv.x.s a2, v13
10963 ; RV64ZVE32F-NEXT: add a2, a0, a2
10964 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10965 ; RV64ZVE32F-NEXT: vslidedown.vi v13, v8, 7
10966 ; RV64ZVE32F-NEXT: vse8.v v13, (a2)
10967 ; RV64ZVE32F-NEXT: andi a2, a1, 256
10968 ; RV64ZVE32F-NEXT: beqz a2, .LBB92_13
10969 ; RV64ZVE32F-NEXT: .LBB92_53: # %cond.store15
10970 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10971 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
10972 ; RV64ZVE32F-NEXT: add a2, a0, a2
10973 ; RV64ZVE32F-NEXT: vslidedown.vi v13, v8, 8
10974 ; RV64ZVE32F-NEXT: vse8.v v13, (a2)
10975 ; RV64ZVE32F-NEXT: andi a2, a1, 512
10976 ; RV64ZVE32F-NEXT: bnez a2, .LBB92_14
10977 ; RV64ZVE32F-NEXT: j .LBB92_15
10978 ; RV64ZVE32F-NEXT: .LBB92_54: # %cond.store27
10979 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
10980 ; RV64ZVE32F-NEXT: add a2, a0, a2
10981 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10982 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 14
10983 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
10984 ; RV64ZVE32F-NEXT: slli a2, a1, 48
10985 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_25
10986 ; RV64ZVE32F-NEXT: .LBB92_55: # %cond.store29
10987 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10988 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
10989 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
10990 ; RV64ZVE32F-NEXT: add a2, a0, a2
10991 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
10992 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 15
10993 ; RV64ZVE32F-NEXT: vse8.v v11, (a2)
10994 ; RV64ZVE32F-NEXT: slli a2, a1, 47
10995 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_26
10996 ; RV64ZVE32F-NEXT: .LBB92_56: # %cond.store31
10997 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
10998 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
10999 ; RV64ZVE32F-NEXT: add a2, a0, a2
11000 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 16
11001 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
11002 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
11003 ; RV64ZVE32F-NEXT: slli a2, a1, 46
11004 ; RV64ZVE32F-NEXT: bltz a2, .LBB92_27
11005 ; RV64ZVE32F-NEXT: j .LBB92_28
11006 ; RV64ZVE32F-NEXT: .LBB92_57: # %cond.store35
11007 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
11008 ; RV64ZVE32F-NEXT: add a2, a0, a2
11009 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
11010 ; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 18
11011 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
11012 ; RV64ZVE32F-NEXT: vse8.v v14, (a2)
11013 ; RV64ZVE32F-NEXT: slli a2, a1, 44
11014 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_30
11015 ; RV64ZVE32F-NEXT: .LBB92_58: # %cond.store37
11016 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
11017 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
11018 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
11019 ; RV64ZVE32F-NEXT: add a2, a0, a2
11020 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
11021 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 19
11022 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
11023 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
11024 ; RV64ZVE32F-NEXT: slli a2, a1, 43
11025 ; RV64ZVE32F-NEXT: bltz a2, .LBB92_31
11026 ; RV64ZVE32F-NEXT: j .LBB92_32
11027 ; RV64ZVE32F-NEXT: .LBB92_59: # %cond.store43
11028 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
11029 ; RV64ZVE32F-NEXT: add a2, a0, a2
11030 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
11031 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 22
11032 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
11033 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
11034 ; RV64ZVE32F-NEXT: slli a2, a1, 40
11035 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_36
11036 ; RV64ZVE32F-NEXT: .LBB92_60: # %cond.store45
11037 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
11038 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
11039 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
11040 ; RV64ZVE32F-NEXT: add a2, a0, a2
11041 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
11042 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 23
11043 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
11044 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
11045 ; RV64ZVE32F-NEXT: slli a2, a1, 39
11046 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_37
11047 ; RV64ZVE32F-NEXT: .LBB92_61: # %cond.store47
11048 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
11049 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
11050 ; RV64ZVE32F-NEXT: add a2, a0, a2
11051 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 24
11052 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
11053 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
11054 ; RV64ZVE32F-NEXT: slli a2, a1, 38
11055 ; RV64ZVE32F-NEXT: bltz a2, .LBB92_38
11056 ; RV64ZVE32F-NEXT: j .LBB92_39
11057 ; RV64ZVE32F-NEXT: .LBB92_62: # %cond.store51
11058 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
11059 ; RV64ZVE32F-NEXT: add a2, a0, a2
11060 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
11061 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 26
11062 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
11063 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
11064 ; RV64ZVE32F-NEXT: slli a2, a1, 36
11065 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_41
11066 ; RV64ZVE32F-NEXT: .LBB92_63: # %cond.store53
11067 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
11068 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
11069 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
11070 ; RV64ZVE32F-NEXT: add a2, a0, a2
11071 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
11072 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 27
11073 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
11074 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
11075 ; RV64ZVE32F-NEXT: slli a2, a1, 35
11076 ; RV64ZVE32F-NEXT: bgez a2, .LBB92_42
11077 ; RV64ZVE32F-NEXT: .LBB92_64: # %cond.store55
11078 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
11079 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
11080 ; RV64ZVE32F-NEXT: add a2, a0, a2
11081 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 28
11082 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
11083 ; RV64ZVE32F-NEXT: vse8.v v12, (a2)
11084 ; RV64ZVE32F-NEXT: slli a2, a1, 34
11085 ; RV64ZVE32F-NEXT: bltz a2, .LBB92_43
11086 ; RV64ZVE32F-NEXT: j .LBB92_44
11087 %ptrs = getelementptr inbounds i8, ptr %base, <32 x i8> %idxs
11088 call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> %val, <32 x ptr> %ptrs, i32 1, <32 x i1> %m)
11092 define void @mscatter_unit_stride(<8 x i16> %val, ptr %base) {
11093 ; CHECK-LABEL: mscatter_unit_stride:
11095 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
11096 ; CHECK-NEXT: vse16.v v8, (a0)
11098 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
11099 call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> splat (i1 true))
11103 define void @mscatter_unit_stride_with_offset(<8 x i16> %val, ptr %base) {
11104 ; CHECK-LABEL: mscatter_unit_stride_with_offset:
11106 ; CHECK-NEXT: addi a0, a0, 10
11107 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
11108 ; CHECK-NEXT: vse16.v v8, (a0)
11110 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12>
11111 call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> splat (i1 true))
11115 define void @mscatter_shuffle_reverse(<8 x i16> %val, ptr %base) {
11116 ; CHECK-LABEL: mscatter_shuffle_reverse:
11118 ; CHECK-NEXT: addi a0, a0, 14
11119 ; CHECK-NEXT: li a1, -2
11120 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
11121 ; CHECK-NEXT: vsse16.v v8, (a0), a1
11123 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>
11124 call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> splat (i1 true))
11128 define void @mscatter_shuffle_rotate(<8 x i16> %val, ptr %base) {
11129 ; RV32-LABEL: mscatter_shuffle_rotate:
11131 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
11132 ; RV32-NEXT: vslidedown.vi v9, v8, 4
11133 ; RV32-NEXT: vslideup.vi v9, v8, 4
11134 ; RV32-NEXT: vse16.v v9, (a0)
11137 ; RV64-LABEL: mscatter_shuffle_rotate:
11139 ; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
11140 ; RV64-NEXT: vslidedown.vi v9, v8, 4
11141 ; RV64-NEXT: vslideup.vi v9, v8, 4
11142 ; RV64-NEXT: vse16.v v9, (a0)
11145 ; RV64ZVE32F-LABEL: mscatter_shuffle_rotate:
11146 ; RV64ZVE32F: # %bb.0:
11147 ; RV64ZVE32F-NEXT: addi a1, a0, 6
11148 ; RV64ZVE32F-NEXT: addi a2, a0, 4
11149 ; RV64ZVE32F-NEXT: addi a3, a0, 2
11150 ; RV64ZVE32F-NEXT: addi a4, a0, 14
11151 ; RV64ZVE32F-NEXT: addi a5, a0, 12
11152 ; RV64ZVE32F-NEXT: addi a6, a0, 10
11153 ; RV64ZVE32F-NEXT: addi a7, a0, 8
11154 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
11155 ; RV64ZVE32F-NEXT: vse16.v v8, (a7)
11156 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
11157 ; RV64ZVE32F-NEXT: vse16.v v9, (a6)
11158 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
11159 ; RV64ZVE32F-NEXT: vse16.v v9, (a5)
11160 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3
11161 ; RV64ZVE32F-NEXT: vse16.v v9, (a4)
11162 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
11163 ; RV64ZVE32F-NEXT: vse16.v v9, (a0)
11164 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5
11165 ; RV64ZVE32F-NEXT: vse16.v v9, (a3)
11166 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 6
11167 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
11168 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
11169 ; RV64ZVE32F-NEXT: vse16.v v8, (a1)
11170 ; RV64ZVE32F-NEXT: ret
11171 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 4, i64 5, i64 6, i64 7, i64 0, i64 1, i64 2, i64 3>
11172 call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> splat (i1 true))