1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,RV32
3 ; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,RV64
; Vector-vector select on a non-power-of-two type: picks between the <6 x i32>
; values loaded from %a and %b under a <6 x i1> mask loaded from %cc, and
; stores the result to %z.
; The expected code reads the mask as a single byte (lbu), extracts each bit
; with andi or slli/srli, rebuilds the mask lane by lane with vslide1down.vx,
; and converts it to a mask register with vmsne.vi. The select itself is
; folded into a masked vle32.v of %a on top of the already-loaded %b.
; The riscv32 and riscv64 expectations differ only in the shift amounts.
5 define void @vselect_vv_v6i32(ptr %a, ptr %b, ptr %cc, ptr %z) {
6 ; RV32-LABEL: vselect_vv_v6i32:
8 ; RV32-NEXT: lbu a2, 0(a2)
9 ; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma
10 ; RV32-NEXT: vle32.v v8, (a1)
11 ; RV32-NEXT: slli a1, a2, 30
12 ; RV32-NEXT: srli a1, a1, 31
13 ; RV32-NEXT: andi a4, a2, 1
14 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
15 ; RV32-NEXT: vmv.v.x v10, a4
16 ; RV32-NEXT: vslide1down.vx v10, v10, a1
17 ; RV32-NEXT: slli a1, a2, 29
18 ; RV32-NEXT: srli a1, a1, 31
19 ; RV32-NEXT: vslide1down.vx v10, v10, a1
20 ; RV32-NEXT: slli a1, a2, 28
21 ; RV32-NEXT: srli a1, a1, 31
22 ; RV32-NEXT: vslide1down.vx v10, v10, a1
23 ; RV32-NEXT: slli a1, a2, 27
24 ; RV32-NEXT: srli a1, a1, 31
25 ; RV32-NEXT: vslide1down.vx v10, v10, a1
26 ; RV32-NEXT: srli a2, a2, 5
27 ; RV32-NEXT: vslide1down.vx v10, v10, a2
28 ; RV32-NEXT: vslidedown.vi v10, v10, 2
29 ; RV32-NEXT: vand.vi v10, v10, 1
30 ; RV32-NEXT: vmsne.vi v0, v10, 0
31 ; RV32-NEXT: vsetivli zero, 6, e32, m2, tu, mu
32 ; RV32-NEXT: vle32.v v8, (a0), v0.t
33 ; RV32-NEXT: vse32.v v8, (a3)
36 ; RV64-LABEL: vselect_vv_v6i32:
38 ; RV64-NEXT: lbu a2, 0(a2)
39 ; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma
40 ; RV64-NEXT: vle32.v v8, (a1)
41 ; RV64-NEXT: slli a1, a2, 62
42 ; RV64-NEXT: srli a1, a1, 63
43 ; RV64-NEXT: andi a4, a2, 1
44 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
45 ; RV64-NEXT: vmv.v.x v10, a4
46 ; RV64-NEXT: vslide1down.vx v10, v10, a1
47 ; RV64-NEXT: slli a1, a2, 61
48 ; RV64-NEXT: srli a1, a1, 63
49 ; RV64-NEXT: vslide1down.vx v10, v10, a1
50 ; RV64-NEXT: slli a1, a2, 60
51 ; RV64-NEXT: srli a1, a1, 63
52 ; RV64-NEXT: vslide1down.vx v10, v10, a1
53 ; RV64-NEXT: slli a1, a2, 59
54 ; RV64-NEXT: srli a1, a1, 63
55 ; RV64-NEXT: vslide1down.vx v10, v10, a1
56 ; RV64-NEXT: srli a2, a2, 5
57 ; RV64-NEXT: vslide1down.vx v10, v10, a2
58 ; RV64-NEXT: vslidedown.vi v10, v10, 2
59 ; RV64-NEXT: vand.vi v10, v10, 1
60 ; RV64-NEXT: vmsne.vi v0, v10, 0
61 ; RV64-NEXT: vsetivli zero, 6, e32, m2, tu, mu
62 ; RV64-NEXT: vle32.v v8, (a0), v0.t
63 ; RV64-NEXT: vse32.v v8, (a3)
65 %va = load <6 x i32>, ptr %a
66 %vb = load <6 x i32>, ptr %b
67 %vcc = load <6 x i1>, ptr %cc
68 %vsel = select <6 x i1> %vcc, <6 x i32> %va, <6 x i32> %vb
69 store <6 x i32> %vsel, ptr %z
; Vector-scalar select on <6 x i32>: the true operand is the scalar %a
; splatted through insertelement + shufflevector, the false operand is the
; vector loaded from %b, and the <6 x i1> mask comes from %cc.
; The mask is expected to be rebuilt from a scalar byte exactly as in the
; vector-vector case; the select is then expected to become a single
; vmerge.vxm taking the scalar straight from a0. The merge is issued under
; the "vsetvli zero, zero" configuration that follows the mask build, and a
; vsetivli with VL 6 is expected again before the store.
73 define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) {
74 ; RV32-LABEL: vselect_vx_v6i32:
76 ; RV32-NEXT: lbu a2, 0(a2)
77 ; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma
78 ; RV32-NEXT: vle32.v v8, (a1)
79 ; RV32-NEXT: slli a1, a2, 30
80 ; RV32-NEXT: srli a1, a1, 31
81 ; RV32-NEXT: andi a4, a2, 1
82 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
83 ; RV32-NEXT: vmv.v.x v10, a4
84 ; RV32-NEXT: vslide1down.vx v10, v10, a1
85 ; RV32-NEXT: slli a1, a2, 29
86 ; RV32-NEXT: srli a1, a1, 31
87 ; RV32-NEXT: vslide1down.vx v10, v10, a1
88 ; RV32-NEXT: slli a1, a2, 28
89 ; RV32-NEXT: srli a1, a1, 31
90 ; RV32-NEXT: vslide1down.vx v10, v10, a1
91 ; RV32-NEXT: slli a1, a2, 27
92 ; RV32-NEXT: srli a1, a1, 31
93 ; RV32-NEXT: vslide1down.vx v10, v10, a1
94 ; RV32-NEXT: srli a2, a2, 5
95 ; RV32-NEXT: vslide1down.vx v10, v10, a2
96 ; RV32-NEXT: vslidedown.vi v10, v10, 2
97 ; RV32-NEXT: vand.vi v10, v10, 1
98 ; RV32-NEXT: vmsne.vi v0, v10, 0
99 ; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
100 ; RV32-NEXT: vmerge.vxm v8, v8, a0, v0
101 ; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma
102 ; RV32-NEXT: vse32.v v8, (a3)
105 ; RV64-LABEL: vselect_vx_v6i32:
107 ; RV64-NEXT: lbu a2, 0(a2)
108 ; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma
109 ; RV64-NEXT: vle32.v v8, (a1)
110 ; RV64-NEXT: slli a1, a2, 62
111 ; RV64-NEXT: srli a1, a1, 63
112 ; RV64-NEXT: andi a4, a2, 1
113 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
114 ; RV64-NEXT: vmv.v.x v10, a4
115 ; RV64-NEXT: vslide1down.vx v10, v10, a1
116 ; RV64-NEXT: slli a1, a2, 61
117 ; RV64-NEXT: srli a1, a1, 63
118 ; RV64-NEXT: vslide1down.vx v10, v10, a1
119 ; RV64-NEXT: slli a1, a2, 60
120 ; RV64-NEXT: srli a1, a1, 63
121 ; RV64-NEXT: vslide1down.vx v10, v10, a1
122 ; RV64-NEXT: slli a1, a2, 59
123 ; RV64-NEXT: srli a1, a1, 63
124 ; RV64-NEXT: vslide1down.vx v10, v10, a1
125 ; RV64-NEXT: srli a2, a2, 5
126 ; RV64-NEXT: vslide1down.vx v10, v10, a2
127 ; RV64-NEXT: vslidedown.vi v10, v10, 2
128 ; RV64-NEXT: vand.vi v10, v10, 1
129 ; RV64-NEXT: vmsne.vi v0, v10, 0
130 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
131 ; RV64-NEXT: vmerge.vxm v8, v8, a0, v0
132 ; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma
133 ; RV64-NEXT: vse32.v v8, (a3)
135 %vb = load <6 x i32>, ptr %b
136 %ahead = insertelement <6 x i32> poison, i32 %a, i32 0
137 %va = shufflevector <6 x i32> %ahead, <6 x i32> poison, <6 x i32> zeroinitializer
138 %vcc = load <6 x i1>, ptr %cc
139 %vsel = select <6 x i1> %vcc, <6 x i32> %va, <6 x i32> %vb
140 store <6 x i32> %vsel, ptr %z
; Vector-immediate select on <6 x i32>: the true operand is the constant
; splat (i32 -1), the false operand is the vector loaded from %b, and the
; <6 x i1> mask comes from %cc.
; After the same byte-to-mask rebuild used by the other <6 x ...> tests, the
; select is expected to become vmerge.vim with the immediate -1, so no scalar
; register is needed for the constant.
144 define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) {
145 ; RV32-LABEL: vselect_vi_v6i32:
147 ; RV32-NEXT: lbu a1, 0(a1)
148 ; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma
149 ; RV32-NEXT: vle32.v v8, (a0)
150 ; RV32-NEXT: slli a0, a1, 30
151 ; RV32-NEXT: srli a0, a0, 31
152 ; RV32-NEXT: andi a3, a1, 1
153 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
154 ; RV32-NEXT: vmv.v.x v10, a3
155 ; RV32-NEXT: vslide1down.vx v10, v10, a0
156 ; RV32-NEXT: slli a0, a1, 29
157 ; RV32-NEXT: srli a0, a0, 31
158 ; RV32-NEXT: vslide1down.vx v10, v10, a0
159 ; RV32-NEXT: slli a0, a1, 28
160 ; RV32-NEXT: srli a0, a0, 31
161 ; RV32-NEXT: vslide1down.vx v10, v10, a0
162 ; RV32-NEXT: slli a0, a1, 27
163 ; RV32-NEXT: srli a0, a0, 31
164 ; RV32-NEXT: vslide1down.vx v10, v10, a0
165 ; RV32-NEXT: srli a1, a1, 5
166 ; RV32-NEXT: vslide1down.vx v10, v10, a1
167 ; RV32-NEXT: vslidedown.vi v10, v10, 2
168 ; RV32-NEXT: vand.vi v10, v10, 1
169 ; RV32-NEXT: vmsne.vi v0, v10, 0
170 ; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
171 ; RV32-NEXT: vmerge.vim v8, v8, -1, v0
172 ; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma
173 ; RV32-NEXT: vse32.v v8, (a2)
176 ; RV64-LABEL: vselect_vi_v6i32:
178 ; RV64-NEXT: lbu a1, 0(a1)
179 ; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma
180 ; RV64-NEXT: vle32.v v8, (a0)
181 ; RV64-NEXT: slli a0, a1, 62
182 ; RV64-NEXT: srli a0, a0, 63
183 ; RV64-NEXT: andi a3, a1, 1
184 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
185 ; RV64-NEXT: vmv.v.x v10, a3
186 ; RV64-NEXT: vslide1down.vx v10, v10, a0
187 ; RV64-NEXT: slli a0, a1, 61
188 ; RV64-NEXT: srli a0, a0, 63
189 ; RV64-NEXT: vslide1down.vx v10, v10, a0
190 ; RV64-NEXT: slli a0, a1, 60
191 ; RV64-NEXT: srli a0, a0, 63
192 ; RV64-NEXT: vslide1down.vx v10, v10, a0
193 ; RV64-NEXT: slli a0, a1, 59
194 ; RV64-NEXT: srli a0, a0, 63
195 ; RV64-NEXT: vslide1down.vx v10, v10, a0
196 ; RV64-NEXT: srli a1, a1, 5
197 ; RV64-NEXT: vslide1down.vx v10, v10, a1
198 ; RV64-NEXT: vslidedown.vi v10, v10, 2
199 ; RV64-NEXT: vand.vi v10, v10, 1
200 ; RV64-NEXT: vmsne.vi v0, v10, 0
201 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
202 ; RV64-NEXT: vmerge.vim v8, v8, -1, v0
203 ; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma
204 ; RV64-NEXT: vse32.v v8, (a2)
206 %vb = load <6 x i32>, ptr %b
207 %vcc = load <6 x i1>, ptr %cc
208 %vsel = select <6 x i1> %vcc, <6 x i32> splat (i32 -1), <6 x i32> %vb
209 store <6 x i32> %vsel, ptr %z
; Floating-point counterpart of the <6 x i32> vector-vector test: selects
; between the <6 x float> values loaded from %a and %b under a <6 x i1> mask
; loaded from %cc, and stores the result to %z.
; The expected instruction sequence is the same as for the integer element
; type: rebuild the mask from a scalar byte, then perform the select as a
; masked vle32.v of %a over the previously loaded %b.
214 define void @vselect_vv_v6f32(ptr %a, ptr %b, ptr %cc, ptr %z) {
215 ; RV32-LABEL: vselect_vv_v6f32:
217 ; RV32-NEXT: lbu a2, 0(a2)
218 ; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma
219 ; RV32-NEXT: vle32.v v8, (a1)
220 ; RV32-NEXT: slli a1, a2, 30
221 ; RV32-NEXT: srli a1, a1, 31
222 ; RV32-NEXT: andi a4, a2, 1
223 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
224 ; RV32-NEXT: vmv.v.x v10, a4
225 ; RV32-NEXT: vslide1down.vx v10, v10, a1
226 ; RV32-NEXT: slli a1, a2, 29
227 ; RV32-NEXT: srli a1, a1, 31
228 ; RV32-NEXT: vslide1down.vx v10, v10, a1
229 ; RV32-NEXT: slli a1, a2, 28
230 ; RV32-NEXT: srli a1, a1, 31
231 ; RV32-NEXT: vslide1down.vx v10, v10, a1
232 ; RV32-NEXT: slli a1, a2, 27
233 ; RV32-NEXT: srli a1, a1, 31
234 ; RV32-NEXT: vslide1down.vx v10, v10, a1
235 ; RV32-NEXT: srli a2, a2, 5
236 ; RV32-NEXT: vslide1down.vx v10, v10, a2
237 ; RV32-NEXT: vslidedown.vi v10, v10, 2
238 ; RV32-NEXT: vand.vi v10, v10, 1
239 ; RV32-NEXT: vmsne.vi v0, v10, 0
240 ; RV32-NEXT: vsetivli zero, 6, e32, m2, tu, mu
241 ; RV32-NEXT: vle32.v v8, (a0), v0.t
242 ; RV32-NEXT: vse32.v v8, (a3)
245 ; RV64-LABEL: vselect_vv_v6f32:
247 ; RV64-NEXT: lbu a2, 0(a2)
248 ; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma
249 ; RV64-NEXT: vle32.v v8, (a1)
250 ; RV64-NEXT: slli a1, a2, 62
251 ; RV64-NEXT: srli a1, a1, 63
252 ; RV64-NEXT: andi a4, a2, 1
253 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
254 ; RV64-NEXT: vmv.v.x v10, a4
255 ; RV64-NEXT: vslide1down.vx v10, v10, a1
256 ; RV64-NEXT: slli a1, a2, 61
257 ; RV64-NEXT: srli a1, a1, 63
258 ; RV64-NEXT: vslide1down.vx v10, v10, a1
259 ; RV64-NEXT: slli a1, a2, 60
260 ; RV64-NEXT: srli a1, a1, 63
261 ; RV64-NEXT: vslide1down.vx v10, v10, a1
262 ; RV64-NEXT: slli a1, a2, 59
263 ; RV64-NEXT: srli a1, a1, 63
264 ; RV64-NEXT: vslide1down.vx v10, v10, a1
265 ; RV64-NEXT: srli a2, a2, 5
266 ; RV64-NEXT: vslide1down.vx v10, v10, a2
267 ; RV64-NEXT: vslidedown.vi v10, v10, 2
268 ; RV64-NEXT: vand.vi v10, v10, 1
269 ; RV64-NEXT: vmsne.vi v0, v10, 0
270 ; RV64-NEXT: vsetivli zero, 6, e32, m2, tu, mu
271 ; RV64-NEXT: vle32.v v8, (a0), v0.t
272 ; RV64-NEXT: vse32.v v8, (a3)
274 %va = load <6 x float>, ptr %a
275 %vb = load <6 x float>, ptr %b
276 %vcc = load <6 x i1>, ptr %cc
277 %vsel = select <6 x i1> %vcc, <6 x float> %va, <6 x float> %vb
278 store <6 x float> %vsel, ptr %z
; Vector-scalar select on <6 x float>: the true operand is the float %a
; splatted through insertelement + shufflevector, the false operand is the
; vector loaded from %b, and the <6 x i1> mask comes from %cc.
; Because %a is a floating-point argument, the pointer arguments are
; expected in a0-a2 here and the scalar in fa0. After the byte-to-mask
; rebuild, the select is expected to become vfmerge.vfm reading fa0 directly.
282 define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) {
283 ; RV32-LABEL: vselect_vx_v6f32:
285 ; RV32-NEXT: lbu a1, 0(a1)
286 ; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma
287 ; RV32-NEXT: vle32.v v8, (a0)
288 ; RV32-NEXT: slli a0, a1, 30
289 ; RV32-NEXT: srli a0, a0, 31
290 ; RV32-NEXT: andi a3, a1, 1
291 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
292 ; RV32-NEXT: vmv.v.x v10, a3
293 ; RV32-NEXT: vslide1down.vx v10, v10, a0
294 ; RV32-NEXT: slli a0, a1, 29
295 ; RV32-NEXT: srli a0, a0, 31
296 ; RV32-NEXT: vslide1down.vx v10, v10, a0
297 ; RV32-NEXT: slli a0, a1, 28
298 ; RV32-NEXT: srli a0, a0, 31
299 ; RV32-NEXT: vslide1down.vx v10, v10, a0
300 ; RV32-NEXT: slli a0, a1, 27
301 ; RV32-NEXT: srli a0, a0, 31
302 ; RV32-NEXT: vslide1down.vx v10, v10, a0
303 ; RV32-NEXT: srli a1, a1, 5
304 ; RV32-NEXT: vslide1down.vx v10, v10, a1
305 ; RV32-NEXT: vslidedown.vi v10, v10, 2
306 ; RV32-NEXT: vand.vi v10, v10, 1
307 ; RV32-NEXT: vmsne.vi v0, v10, 0
308 ; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
309 ; RV32-NEXT: vfmerge.vfm v8, v8, fa0, v0
310 ; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma
311 ; RV32-NEXT: vse32.v v8, (a2)
314 ; RV64-LABEL: vselect_vx_v6f32:
316 ; RV64-NEXT: lbu a1, 0(a1)
317 ; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma
318 ; RV64-NEXT: vle32.v v8, (a0)
319 ; RV64-NEXT: slli a0, a1, 62
320 ; RV64-NEXT: srli a0, a0, 63
321 ; RV64-NEXT: andi a3, a1, 1
322 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
323 ; RV64-NEXT: vmv.v.x v10, a3
324 ; RV64-NEXT: vslide1down.vx v10, v10, a0
325 ; RV64-NEXT: slli a0, a1, 61
326 ; RV64-NEXT: srli a0, a0, 63
327 ; RV64-NEXT: vslide1down.vx v10, v10, a0
328 ; RV64-NEXT: slli a0, a1, 60
329 ; RV64-NEXT: srli a0, a0, 63
330 ; RV64-NEXT: vslide1down.vx v10, v10, a0
331 ; RV64-NEXT: slli a0, a1, 59
332 ; RV64-NEXT: srli a0, a0, 63
333 ; RV64-NEXT: vslide1down.vx v10, v10, a0
334 ; RV64-NEXT: srli a1, a1, 5
335 ; RV64-NEXT: vslide1down.vx v10, v10, a1
336 ; RV64-NEXT: vslidedown.vi v10, v10, 2
337 ; RV64-NEXT: vand.vi v10, v10, 1
338 ; RV64-NEXT: vmsne.vi v0, v10, 0
339 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
340 ; RV64-NEXT: vfmerge.vfm v8, v8, fa0, v0
341 ; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma
342 ; RV64-NEXT: vse32.v v8, (a2)
344 %vb = load <6 x float>, ptr %b
345 %ahead = insertelement <6 x float> poison, float %a, i32 0
346 %va = shufflevector <6 x float> %ahead, <6 x float> poison, <6 x i32> zeroinitializer
347 %vcc = load <6 x i1>, ptr %cc
348 %vsel = select <6 x i1> %vcc, <6 x float> %va, <6 x float> %vb
349 store <6 x float> %vsel, ptr %z
; Select on <6 x float> whose true operand is the constant splat (float 0.0);
; the false operand is the vector loaded from %b and the <6 x i1> mask comes
; from %cc.
; The interesting expectation is the final merge: floating-point zero is
; expected to be selected with the integer instruction vmerge.vim and the
; immediate 0, rather than with vfmerge.vfm and an FP register.
353 define void @vselect_vfpzero_v6f32(ptr %b, ptr %cc, ptr %z) {
354 ; RV32-LABEL: vselect_vfpzero_v6f32:
356 ; RV32-NEXT: lbu a1, 0(a1)
357 ; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma
358 ; RV32-NEXT: vle32.v v8, (a0)
359 ; RV32-NEXT: slli a0, a1, 30
360 ; RV32-NEXT: srli a0, a0, 31
361 ; RV32-NEXT: andi a3, a1, 1
362 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
363 ; RV32-NEXT: vmv.v.x v10, a3
364 ; RV32-NEXT: vslide1down.vx v10, v10, a0
365 ; RV32-NEXT: slli a0, a1, 29
366 ; RV32-NEXT: srli a0, a0, 31
367 ; RV32-NEXT: vslide1down.vx v10, v10, a0
368 ; RV32-NEXT: slli a0, a1, 28
369 ; RV32-NEXT: srli a0, a0, 31
370 ; RV32-NEXT: vslide1down.vx v10, v10, a0
371 ; RV32-NEXT: slli a0, a1, 27
372 ; RV32-NEXT: srli a0, a0, 31
373 ; RV32-NEXT: vslide1down.vx v10, v10, a0
374 ; RV32-NEXT: srli a1, a1, 5
375 ; RV32-NEXT: vslide1down.vx v10, v10, a1
376 ; RV32-NEXT: vslidedown.vi v10, v10, 2
377 ; RV32-NEXT: vand.vi v10, v10, 1
378 ; RV32-NEXT: vmsne.vi v0, v10, 0
379 ; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
380 ; RV32-NEXT: vmerge.vim v8, v8, 0, v0
381 ; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma
382 ; RV32-NEXT: vse32.v v8, (a2)
385 ; RV64-LABEL: vselect_vfpzero_v6f32:
387 ; RV64-NEXT: lbu a1, 0(a1)
388 ; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma
389 ; RV64-NEXT: vle32.v v8, (a0)
390 ; RV64-NEXT: slli a0, a1, 62
391 ; RV64-NEXT: srli a0, a0, 63
392 ; RV64-NEXT: andi a3, a1, 1
393 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
394 ; RV64-NEXT: vmv.v.x v10, a3
395 ; RV64-NEXT: vslide1down.vx v10, v10, a0
396 ; RV64-NEXT: slli a0, a1, 61
397 ; RV64-NEXT: srli a0, a0, 63
398 ; RV64-NEXT: vslide1down.vx v10, v10, a0
399 ; RV64-NEXT: slli a0, a1, 60
400 ; RV64-NEXT: srli a0, a0, 63
401 ; RV64-NEXT: vslide1down.vx v10, v10, a0
402 ; RV64-NEXT: slli a0, a1, 59
403 ; RV64-NEXT: srli a0, a0, 63
404 ; RV64-NEXT: vslide1down.vx v10, v10, a0
405 ; RV64-NEXT: srli a1, a1, 5
406 ; RV64-NEXT: vslide1down.vx v10, v10, a1
407 ; RV64-NEXT: vslidedown.vi v10, v10, 2
408 ; RV64-NEXT: vand.vi v10, v10, 1
409 ; RV64-NEXT: vmsne.vi v0, v10, 0
410 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
411 ; RV64-NEXT: vmerge.vim v8, v8, 0, v0
412 ; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma
413 ; RV64-NEXT: vse32.v v8, (a2)
415 %vb = load <6 x float>, ptr %b
416 %vcc = load <6 x i1>, ptr %cc
417 %vsel = select <6 x i1> %vcc, <6 x float> splat (float 0.0), <6 x float> %vb
418 store <6 x float> %vsel, ptr %z
; Vector-vector select on <8 x i32>: chooses between the vectors loaded from
; %a and %b under the <8 x i1> mask loaded from %cc, and stores to %z.
; With eight lanes the mask is expected to be loaded directly into v0 with
; vlm.v, and the select is expected to fold into a masked vle32.v of %a over
; the already-loaded %b. Both targets share one set of expectations.
422 define void @vselect_vv_v8i32(ptr %a, ptr %b, ptr %cc, ptr %z) {
423 ; CHECK-LABEL: vselect_vv_v8i32:
425 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu
426 ; CHECK-NEXT: vlm.v v0, (a2)
427 ; CHECK-NEXT: vle32.v v8, (a1)
428 ; CHECK-NEXT: vle32.v v8, (a0), v0.t
429 ; CHECK-NEXT: vse32.v v8, (a3)
431 %va = load <8 x i32>, ptr %a
432 %vb = load <8 x i32>, ptr %b
433 %vcc = load <8 x i1>, ptr %cc
434 %vsel = select <8 x i1> %vcc, <8 x i32> %va, <8 x i32> %vb
435 store <8 x i32> %vsel, ptr %z
; Vector-scalar select on <8 x i32>: the true operand is the scalar %a
; splatted via insertelement + shufflevector, the false operand is the vector
; loaded from %b, and the <8 x i1> mask is loaded from %cc.
; The splat is not expected to be materialized: the expected code loads the
; mask with vlm.v and merges the scalar in a0 with vmerge.vxm.
439 define void @vselect_vx_v8i32(i32 %a, ptr %b, ptr %cc, ptr %z) {
440 ; CHECK-LABEL: vselect_vx_v8i32:
442 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
443 ; CHECK-NEXT: vlm.v v0, (a2)
444 ; CHECK-NEXT: vle32.v v8, (a1)
445 ; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0
446 ; CHECK-NEXT: vse32.v v8, (a3)
448 %vb = load <8 x i32>, ptr %b
449 %ahead = insertelement <8 x i32> poison, i32 %a, i32 0
450 %va = shufflevector <8 x i32> %ahead, <8 x i32> poison, <8 x i32> zeroinitializer
451 %vcc = load <8 x i1>, ptr %cc
452 %vsel = select <8 x i1> %vcc, <8 x i32> %va, <8 x i32> %vb
453 store <8 x i32> %vsel, ptr %z
; Vector-immediate select on <8 x i32>: the true operand is the constant
; splat (i32 -1), the false operand is the vector loaded from %b, and the
; <8 x i1> mask is loaded from %cc.
; The constant is expected to be encoded as the immediate of vmerge.vim.
457 define void @vselect_vi_v8i32(ptr %b, ptr %cc, ptr %z) {
458 ; CHECK-LABEL: vselect_vi_v8i32:
460 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
461 ; CHECK-NEXT: vlm.v v0, (a1)
462 ; CHECK-NEXT: vle32.v v8, (a0)
463 ; CHECK-NEXT: vmerge.vim v8, v8, -1, v0
464 ; CHECK-NEXT: vse32.v v8, (a2)
466 %vb = load <8 x i32>, ptr %b
467 %vcc = load <8 x i1>, ptr %cc
468 %vsel = select <8 x i1> %vcc, <8 x i32> splat (i32 -1), <8 x i32> %vb
469 store <8 x i32> %vsel, ptr %z
; Vector-vector select on <8 x float>: chooses between the vectors loaded
; from %a and %b under the <8 x i1> mask loaded from %cc, and stores to %z.
; The expected code matches the <8 x i32> variant: vlm.v for the mask and a
; masked vle32.v of %a over the previously loaded %b.
473 define void @vselect_vv_v8f32(ptr %a, ptr %b, ptr %cc, ptr %z) {
474 ; CHECK-LABEL: vselect_vv_v8f32:
476 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu
477 ; CHECK-NEXT: vlm.v v0, (a2)
478 ; CHECK-NEXT: vle32.v v8, (a1)
479 ; CHECK-NEXT: vle32.v v8, (a0), v0.t
480 ; CHECK-NEXT: vse32.v v8, (a3)
482 %va = load <8 x float>, ptr %a
483 %vb = load <8 x float>, ptr %b
484 %vcc = load <8 x i1>, ptr %cc
485 %vsel = select <8 x i1> %vcc, <8 x float> %va, <8 x float> %vb
486 store <8 x float> %vsel, ptr %z
; Vector-scalar select on <8 x float>: the true operand is the float %a
; splatted via insertelement + shufflevector, the false operand is the vector
; loaded from %b, and the <8 x i1> mask is loaded from %cc.
; The scalar is expected to be merged straight from fa0 with vfmerge.vfm.
490 define void @vselect_vx_v8f32(float %a, ptr %b, ptr %cc, ptr %z) {
491 ; CHECK-LABEL: vselect_vx_v8f32:
493 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
494 ; CHECK-NEXT: vlm.v v0, (a1)
495 ; CHECK-NEXT: vle32.v v8, (a0)
496 ; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0
497 ; CHECK-NEXT: vse32.v v8, (a2)
499 %vb = load <8 x float>, ptr %b
500 %ahead = insertelement <8 x float> poison, float %a, i32 0
501 %va = shufflevector <8 x float> %ahead, <8 x float> poison, <8 x i32> zeroinitializer
502 %vcc = load <8 x i1>, ptr %cc
503 %vsel = select <8 x i1> %vcc, <8 x float> %va, <8 x float> %vb
504 store <8 x float> %vsel, ptr %z
; Select on <8 x float> whose true operand is the constant splat (float 0.0);
; the false operand is the vector loaded from %b and the <8 x i1> mask is
; loaded from %cc.
; Floating-point zero is expected to be merged with the integer instruction
; vmerge.vim and immediate 0, so no FP register is involved.
508 define void @vselect_vfpzero_v8f32(ptr %b, ptr %cc, ptr %z) {
509 ; CHECK-LABEL: vselect_vfpzero_v8f32:
511 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
512 ; CHECK-NEXT: vlm.v v0, (a1)
513 ; CHECK-NEXT: vle32.v v8, (a0)
514 ; CHECK-NEXT: vmerge.vim v8, v8, 0, v0
515 ; CHECK-NEXT: vse32.v v8, (a2)
517 %vb = load <8 x float>, ptr %b
518 %vcc = load <8 x i1>, ptr %cc
519 %vsel = select <8 x i1> %vcc, <8 x float> splat (float 0.0), <8 x float> %vb
520 store <8 x float> %vsel, ptr %z
; Vector-vector select on <16 x i16>: chooses between the vectors loaded from
; %a and %b under the <16 x i1> mask loaded from %cc, and stores to %z.
; Same expected shape as the 32-bit element tests, but with e16 element
; width and vle16.v / vse16.v memory accesses.
524 define void @vselect_vv_v16i16(ptr %a, ptr %b, ptr %cc, ptr %z) {
525 ; CHECK-LABEL: vselect_vv_v16i16:
527 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu
528 ; CHECK-NEXT: vlm.v v0, (a2)
529 ; CHECK-NEXT: vle16.v v8, (a1)
530 ; CHECK-NEXT: vle16.v v8, (a0), v0.t
531 ; CHECK-NEXT: vse16.v v8, (a3)
533 %va = load <16 x i16>, ptr %a
534 %vb = load <16 x i16>, ptr %b
535 %vcc = load <16 x i1>, ptr %cc
536 %vsel = select <16 x i1> %vcc, <16 x i16> %va, <16 x i16> %vb
537 store <16 x i16> %vsel, ptr %z
; Vector-scalar select on <16 x i16>: the true operand is the signext i16
; argument %a splatted via insertelement + shufflevector, the false operand
; is the vector loaded from %b, and the <16 x i1> mask is loaded from %cc.
; The scalar is expected to be merged directly from a0 with vmerge.vxm.
541 define void @vselect_vx_v16i16(i16 signext %a, ptr %b, ptr %cc, ptr %z) {
542 ; CHECK-LABEL: vselect_vx_v16i16:
544 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
545 ; CHECK-NEXT: vlm.v v0, (a2)
546 ; CHECK-NEXT: vle16.v v8, (a1)
547 ; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0
548 ; CHECK-NEXT: vse16.v v8, (a3)
550 %vb = load <16 x i16>, ptr %b
551 %ahead = insertelement <16 x i16> poison, i16 %a, i32 0
552 %va = shufflevector <16 x i16> %ahead, <16 x i16> poison, <16 x i32> zeroinitializer
553 %vcc = load <16 x i1>, ptr %cc
554 %vsel = select <16 x i1> %vcc, <16 x i16> %va, <16 x i16> %vb
555 store <16 x i16> %vsel, ptr %z
; Vector-immediate select on <16 x i16>: the true operand is the constant
; splat (i16 4), the false operand is the vector loaded from %b, and the
; <16 x i1> mask is loaded from %cc.
; The constant 4 is expected to appear as the immediate of vmerge.vim.
559 define void @vselect_vi_v16i16(ptr %b, ptr %cc, ptr %z) {
560 ; CHECK-LABEL: vselect_vi_v16i16:
562 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
563 ; CHECK-NEXT: vlm.v v0, (a1)
564 ; CHECK-NEXT: vle16.v v8, (a0)
565 ; CHECK-NEXT: vmerge.vim v8, v8, 4, v0
566 ; CHECK-NEXT: vse16.v v8, (a2)
568 %vb = load <16 x i16>, ptr %b
569 %vcc = load <16 x i1>, ptr %cc
570 %vsel = select <16 x i1> %vcc, <16 x i16> splat (i16 4), <16 x i16> %vb
571 store <16 x i16> %vsel, ptr %z
; Vector-vector select on <32 x half>: chooses between the vectors loaded
; from %a and %b under the <32 x i1> mask loaded from %cc, and stores to %z.
; The vector length of 32 is expected to be materialized in a scratch
; register with li and applied with vsetvli (register form) at e16, m4; the
; select itself is again a masked vle16.v of %a over the loaded %b.
575 define void @vselect_vv_v32f16(ptr %a, ptr %b, ptr %cc, ptr %z) {
576 ; CHECK-LABEL: vselect_vv_v32f16:
578 ; CHECK-NEXT: li a4, 32
579 ; CHECK-NEXT: vsetvli zero, a4, e16, m4, ta, mu
580 ; CHECK-NEXT: vlm.v v0, (a2)
581 ; CHECK-NEXT: vle16.v v8, (a1)
582 ; CHECK-NEXT: vle16.v v8, (a0), v0.t
583 ; CHECK-NEXT: vse16.v v8, (a3)
585 %va = load <32 x half>, ptr %a
586 %vb = load <32 x half>, ptr %b
587 %vcc = load <32 x i1>, ptr %cc
588 %vsel = select <32 x i1> %vcc, <32 x half> %va, <32 x half> %vb
589 store <32 x half> %vsel, ptr %z
; Vector-scalar select on <32 x half>: the true operand is the half %a
; splatted via insertelement + shufflevector, the false operand is the vector
; loaded from %b, and the <32 x i1> mask is loaded from %cc.
; The expected code sets VL to 32 through li + vsetvli and merges the scalar
; from fa0 with vfmerge.vfm at e16.
593 define void @vselect_vx_v32f16(half %a, ptr %b, ptr %cc, ptr %z) {
594 ; CHECK-LABEL: vselect_vx_v32f16:
596 ; CHECK-NEXT: li a3, 32
597 ; CHECK-NEXT: vsetvli zero, a3, e16, m4, ta, ma
598 ; CHECK-NEXT: vlm.v v0, (a1)
599 ; CHECK-NEXT: vle16.v v8, (a0)
600 ; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0
601 ; CHECK-NEXT: vse16.v v8, (a2)
603 %vb = load <32 x half>, ptr %b
604 %ahead = insertelement <32 x half> poison, half %a, i32 0
605 %va = shufflevector <32 x half> %ahead, <32 x half> poison, <32 x i32> zeroinitializer
606 %vcc = load <32 x i1>, ptr %cc
607 %vsel = select <32 x i1> %vcc, <32 x half> %va, <32 x half> %vb
608 store <32 x half> %vsel, ptr %z
; Select on <32 x half> whose true operand is the constant splat (half 0.0);
; the false operand is the vector loaded from %b and the <32 x i1> mask is
; loaded from %cc.
; As in the float variants, zero is expected to be merged with the integer
; vmerge.vim and immediate 0; VL 32 is set through li + vsetvli.
612 define void @vselect_vfpzero_v32f16(ptr %b, ptr %cc, ptr %z) {
613 ; CHECK-LABEL: vselect_vfpzero_v32f16:
615 ; CHECK-NEXT: li a3, 32
616 ; CHECK-NEXT: vsetvli zero, a3, e16, m4, ta, ma
617 ; CHECK-NEXT: vlm.v v0, (a1)
618 ; CHECK-NEXT: vle16.v v8, (a0)
619 ; CHECK-NEXT: vmerge.vim v8, v8, 0, v0
620 ; CHECK-NEXT: vse16.v v8, (a2)
622 %vb = load <32 x half>, ptr %b
623 %vcc = load <32 x i1>, ptr %cc
624 %vsel = select <32 x i1> %vcc, <32 x half> splat (half 0.0), <32 x half> %vb
625 store <32 x half> %vsel, ptr %z
; Select between two <2 x i1> mask vectors %a and %b under a <2 x i1>
; condition %cc, all passed as arguments.
; No merge instruction is expected: the select is expected to lower to the
; mask-logical sequence vmandn.mm / vmand.mm / vmor.mm, with the result left
; in v0.
629 define <2 x i1> @vselect_v2i1(<2 x i1> %a, <2 x i1> %b, <2 x i1> %cc) {
630 ; CHECK-LABEL: vselect_v2i1:
632 ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
633 ; CHECK-NEXT: vmandn.mm v8, v8, v9
634 ; CHECK-NEXT: vmand.mm v9, v0, v9
635 ; CHECK-NEXT: vmor.mm v0, v9, v8
637 %v = select <2 x i1> %cc, <2 x i1> %a, <2 x i1> %b
; Select between two <4 x i1> mask vectors %a and %b under a <4 x i1>
; condition %cc.
; Expected lowering is the same three mask-logical instructions as the
; <2 x i1> case, configured with VL 4 at e8, mf4.
641 define <4 x i1> @vselect_v4i1(<4 x i1> %a, <4 x i1> %b, <4 x i1> %cc) {
642 ; CHECK-LABEL: vselect_v4i1:
644 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
645 ; CHECK-NEXT: vmandn.mm v8, v8, v9
646 ; CHECK-NEXT: vmand.mm v9, v0, v9
647 ; CHECK-NEXT: vmor.mm v0, v9, v8
649 %v = select <4 x i1> %cc, <4 x i1> %a, <4 x i1> %b
; Select between two <8 x i1> mask vectors %a and %b under an <8 x i1>
; condition %cc.
; Expected lowering is vmandn.mm / vmand.mm / vmor.mm with VL 8 at e8, mf2.
653 define <8 x i1> @vselect_v8i1(<8 x i1> %a, <8 x i1> %b, <8 x i1> %cc) {
654 ; CHECK-LABEL: vselect_v8i1:
656 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
657 ; CHECK-NEXT: vmandn.mm v8, v8, v9
658 ; CHECK-NEXT: vmand.mm v9, v0, v9
659 ; CHECK-NEXT: vmor.mm v0, v9, v8
661 %v = select <8 x i1> %cc, <8 x i1> %a, <8 x i1> %b
; Select between two <16 x i1> mask vectors %a and %b under a <16 x i1>
; condition %cc.
; Expected lowering is vmandn.mm / vmand.mm / vmor.mm with VL 16 at e8, m1.
665 define <16 x i1> @vselect_v16i1(<16 x i1> %a, <16 x i1> %b, <16 x i1> %cc) {
666 ; CHECK-LABEL: vselect_v16i1:
668 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
669 ; CHECK-NEXT: vmandn.mm v8, v8, v9
670 ; CHECK-NEXT: vmand.mm v9, v0, v9
671 ; CHECK-NEXT: vmor.mm v0, v9, v8
673 %v = select <16 x i1> %cc, <16 x i1> %a, <16 x i1> %b
; Select between two <32 x i1> mask vectors %a and %b under a <32 x i1>
; condition %cc.
; Same mask-logical lowering as the smaller mask tests; the vector length of
; 32 is expected to be loaded with li and applied with vsetvli at e8, m2.
677 define <32 x i1> @vselect_v32i1(<32 x i1> %a, <32 x i1> %b, <32 x i1> %cc) {
678 ; CHECK-LABEL: vselect_v32i1:
680 ; CHECK-NEXT: li a0, 32
681 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
682 ; CHECK-NEXT: vmandn.mm v8, v8, v9
683 ; CHECK-NEXT: vmand.mm v9, v0, v9
684 ; CHECK-NEXT: vmor.mm v0, v9, v8
686 %v = select <32 x i1> %cc, <32 x i1> %a, <32 x i1> %b
; Select between two <64 x i1> mask vectors %a and %b under a <64 x i1>
; condition %cc.
; Same mask-logical lowering as the smaller mask tests; the vector length of
; 64 is expected to be loaded with li and applied with vsetvli at e8, m4.
690 define <64 x i1> @vselect_v64i1(<64 x i1> %a, <64 x i1> %b, <64 x i1> %cc) {
691 ; CHECK-LABEL: vselect_v64i1:
693 ; CHECK-NEXT: li a0, 64
694 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
695 ; CHECK-NEXT: vmandn.mm v8, v8, v9
696 ; CHECK-NEXT: vmand.mm v9, v0, v9
697 ; CHECK-NEXT: vmor.mm v0, v9, v8
699 %v = select <64 x i1> %cc, <64 x i1> %a, <64 x i1> %b