; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32NOM
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32M
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64NOM
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64M

define i8 @extractelt_v16i8(ptr %x) nounwind {
; CHECK-LABEL: extractelt_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vslidedown.vi v8, v8, 7
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = extractelement <16 x i8> %a, i32 7
  ret i8 %b
}

define i16 @extractelt_v8i16(ptr %x) nounwind {
; CHECK-LABEL: extractelt_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vslidedown.vi v8, v8, 7
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = extractelement <8 x i16> %a, i32 7
  ret i16 %b
}

define i32 @extractelt_v4i32(ptr %x) nounwind {
; CHECK-LABEL: extractelt_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vslidedown.vi v8, v8, 2
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = extractelement <4 x i32> %a, i32 2
  ret i32 %b
}

define i64 @extractelt_v2i64(ptr %x) nounwind {
; RV32-LABEL: extractelt_v2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vle64.v v8, (a0)
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v9, v8, a0
; RV32-NEXT:    vmv.x.s a1, v9
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: extractelt_v2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vle64.v v8, (a0)
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = extractelement <2 x i64> %a, i32 0
  ret i64 %b
}
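
; Hand-written note (not part of the autogenerated checks): on RV32, an i64
; value is returned in a register pair under the ilp32d ABI, so the RV32 block
; above extracts the two halves separately: a 32-bit vsrl.vx exposes the high
; half for a1, and vmv.x.s reads the low half into a0. In scalar IR terms
; (an illustrative sketch, not part of the test):
;   %shift = lshr i64 %b, 32
;   %lo = trunc i64 %b to i32      ; -> a0
;   %hi = trunc i64 %shift to i32  ; -> a1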

define half @extractelt_v8f16(ptr %x) nounwind {
; CHECK-LABEL: extractelt_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vslidedown.vi v8, v8, 7
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %a = load <8 x half>, ptr %x
  %b = extractelement <8 x half> %a, i32 7
  ret half %b
}

define float @extractelt_v4f32(ptr %x) nounwind {
; CHECK-LABEL: extractelt_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vslidedown.vi v8, v8, 2
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = extractelement <4 x float> %a, i32 2
  ret float %b
}

define double @extractelt_v2f64(ptr %x) nounwind {
; CHECK-LABEL: extractelt_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %a = load <2 x double>, ptr %x
  %b = extractelement <2 x double> %a, i32 0
  ret double %b
}

define i8 @extractelt_v32i8(ptr %x) nounwind {
; CHECK-LABEL: extractelt_v32i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 7
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = extractelement <32 x i8> %a, i32 7
  ret i8 %b
}

define i16 @extractelt_v16i16(ptr %x) nounwind {
; CHECK-LABEL: extractelt_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 7
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = extractelement <16 x i16> %a, i32 7
  ret i16 %b
}

define i32 @extractelt_v8i32(ptr %x) nounwind {
; CHECK-LABEL: extractelt_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vslidedown.vi v8, v8, 6
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = extractelement <8 x i32> %a, i32 6
  ret i32 %b
}

define i64 @extractelt_v4i64(ptr %x) nounwind {
; RV32-LABEL: extractelt_v4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT:    vle64.v v8, (a0)
; RV32-NEXT:    vslidedown.vi v8, v8, 3
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV32-NEXT:    vsrl.vx v10, v8, a0
; RV32-NEXT:    vmv.x.s a1, v10
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: extractelt_v4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT:    vle64.v v8, (a0)
; RV64-NEXT:    vslidedown.vi v8, v8, 3
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = extractelement <4 x i64> %a, i32 3
  ret i64 %b
}

define half @extractelt_v16f16(ptr %x) nounwind {
; CHECK-LABEL: extractelt_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 7
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %a = load <16 x half>, ptr %x
  %b = extractelement <16 x half> %a, i32 7
  ret half %b
}

define float @extractelt_v8f32(ptr %x) nounwind {
; CHECK-LABEL: extractelt_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 2
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %a = load <8 x float>, ptr %x
  %b = extractelement <8 x float> %a, i32 2
  ret float %b
}

define double @extractelt_v4f64(ptr %x) nounwind {
; CHECK-LABEL: extractelt_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %a = load <4 x double>, ptr %x
  %b = extractelement <4 x double> %a, i32 0
  ret double %b
}

; This uses a non-power of 2 type so that it isn't an MVT, to catch an
; incorrect use of getSimpleValueType().
; NOTE: Type legalization is bitcasting to vXi32 and doing 2 independent
; slidedowns and extracts.
define i64 @extractelt_v3i64(ptr %x) nounwind {
; RV32-LABEL: extractelt_v3i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 3, e64, m2, ta, ma
; RV32-NEXT:    vle64.v v8, (a0)
; RV32-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32-NEXT:    vslidedown.vi v10, v8, 4
; RV32-NEXT:    vmv.x.s a0, v10
; RV32-NEXT:    vslidedown.vi v8, v8, 5
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: extractelt_v3i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 3, e64, m2, ta, ma
; RV64-NEXT:    vle64.v v8, (a0)
; RV64-NEXT:    vslidedown.vi v8, v8, 2
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %a = load <3 x i64>, ptr %x
  %b = extractelement <3 x i64> %a, i32 2
  ret i64 %b
}
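
; Hand-written sketch (not part of the test) of the RV32 legalization the
; NOTE above describes: the i64 extract is performed as two i32 extracts on
; a bitcast vector, which is where the slidedowns by 4 and 5 come from:
;   %cast = bitcast <3 x i64> %a to <6 x i32>
;   %lo = extractelement <6 x i32> %cast, i32 4  ; 2*2     -> a0
;   %hi = extractelement <6 x i32> %cast, i32 5  ; 2*2 + 1 -> a1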

define i32 @extractelt_v32i32(ptr %x) nounwind {
; RV32-LABEL: extractelt_v32i32:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -256
; RV32-NEXT:    sw ra, 252(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s0, 248(sp) # 4-byte Folded Spill
; RV32-NEXT:    addi s0, sp, 256
; RV32-NEXT:    andi sp, sp, -128
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; RV32-NEXT:    vle32.v v8, (a0)
; RV32-NEXT:    mv a0, sp
; RV32-NEXT:    vse32.v v8, (a0)
; RV32-NEXT:    lw a0, 124(sp)
; RV32-NEXT:    addi sp, s0, -256
; RV32-NEXT:    lw ra, 252(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s0, 248(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 256
; RV32-NEXT:    ret
;
; RV64-LABEL: extractelt_v32i32:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -256
; RV64-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
; RV64-NEXT:    addi s0, sp, 256
; RV64-NEXT:    andi sp, sp, -128
; RV64-NEXT:    li a1, 32
; RV64-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; RV64-NEXT:    vle32.v v8, (a0)
; RV64-NEXT:    mv a0, sp
; RV64-NEXT:    vse32.v v8, (a0)
; RV64-NEXT:    lw a0, 124(sp)
; RV64-NEXT:    addi sp, s0, -256
; RV64-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 256
; RV64-NEXT:    ret
  %a = load <32 x i32>, ptr %x
  %b = extractelement <32 x i32> %a, i32 31
  ret i32 %b
}

; Exercise type legalization for a type beyond LMUL8 (see the note after this
; function).
define i32 @extractelt_v64i32(ptr %x) nounwind {
; RV32-LABEL: extractelt_v64i32:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -256
; RV32-NEXT:    sw ra, 252(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s0, 248(sp) # 4-byte Folded Spill
; RV32-NEXT:    addi s0, sp, 256
; RV32-NEXT:    andi sp, sp, -128
; RV32-NEXT:    addi a0, a0, 128
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; RV32-NEXT:    vle32.v v8, (a0)
; RV32-NEXT:    mv a0, sp
; RV32-NEXT:    vse32.v v8, (a0)
; RV32-NEXT:    lw a0, 124(sp)
; RV32-NEXT:    addi sp, s0, -256
; RV32-NEXT:    lw ra, 252(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s0, 248(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 256
; RV32-NEXT:    ret
;
; RV64-LABEL: extractelt_v64i32:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -256
; RV64-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
; RV64-NEXT:    addi s0, sp, 256
; RV64-NEXT:    andi sp, sp, -128
; RV64-NEXT:    addi a0, a0, 128
; RV64-NEXT:    li a1, 32
; RV64-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; RV64-NEXT:    vle32.v v8, (a0)
; RV64-NEXT:    mv a0, sp
; RV64-NEXT:    vse32.v v8, (a0)
; RV64-NEXT:    lw a0, 124(sp)
; RV64-NEXT:    addi sp, s0, -256
; RV64-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 256
; RV64-NEXT:    ret
  %a = load <64 x i32>, ptr %x
  %b = extractelement <64 x i32> %a, i32 63
  ret i32 %b
}
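
; Hand-written note (not part of the test): <64 x i32> does not fit in a
; single vector register group even at m8 (32 x i32 at the guaranteed minimum
; VLEN of 128), so legalization splits it into two <32 x i32> halves. Element
; 63 lives in the high half, which is why the code above only loads from the
; adjusted pointer (addi a0, a0, 128), spills that half to an aligned stack
; slot, and reloads the scalar at offset 124 = (63 - 32) * 4.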

define i8 @extractelt_v16i8_idx(ptr %x, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v16i8_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vslidedown.vx v8, v8, a1
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = extractelement <16 x i8> %a, i32 %idx
  ret i8 %b
}

define i16 @extractelt_v8i16_idx(ptr %x, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v8i16_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vslidedown.vx v8, v8, a1
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = extractelement <8 x i16> %a, i32 %idx
  ret i16 %b
}

define i32 @extractelt_v4i32_idx(ptr %x, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v4i32_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vslidedown.vx v8, v8, a1
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = add <4 x i32> %a, %a
  %c = extractelement <4 x i32> %b, i32 %idx
  ret i32 %c
}

define i64 @extractelt_v2i64_idx(ptr %x, i32 zeroext %idx) nounwind {
; RV32-LABEL: extractelt_v2i64_idx:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vle64.v v8, (a0)
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vslidedown.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: extractelt_v2i64_idx:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vle64.v v8, (a0)
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vslidedown.vx v8, v8, a1
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = add <2 x i64> %a, %a
  %c = extractelement <2 x i64> %b, i32 %idx
  ret i64 %c
}

define half @extractelt_v8f16_idx(ptr %x, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v8f16_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfadd.vv v8, v8, v8
; CHECK-NEXT:    vslidedown.vx v8, v8, a1
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %a = load <8 x half>, ptr %x
  %b = fadd <8 x half> %a, %a
  %c = extractelement <8 x half> %b, i32 %idx
  ret half %c
}

define float @extractelt_v4f32_idx(ptr %x, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v4f32_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfadd.vv v8, v8, v8
; CHECK-NEXT:    vslidedown.vx v8, v8, a1
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = fadd <4 x float> %a, %a
  %c = extractelement <4 x float> %b, i32 %idx
  ret float %c
}

define double @extractelt_v2f64_idx(ptr %x, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v2f64_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vfadd.vv v8, v8, v8
; CHECK-NEXT:    vslidedown.vx v8, v8, a1
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %a = load <2 x double>, ptr %x
  %b = fadd <2 x double> %a, %a
  %c = extractelement <2 x double> %b, i32 %idx
  ret double %c
}

define i8 @extractelt_v32i8_idx(ptr %x, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v32i8_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vslidedown.vx v8, v8, a1
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = extractelement <32 x i8> %a, i32 %idx
  ret i8 %b
}

define i16 @extractelt_v16i16_idx(ptr %x, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v16i16_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vslidedown.vx v8, v8, a1
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = extractelement <16 x i16> %a, i32 %idx
  ret i16 %b
}

define i32 @extractelt_v8i32_idx(ptr %x, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v8i32_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vslidedown.vx v8, v8, a1
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = add <8 x i32> %a, %a
  %c = extractelement <8 x i32> %b, i32 %idx
  ret i32 %c
}

define i64 @extractelt_v4i64_idx(ptr %x, i32 zeroext %idx) nounwind {
; RV32-LABEL: extractelt_v4i64_idx:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT:    vle64.v v8, (a0)
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vslidedown.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: extractelt_v4i64_idx:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT:    vle64.v v8, (a0)
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vslidedown.vx v8, v8, a1
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = add <4 x i64> %a, %a
  %c = extractelement <4 x i64> %b, i32 %idx
  ret i64 %c
}

define half @extractelt_v16f16_idx(ptr %x, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v16f16_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfadd.vv v8, v8, v8
; CHECK-NEXT:    vslidedown.vx v8, v8, a1
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %a = load <16 x half>, ptr %x
  %b = fadd <16 x half> %a, %a
  %c = extractelement <16 x half> %b, i32 %idx
  ret half %c
}

define float @extractelt_v8f32_idx(ptr %x, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v8f32_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfadd.vv v8, v8, v8
; CHECK-NEXT:    vslidedown.vx v8, v8, a1
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %a = load <8 x float>, ptr %x
  %b = fadd <8 x float> %a, %a
  %c = extractelement <8 x float> %b, i32 %idx
  ret float %c
}

define double @extractelt_v4f64_idx(ptr %x, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v4f64_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vfadd.vv v8, v8, v8
; CHECK-NEXT:    vslidedown.vx v8, v8, a1
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %a = load <4 x double>, ptr %x
  %b = fadd <4 x double> %a, %a
  %c = extractelement <4 x double> %b, i32 %idx
  ret double %c
}

; This uses a non-power of 2 type so that it isn't an MVT, to catch an
; incorrect use of getSimpleValueType().
; NOTE: Type legalization is bitcasting to vXi32 and doing 2 independent
; slidedowns and extracts.
define i64 @extractelt_v3i64_idx(ptr %x, i32 zeroext %idx) nounwind {
; RV32-LABEL: extractelt_v3i64_idx:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 3, e64, m2, ta, ma
; RV32-NEXT:    vle64.v v8, (a0)
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    add a1, a1, a1
; RV32-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32-NEXT:    vslidedown.vx v10, v8, a1
; RV32-NEXT:    vmv.x.s a0, v10
; RV32-NEXT:    addi a1, a1, 1
; RV32-NEXT:    vslidedown.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: extractelt_v3i64_idx:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 3, e64, m2, ta, ma
; RV64-NEXT:    vle64.v v8, (a0)
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vslidedown.vx v8, v8, a1
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %a = load <3 x i64>, ptr %x
  %b = add <3 x i64> %a, %a
  %c = extractelement <3 x i64> %b, i32 %idx
  ret i64 %c
}

define i32 @extractelt_v32i32_idx(ptr %x, i32 zeroext %idx) nounwind {
; RV32NOM-LABEL: extractelt_v32i32_idx:
; RV32NOM:       # %bb.0:
; RV32NOM-NEXT:    addi sp, sp, -256
; RV32NOM-NEXT:    sw ra, 252(sp) # 4-byte Folded Spill
; RV32NOM-NEXT:    sw s0, 248(sp) # 4-byte Folded Spill
; RV32NOM-NEXT:    sw s2, 244(sp) # 4-byte Folded Spill
; RV32NOM-NEXT:    addi s0, sp, 256
; RV32NOM-NEXT:    andi sp, sp, -128
; RV32NOM-NEXT:    mv s2, a0
; RV32NOM-NEXT:    andi a0, a1, 31
; RV32NOM-NEXT:    li a1, 4
; RV32NOM-NEXT:    call __mulsi3
; RV32NOM-NEXT:    li a1, 32
; RV32NOM-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; RV32NOM-NEXT:    vle32.v v8, (s2)
; RV32NOM-NEXT:    mv a1, sp
; RV32NOM-NEXT:    add a0, a1, a0
; RV32NOM-NEXT:    vadd.vv v8, v8, v8
; RV32NOM-NEXT:    vse32.v v8, (a1)
; RV32NOM-NEXT:    lw a0, 0(a0)
; RV32NOM-NEXT:    addi sp, s0, -256
; RV32NOM-NEXT:    lw ra, 252(sp) # 4-byte Folded Reload
; RV32NOM-NEXT:    lw s0, 248(sp) # 4-byte Folded Reload
; RV32NOM-NEXT:    lw s2, 244(sp) # 4-byte Folded Reload
; RV32NOM-NEXT:    addi sp, sp, 256
; RV32NOM-NEXT:    ret
;
; RV32M-LABEL: extractelt_v32i32_idx:
; RV32M:       # %bb.0:
; RV32M-NEXT:    addi sp, sp, -256
; RV32M-NEXT:    sw ra, 252(sp) # 4-byte Folded Spill
; RV32M-NEXT:    sw s0, 248(sp) # 4-byte Folded Spill
; RV32M-NEXT:    addi s0, sp, 256
; RV32M-NEXT:    andi sp, sp, -128
; RV32M-NEXT:    andi a1, a1, 31
; RV32M-NEXT:    li a2, 32
; RV32M-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; RV32M-NEXT:    vle32.v v8, (a0)
; RV32M-NEXT:    slli a1, a1, 2
; RV32M-NEXT:    mv a0, sp
; RV32M-NEXT:    or a1, a0, a1
; RV32M-NEXT:    vadd.vv v8, v8, v8
; RV32M-NEXT:    vse32.v v8, (a0)
; RV32M-NEXT:    lw a0, 0(a1)
; RV32M-NEXT:    addi sp, s0, -256
; RV32M-NEXT:    lw ra, 252(sp) # 4-byte Folded Reload
; RV32M-NEXT:    lw s0, 248(sp) # 4-byte Folded Reload
; RV32M-NEXT:    addi sp, sp, 256
; RV32M-NEXT:    ret
;
; RV64NOM-LABEL: extractelt_v32i32_idx:
; RV64NOM:       # %bb.0:
; RV64NOM-NEXT:    addi sp, sp, -256
; RV64NOM-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
; RV64NOM-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
; RV64NOM-NEXT:    sd s2, 232(sp) # 8-byte Folded Spill
; RV64NOM-NEXT:    addi s0, sp, 256
; RV64NOM-NEXT:    andi sp, sp, -128
; RV64NOM-NEXT:    mv s2, a0
; RV64NOM-NEXT:    andi a0, a1, 31
; RV64NOM-NEXT:    li a1, 4
; RV64NOM-NEXT:    call __muldi3
; RV64NOM-NEXT:    li a1, 32
; RV64NOM-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; RV64NOM-NEXT:    vle32.v v8, (s2)
; RV64NOM-NEXT:    mv a1, sp
; RV64NOM-NEXT:    add a0, a1, a0
; RV64NOM-NEXT:    vadd.vv v8, v8, v8
; RV64NOM-NEXT:    vse32.v v8, (a1)
; RV64NOM-NEXT:    lw a0, 0(a0)
; RV64NOM-NEXT:    addi sp, s0, -256
; RV64NOM-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
; RV64NOM-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
; RV64NOM-NEXT:    ld s2, 232(sp) # 8-byte Folded Reload
; RV64NOM-NEXT:    addi sp, sp, 256
; RV64NOM-NEXT:    ret
;
; RV64M-LABEL: extractelt_v32i32_idx:
; RV64M:       # %bb.0:
; RV64M-NEXT:    addi sp, sp, -256
; RV64M-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
; RV64M-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
; RV64M-NEXT:    addi s0, sp, 256
; RV64M-NEXT:    andi sp, sp, -128
; RV64M-NEXT:    andi a1, a1, 31
; RV64M-NEXT:    li a2, 32
; RV64M-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; RV64M-NEXT:    vle32.v v8, (a0)
; RV64M-NEXT:    slli a1, a1, 2
; RV64M-NEXT:    mv a0, sp
; RV64M-NEXT:    or a1, a0, a1
; RV64M-NEXT:    vadd.vv v8, v8, v8
; RV64M-NEXT:    vse32.v v8, (a0)
; RV64M-NEXT:    lw a0, 0(a1)
; RV64M-NEXT:    addi sp, s0, -256
; RV64M-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
; RV64M-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
; RV64M-NEXT:    addi sp, sp, 256
; RV64M-NEXT:    ret
  %a = load <32 x i32>, ptr %x
  %b = add <32 x i32> %a, %a
  %c = extractelement <32 x i32> %b, i32 %idx
  ret i32 %c
}
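
; Hand-written note (not part of the test): a variable index into a vector
; spanning multiple registers goes through the stack. The index is clamped to
; the vector bound (andi a1, a1, 31), scaled by the 4-byte element size (a
; __mulsi3/__muldi3 libcall without +m, a slli with +m), and the vector is
; spilled to a 128-byte-aligned slot so a plain lw can fetch the element.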

define i32 @extractelt_v64i32_idx(ptr %x, i32 zeroext %idx) nounwind {
; RV32-LABEL: extractelt_v64i32_idx:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -384
; RV32-NEXT:    sw ra, 380(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s0, 376(sp) # 4-byte Folded Spill
; RV32-NEXT:    addi s0, sp, 384
; RV32-NEXT:    andi sp, sp, -128
; RV32-NEXT:    andi a1, a1, 63
; RV32-NEXT:    slli a1, a1, 2
; RV32-NEXT:    li a2, 32
; RV32-NEXT:    addi a3, a0, 128
; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT:    vle32.v v8, (a3)
; RV32-NEXT:    vle32.v v16, (a0)
; RV32-NEXT:    mv a0, sp
; RV32-NEXT:    add a1, a0, a1
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vadd.vv v16, v16, v16
; RV32-NEXT:    vse32.v v16, (a0)
; RV32-NEXT:    addi a0, sp, 128
; RV32-NEXT:    vse32.v v8, (a0)
; RV32-NEXT:    lw a0, 0(a1)
; RV32-NEXT:    addi sp, s0, -384
; RV32-NEXT:    lw ra, 380(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s0, 376(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 384
; RV32-NEXT:    ret
;
; RV64-LABEL: extractelt_v64i32_idx:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -384
; RV64-NEXT:    sd ra, 376(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 368(sp) # 8-byte Folded Spill
; RV64-NEXT:    addi s0, sp, 384
; RV64-NEXT:    andi sp, sp, -128
; RV64-NEXT:    andi a1, a1, 63
; RV64-NEXT:    slli a1, a1, 2
; RV64-NEXT:    li a2, 32
; RV64-NEXT:    addi a3, a0, 128
; RV64-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; RV64-NEXT:    vle32.v v8, (a3)
; RV64-NEXT:    vle32.v v16, (a0)
; RV64-NEXT:    mv a0, sp
; RV64-NEXT:    add a1, a0, a1
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vse32.v v16, (a0)
; RV64-NEXT:    addi a0, sp, 128
; RV64-NEXT:    vse32.v v8, (a0)
; RV64-NEXT:    lw a0, 0(a1)
; RV64-NEXT:    addi sp, s0, -384
; RV64-NEXT:    ld ra, 376(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 368(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 384
; RV64-NEXT:    ret
  %a = load <64 x i32>, ptr %x
  %b = add <64 x i32> %a, %a
  %c = extractelement <64 x i32> %b, i32 %idx
  ret i32 %c
}

define void @store_extractelt_v16i8(ptr %x, ptr %p) nounwind {
; CHECK-LABEL: store_extractelt_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vslidedown.vi v8, v8, 7
; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = extractelement <16 x i8> %a, i32 7
  store i8 %b, ptr %p
  ret void
}

define void @store_extractelt_v8i16(ptr %x, ptr %p) nounwind {
; CHECK-LABEL: store_extractelt_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vslidedown.vi v8, v8, 7
; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = extractelement <8 x i16> %a, i32 7
  store i16 %b, ptr %p
  ret void
}

define void @store_extractelt_v4i32(ptr %x, ptr %p) nounwind {
; CHECK-LABEL: store_extractelt_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vslidedown.vi v8, v8, 2
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vse32.v v8, (a1)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = extractelement <4 x i32> %a, i32 2
  store i32 %b, ptr %p
  ret void
}

; FIXME: Use vse64.v on RV32 to avoid two scalar extracts and two scalar stores.
define void @store_extractelt_v2i64(ptr %x, ptr %p) nounwind {
; RV32-LABEL: store_extractelt_v2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vle64.v v8, (a0)
; RV32-NEXT:    vslidedown.vi v8, v8, 1
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v9, v8, a0
; RV32-NEXT:    vmv.x.s a0, v9
; RV32-NEXT:    vmv.x.s a2, v8
; RV32-NEXT:    sw a2, 0(a1)
; RV32-NEXT:    sw a0, 4(a1)
; RV32-NEXT:    ret
;
; RV64-LABEL: store_extractelt_v2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vle64.v v8, (a0)
; RV64-NEXT:    vslidedown.vi v8, v8, 1
; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT:    vse64.v v8, (a1)
; RV64-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = extractelement <2 x i64> %a, i64 1
  store i64 %b, ptr %p
  ret void
}
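
; Hand-written note (not part of the test): the FIXME above asks the RV32
; path to match the RV64 block, which stores the element straight from the
; vector unit instead of extracting both halves to scalars, i.e. roughly:
;   vslidedown.vi v8, v8, 1
;   vsetivli zero, 1, e64, m1, ta, ma
;   vse64.v v8, (a1)
; A vector store needs no 64-bit scalar register, so that sequence would be
; legal on RV32 as well.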

define void @store_extractelt_v2f64(ptr %x, ptr %p) nounwind {
; CHECK-LABEL: store_extractelt_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vslidedown.vi v8, v8, 1
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT:    vse64.v v8, (a1)
; CHECK-NEXT:    ret
  %a = load <2 x double>, ptr %x
  %b = extractelement <2 x double> %a, i64 1
  store double %b, ptr %p
  ret void
}

define i32 @extractelt_add_v4i32(<4 x i32> %x) {
; RV32-LABEL: extractelt_add_v4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT:    vslidedown.vi v8, v8, 2
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    addi a0, a0, 13
; RV32-NEXT:    ret
;
; RV64-LABEL: extractelt_add_v4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT:    vslidedown.vi v8, v8, 2
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    addiw a0, a0, 13
; RV64-NEXT:    ret
  %bo = add <4 x i32> %x, <i32 11, i32 12, i32 13, i32 14>
  %ext = extractelement <4 x i32> %bo, i32 2
  ret i32 %ext
}

define i32 @extractelt_sub_v4i32(<4 x i32> %x) {
; RV32-LABEL: extractelt_sub_v4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT:    vslidedown.vi v8, v8, 2
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    li a1, 13
; RV32-NEXT:    sub a0, a1, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: extractelt_sub_v4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT:    vslidedown.vi v8, v8, 2
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    li a1, 13
; RV64-NEXT:    subw a0, a1, a0
; RV64-NEXT:    ret
  %bo = sub <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %x
  %ext = extractelement <4 x i32> %bo, i32 2
  ret i32 %ext
}
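
; Hand-written note (not part of the test): in the two functions above only
; element 2 of the vector binop is demanded, so the whole operation folds to
; a scalar add/sub with the lane-2 constant 13. RV64 uses addiw/subw so the
; i32 result comes back sign-extended to 64 bits as the lp64 ABI expects.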

define i32 @extractelt_mul_v4i32(<4 x i32> %x) {
; RV32NOM-LABEL: extractelt_mul_v4i32:
; RV32NOM:       # %bb.0:
; RV32NOM-NEXT:    li a0, 13
; RV32NOM-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32NOM-NEXT:    vmul.vx v8, v8, a0
; RV32NOM-NEXT:    vslidedown.vi v8, v8, 2
; RV32NOM-NEXT:    vmv.x.s a0, v8
; RV32NOM-NEXT:    ret
;
; RV32M-LABEL: extractelt_mul_v4i32:
; RV32M:       # %bb.0:
; RV32M-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32M-NEXT:    vslidedown.vi v8, v8, 2
; RV32M-NEXT:    vmv.x.s a0, v8
; RV32M-NEXT:    li a1, 13
; RV32M-NEXT:    mul a0, a0, a1
; RV32M-NEXT:    ret
;
; RV64NOM-LABEL: extractelt_mul_v4i32:
; RV64NOM:       # %bb.0:
; RV64NOM-NEXT:    li a0, 13
; RV64NOM-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV64NOM-NEXT:    vmul.vx v8, v8, a0
; RV64NOM-NEXT:    vslidedown.vi v8, v8, 2
; RV64NOM-NEXT:    vmv.x.s a0, v8
; RV64NOM-NEXT:    ret
;
; RV64M-LABEL: extractelt_mul_v4i32:
; RV64M:       # %bb.0:
; RV64M-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV64M-NEXT:    vslidedown.vi v8, v8, 2
; RV64M-NEXT:    vmv.x.s a0, v8
; RV64M-NEXT:    li a1, 13
; RV64M-NEXT:    mulw a0, a0, a1
; RV64M-NEXT:    ret
  %bo = mul <4 x i32> %x, <i32 11, i32 12, i32 13, i32 14>
  %ext = extractelement <4 x i32> %bo, i32 2
  ret i32 %ext
}

define i32 @extractelt_sdiv_v4i32(<4 x i32> %x) {
; RV32NOM-LABEL: extractelt_sdiv_v4i32:
; RV32NOM:       # %bb.0:
; RV32NOM-NEXT:    lui a0, %hi(.LCPI42_0)
; RV32NOM-NEXT:    addi a0, a0, %lo(.LCPI42_0)
; RV32NOM-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32NOM-NEXT:    vle32.v v9, (a0)
; RV32NOM-NEXT:    vmulh.vv v9, v8, v9
; RV32NOM-NEXT:    lui a0, 1044480
; RV32NOM-NEXT:    vmv.s.x v10, a0
; RV32NOM-NEXT:    vsext.vf4 v11, v10
; RV32NOM-NEXT:    vand.vv v8, v8, v11
; RV32NOM-NEXT:    vadd.vv v8, v9, v8
; RV32NOM-NEXT:    lui a0, 12320
; RV32NOM-NEXT:    addi a0, a0, 257
; RV32NOM-NEXT:    vmv.s.x v9, a0
; RV32NOM-NEXT:    vsext.vf4 v10, v9
; RV32NOM-NEXT:    vsra.vv v9, v8, v10
; RV32NOM-NEXT:    vsrl.vi v8, v8, 31
; RV32NOM-NEXT:    vadd.vv v8, v9, v8
; RV32NOM-NEXT:    vslidedown.vi v8, v8, 2
; RV32NOM-NEXT:    vmv.x.s a0, v8
; RV32NOM-NEXT:    ret
;
; RV32M-LABEL: extractelt_sdiv_v4i32:
; RV32M:       # %bb.0:
; RV32M-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32M-NEXT:    vslidedown.vi v8, v8, 2
; RV32M-NEXT:    vmv.x.s a0, v8
; RV32M-NEXT:    lui a1, 322639
; RV32M-NEXT:    addi a1, a1, -945
; RV32M-NEXT:    mulh a0, a0, a1
; RV32M-NEXT:    srli a1, a0, 31
; RV32M-NEXT:    srai a0, a0, 2
; RV32M-NEXT:    add a0, a0, a1
; RV32M-NEXT:    ret
;
; RV64NOM-LABEL: extractelt_sdiv_v4i32:
; RV64NOM:       # %bb.0:
; RV64NOM-NEXT:    lui a0, %hi(.LCPI42_0)
; RV64NOM-NEXT:    addi a0, a0, %lo(.LCPI42_0)
; RV64NOM-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV64NOM-NEXT:    vle32.v v9, (a0)
; RV64NOM-NEXT:    vmulh.vv v9, v8, v9
; RV64NOM-NEXT:    lui a0, 1044480
; RV64NOM-NEXT:    vmv.s.x v10, a0
; RV64NOM-NEXT:    vsext.vf4 v11, v10
; RV64NOM-NEXT:    vand.vv v8, v8, v11
; RV64NOM-NEXT:    vadd.vv v8, v9, v8
; RV64NOM-NEXT:    lui a0, 12320
; RV64NOM-NEXT:    addi a0, a0, 257
; RV64NOM-NEXT:    vmv.s.x v9, a0
; RV64NOM-NEXT:    vsext.vf4 v10, v9
; RV64NOM-NEXT:    vsra.vv v8, v8, v10
; RV64NOM-NEXT:    vsrl.vi v9, v8, 31
; RV64NOM-NEXT:    vadd.vv v8, v8, v9
; RV64NOM-NEXT:    vslidedown.vi v8, v8, 2
; RV64NOM-NEXT:    vmv.x.s a0, v8
; RV64NOM-NEXT:    ret
;
; RV64M-LABEL: extractelt_sdiv_v4i32:
; RV64M:       # %bb.0:
; RV64M-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV64M-NEXT:    vslidedown.vi v8, v8, 2
; RV64M-NEXT:    vmv.x.s a0, v8
; RV64M-NEXT:    lui a1, 322639
; RV64M-NEXT:    addiw a1, a1, -945
; RV64M-NEXT:    mul a0, a0, a1
; RV64M-NEXT:    srli a1, a0, 63
; RV64M-NEXT:    srai a0, a0, 34
; RV64M-NEXT:    add a0, a0, a1
; RV64M-NEXT:    ret
  %bo = sdiv <4 x i32> %x, <i32 11, i32 12, i32 13, i32 14>
  %ext = extractelement <4 x i32> %bo, i32 2
  ret i32 %ext
}

define i32 @extractelt_udiv_v4i32(<4 x i32> %x) {
; RV32NOM-LABEL: extractelt_udiv_v4i32:
; RV32NOM:       # %bb.0:
; RV32NOM-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32NOM-NEXT:    vsrl.vi v8, v8, 0
; RV32NOM-NEXT:    lui a0, 322639
; RV32NOM-NEXT:    addi a0, a0, -945
; RV32NOM-NEXT:    vmulhu.vx v8, v8, a0
; RV32NOM-NEXT:    vslidedown.vi v8, v8, 2
; RV32NOM-NEXT:    vmv.x.s a0, v8
; RV32NOM-NEXT:    srli a0, a0, 2
; RV32NOM-NEXT:    ret
;
; RV32M-LABEL: extractelt_udiv_v4i32:
; RV32M:       # %bb.0:
; RV32M-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32M-NEXT:    vslidedown.vi v8, v8, 2
; RV32M-NEXT:    vmv.x.s a0, v8
; RV32M-NEXT:    lui a1, 322639
; RV32M-NEXT:    addi a1, a1, -945
; RV32M-NEXT:    mulhu a0, a0, a1
; RV32M-NEXT:    srli a0, a0, 2
; RV32M-NEXT:    ret
;
; RV64NOM-LABEL: extractelt_udiv_v4i32:
; RV64NOM:       # %bb.0:
; RV64NOM-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV64NOM-NEXT:    vsrl.vi v8, v8, 0
; RV64NOM-NEXT:    lui a0, 322639
; RV64NOM-NEXT:    addi a0, a0, -945
; RV64NOM-NEXT:    vmulhu.vx v8, v8, a0
; RV64NOM-NEXT:    vslidedown.vi v8, v8, 2
; RV64NOM-NEXT:    vmv.x.s a0, v8
; RV64NOM-NEXT:    slli a0, a0, 33
; RV64NOM-NEXT:    srli a0, a0, 35
; RV64NOM-NEXT:    ret
;
; RV64M-LABEL: extractelt_udiv_v4i32:
; RV64M:       # %bb.0:
; RV64M-NEXT:    lui a0, 322639
; RV64M-NEXT:    addi a0, a0, -945
; RV64M-NEXT:    slli a0, a0, 32
; RV64M-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV64M-NEXT:    vslidedown.vi v8, v8, 2
; RV64M-NEXT:    vmv.x.s a1, v8
; RV64M-NEXT:    slli a1, a1, 32
; RV64M-NEXT:    mulhu a0, a1, a0
; RV64M-NEXT:    srli a0, a0, 34
; RV64M-NEXT:    ret
  %bo = udiv <4 x i32> %x, <i32 11, i32 12, i32 13, i32 14>
  %ext = extractelement <4 x i32> %bo, i32 2
  ret i32 %ext
}
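
; Hand-written note (not part of the test): the scalar tails above are the
; usual divide-by-constant strength reduction for the demanded divisor 13.
; The magic constant built by lui 322639 + addi -945 is 0x4EC4EC4F, about
; 2^34/13, so a high multiply followed by a total shift of 34 yields the
; unsigned quotient; the sdiv variant adds the sign-bit correction
; (srli/srai/add).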

define float @extractelt_fadd_v4f32(<4 x float> %x) {
; CHECK-LABEL: extractelt_fadd_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 2
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    lui a0, 267520
; CHECK-NEXT:    fmv.w.x fa4, a0
; CHECK-NEXT:    fadd.s fa0, fa5, fa4
; CHECK-NEXT:    ret
  %bo = fadd <4 x float> %x, <float 11.0, float 12.0, float 13.0, float 14.0>
  %ext = extractelement <4 x float> %bo, i32 2
  ret float %ext
}

define float @extractelt_fsub_v4f32(<4 x float> %x) {
; CHECK-LABEL: extractelt_fsub_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 2
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    lui a0, 267520
; CHECK-NEXT:    fmv.w.x fa4, a0
; CHECK-NEXT:    fsub.s fa0, fa4, fa5
; CHECK-NEXT:    ret
  %bo = fsub <4 x float> <float 11.0, float 12.0, float 13.0, float 14.0>, %x
  %ext = extractelement <4 x float> %bo, i32 2
  ret float %ext
}

define float @extractelt_fmul_v4f32(<4 x float> %x) {
; CHECK-LABEL: extractelt_fmul_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 2
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    lui a0, 267520
; CHECK-NEXT:    fmv.w.x fa4, a0
; CHECK-NEXT:    fmul.s fa0, fa5, fa4
; CHECK-NEXT:    ret
  %bo = fmul <4 x float> %x, <float 11.0, float 12.0, float 13.0, float 14.0>
  %ext = extractelement <4 x float> %bo, i32 2
  ret float %ext
}

define float @extractelt_fdiv_v4f32(<4 x float> %x) {
; CHECK-LABEL: extractelt_fdiv_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 2
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    lui a0, 267520
; CHECK-NEXT:    fmv.w.x fa4, a0
; CHECK-NEXT:    fdiv.s fa0, fa5, fa4
; CHECK-NEXT:    ret
  %bo = fdiv <4 x float> %x, <float 11.0, float 12.0, float 13.0, float 14.0>
  %ext = extractelement <4 x float> %bo, i32 2
  ret float %ext
}

define i32 @extractelt_v16i32_idx7_exact_vlen(ptr %x) nounwind vscale_range(2,2) {
; CHECK-LABEL: extractelt_v16i32_idx7_exact_vlen:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl1re32.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 3
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %a = load <16 x i32>, ptr %x
  %b = extractelement <16 x i32> %a, i32 7
  ret i32 %b
}

define i32 @extractelt_v16i32_idx15_exact_vlen(ptr %x) nounwind vscale_range(2,2) {
; CHECK-LABEL: extractelt_v16i32_idx15_exact_vlen:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, 48
; CHECK-NEXT:    vl1re32.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 3
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %a = load <16 x i32>, ptr %x
  %b = extractelement <16 x i32> %a, i32 15
  ret i32 %b
}
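
; Hand-written note (not part of the test): with vscale_range(2,2) the exact
; VLEN is 128, so <16 x i32> occupies exactly four vector registers of
; 4 x i32 each. A constant index then selects a single register statically:
; element 7 is lane 3 of register 1 (byte offset 16) and element 15 is lane 3
; of register 3 (byte offset 48), which is why both functions load one
; register with vl1re32.v and slide down by 3 instead of working at m4.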