; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32NOM
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32M
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64NOM
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64M

define i8 @extractelt_v16i8(ptr %x) nounwind {
; CHECK-LABEL: extractelt_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vslidedown.vi v8, v8, 7
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = extractelement <16 x i8> %a, i32 7
  ret i8 %b
}

define i16 @extractelt_v8i16(ptr %x) nounwind {
; CHECK-LABEL: extractelt_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vslidedown.vi v8, v8, 7
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = extractelement <8 x i16> %a, i32 7
  ret i16 %b
}

define i32 @extractelt_v4i32(ptr %x) nounwind {
; CHECK-LABEL: extractelt_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vslidedown.vi v8, v8, 2
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = extractelement <4 x i32> %a, i32 2
  ret i32 %b
}

define i64 @extractelt_v2i64(ptr %x) nounwind {
; RV32-LABEL: extractelt_v2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vle64.v v8, (a0)
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v9, v8, a0
; RV32-NEXT:    vmv.x.s a1, v9
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: extractelt_v2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vle64.v v8, (a0)
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = extractelement <2 x i64> %a, i32 0
  ret i64 %b
}

define half @extractelt_v8f16(ptr %x) nounwind {
; CHECK-LABEL: extractelt_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vslidedown.vi v8, v8, 7
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %a = load <8 x half>, ptr %x
  %b = extractelement <8 x half> %a, i32 7
  ret half %b
}

define float @extractelt_v4f32(ptr %x) nounwind {
; CHECK-LABEL: extractelt_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vslidedown.vi v8, v8, 2
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = extractelement <4 x float> %a, i32 2
  ret float %b
}

define double @extractelt_v2f64(ptr %x) nounwind {
; CHECK-LABEL: extractelt_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %a = load <2 x double>, ptr %x
  %b = extractelement <2 x double> %a, i32 0
  ret double %b
}

define i8 @extractelt_v32i8(ptr %x) nounwind {
; CHECK-LABEL: extractelt_v32i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 7
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = extractelement <32 x i8> %a, i32 7
  ret i8 %b
}

define i16 @extractelt_v16i16(ptr %x) nounwind {
; CHECK-LABEL: extractelt_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 7
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = extractelement <16 x i16> %a, i32 7
  ret i16 %b
}

define i32 @extractelt_v8i32(ptr %x) nounwind {
; CHECK-LABEL: extractelt_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 6
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = extractelement <8 x i32> %a, i32 6
  ret i32 %b
}

define i64 @extractelt_v4i64(ptr %x) nounwind {
; RV32-LABEL: extractelt_v4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT:    vle64.v v8, (a0)
; RV32-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV32-NEXT:    vslidedown.vi v8, v8, 3
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsrl.vx v10, v8, a0
; RV32-NEXT:    vmv.x.s a1, v10
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: extractelt_v4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT:    vle64.v v8, (a0)
; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v8, v8, 3
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = extractelement <4 x i64> %a, i32 3
  ret i64 %b
}

define half @extractelt_v16f16(ptr %x) nounwind {
; CHECK-LABEL: extractelt_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 7
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %a = load <16 x half>, ptr %x
  %b = extractelement <16 x half> %a, i32 7
  ret half %b
}

define float @extractelt_v8f32(ptr %x) nounwind {
; CHECK-LABEL: extractelt_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 2
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %a = load <8 x float>, ptr %x
  %b = extractelement <8 x float> %a, i32 2
  ret float %b
}

define double @extractelt_v4f64(ptr %x) nounwind {
; CHECK-LABEL: extractelt_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %a = load <4 x double>, ptr %x
  %b = extractelement <4 x double> %a, i32 0
  ret double %b
}

; This uses a non-power of 2 type so that it isn't an MVT to catch an
; incorrect use of getSimpleValueType().
; NOTE: Type legalization is bitcasting to vXi32 and doing 2 independent
; slidedowns and extracts.
define i64 @extractelt_v3i64(ptr %x) nounwind {
; RV32-LABEL: extractelt_v3i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 3, e64, m2, ta, ma
; RV32-NEXT:    vle64.v v8, (a0)
; RV32-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32-NEXT:    vslidedown.vi v10, v8, 4
; RV32-NEXT:    vmv.x.s a0, v10
; RV32-NEXT:    vslidedown.vi v8, v8, 5
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: extractelt_v3i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 3, e64, m2, ta, ma
; RV64-NEXT:    vle64.v v8, (a0)
; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v8, v8, 2
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %a = load <3 x i64>, ptr %x
  %b = extractelement <3 x i64> %a, i32 2
  ret i64 %b
}

define i32 @extractelt_v32i32(ptr %x) nounwind {
; RV32-LABEL: extractelt_v32i32:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -256
; RV32-NEXT:    sw ra, 252(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s0, 248(sp) # 4-byte Folded Spill
; RV32-NEXT:    addi s0, sp, 256
; RV32-NEXT:    andi sp, sp, -128
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; RV32-NEXT:    vle32.v v8, (a0)
; RV32-NEXT:    mv a0, sp
; RV32-NEXT:    vse32.v v8, (a0)
; RV32-NEXT:    lw a0, 124(sp)
; RV32-NEXT:    addi sp, s0, -256
; RV32-NEXT:    lw ra, 252(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s0, 248(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 256
; RV32-NEXT:    ret
;
; RV64-LABEL: extractelt_v32i32:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -256
; RV64-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
; RV64-NEXT:    addi s0, sp, 256
; RV64-NEXT:    andi sp, sp, -128
; RV64-NEXT:    li a1, 32
; RV64-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; RV64-NEXT:    vle32.v v8, (a0)
; RV64-NEXT:    mv a0, sp
; RV64-NEXT:    vse32.v v8, (a0)
; RV64-NEXT:    lw a0, 124(sp)
; RV64-NEXT:    addi sp, s0, -256
; RV64-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 256
; RV64-NEXT:    ret
  %a = load <32 x i32>, ptr %x
  %b = extractelement <32 x i32> %a, i32 31
  ret i32 %b
}

; Exercise type legalization for type beyond LMUL8
define i32 @extractelt_v64i32(ptr %x) nounwind {
; RV32-LABEL: extractelt_v64i32:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -256
; RV32-NEXT:    sw ra, 252(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s0, 248(sp) # 4-byte Folded Spill
; RV32-NEXT:    addi s0, sp, 256
; RV32-NEXT:    andi sp, sp, -128
; RV32-NEXT:    addi a0, a0, 128
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; RV32-NEXT:    vle32.v v8, (a0)
; RV32-NEXT:    mv a0, sp
; RV32-NEXT:    vse32.v v8, (a0)
; RV32-NEXT:    lw a0, 124(sp)
; RV32-NEXT:    addi sp, s0, -256
; RV32-NEXT:    lw ra, 252(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s0, 248(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 256
; RV32-NEXT:    ret
;
; RV64-LABEL: extractelt_v64i32:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -256
; RV64-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
; RV64-NEXT:    addi s0, sp, 256
; RV64-NEXT:    andi sp, sp, -128
; RV64-NEXT:    addi a0, a0, 128
; RV64-NEXT:    li a1, 32
; RV64-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; RV64-NEXT:    vle32.v v8, (a0)
; RV64-NEXT:    mv a0, sp
; RV64-NEXT:    vse32.v v8, (a0)
; RV64-NEXT:    lw a0, 124(sp)
; RV64-NEXT:    addi sp, s0, -256
; RV64-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 256
; RV64-NEXT:    ret
  %a = load <64 x i32>, ptr %x
  %b = extractelement <64 x i32> %a, i32 63
  ret i32 %b
}

define i8 @extractelt_v16i8_idx(ptr %x, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v16i8_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vslidedown.vx v8, v8, a1
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = extractelement <16 x i8> %a, i32 %idx
  ret i8 %b
}

define i16 @extractelt_v8i16_idx(ptr %x, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v8i16_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vslidedown.vx v8, v8, a1
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = extractelement <8 x i16> %a, i32 %idx
  ret i16 %b
}

define i32 @extractelt_v4i32_idx(ptr %x, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v4i32_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vslidedown.vx v8, v8, a1
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = add <4 x i32> %a, %a
  %c = extractelement <4 x i32> %b, i32 %idx
  ret i32 %c
}

define i64 @extractelt_v2i64_idx(ptr %x, i32 zeroext %idx) nounwind {
; RV32-LABEL: extractelt_v2i64_idx:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vle64.v v8, (a0)
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vslidedown.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: extractelt_v2i64_idx:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vle64.v v8, (a0)
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vslidedown.vx v8, v8, a1
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = add <2 x i64> %a, %a
  %c = extractelement <2 x i64> %b, i32 %idx
  ret i64 %c
}

define half @extractelt_v8f16_idx(ptr %x, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v8f16_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfadd.vv v8, v8, v8
; CHECK-NEXT:    vslidedown.vx v8, v8, a1
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %a = load <8 x half>, ptr %x
  %b = fadd <8 x half> %a, %a
  %c = extractelement <8 x half> %b, i32 %idx
  ret half %c
}

define float @extractelt_v4f32_idx(ptr %x, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v4f32_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfadd.vv v8, v8, v8
; CHECK-NEXT:    vslidedown.vx v8, v8, a1
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = fadd <4 x float> %a, %a
  %c = extractelement <4 x float> %b, i32 %idx
  ret float %c
}

define double @extractelt_v2f64_idx(ptr %x, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v2f64_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vfadd.vv v8, v8, v8
; CHECK-NEXT:    vslidedown.vx v8, v8, a1
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %a = load <2 x double>, ptr %x
  %b = fadd <2 x double> %a, %a
  %c = extractelement <2 x double> %b, i32 %idx
  ret double %c
}

define i8 @extractelt_v32i8_idx(ptr %x, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v32i8_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 1, e8, m2, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v8, a1
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = extractelement <32 x i8> %a, i32 %idx
  ret i8 %b
}

define i16 @extractelt_v16i16_idx(ptr %x, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v16i16_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v8, a1
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = extractelement <16 x i16> %a, i32 %idx
  ret i16 %b
}

define i32 @extractelt_v8i32_idx(ptr %x, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v8i32_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vadd.vv v8, v8, v8
; CHECK-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v8, a1
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = add <8 x i32> %a, %a
  %c = extractelement <8 x i32> %b, i32 %idx
  ret i32 %c
}

define i64 @extractelt_v4i64_idx(ptr %x, i32 zeroext %idx) nounwind {
; RV32-LABEL: extractelt_v4i64_idx:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT:    vle64.v v8, (a0)
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV32-NEXT:    vslidedown.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: extractelt_v4i64_idx:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT:    vle64.v v8, (a0)
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT:    vslidedown.vx v8, v8, a1
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = add <4 x i64> %a, %a
  %c = extractelement <4 x i64> %b, i32 %idx
  ret i64 %c
}

define half @extractelt_v16f16_idx(ptr %x, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v16f16_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfadd.vv v8, v8, v8
; CHECK-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v8, a1
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %a = load <16 x half>, ptr %x
  %b = fadd <16 x half> %a, %a
  %c = extractelement <16 x half> %b, i32 %idx
  ret half %c
}

define float @extractelt_v8f32_idx(ptr %x, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v8f32_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfadd.vv v8, v8, v8
; CHECK-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v8, a1
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %a = load <8 x float>, ptr %x
  %b = fadd <8 x float> %a, %a
  %c = extractelement <8 x float> %b, i32 %idx
  ret float %c
}

define double @extractelt_v4f64_idx(ptr %x, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v4f64_idx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vfadd.vv v8, v8, v8
; CHECK-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; CHECK-NEXT:    vslidedown.vx v8, v8, a1
; CHECK-NEXT:    vfmv.f.s fa0, v8
; CHECK-NEXT:    ret
  %a = load <4 x double>, ptr %x
  %b = fadd <4 x double> %a, %a
  %c = extractelement <4 x double> %b, i32 %idx
  ret double %c
}

; This uses a non-power of 2 type so that it isn't an MVT to catch an
; incorrect use of getSimpleValueType().
; NOTE: Type legalization is bitcasting to vXi32 and doing 2 independent
; slidedowns and extracts.
define i64 @extractelt_v3i64_idx(ptr %x, i32 zeroext %idx) nounwind {
; RV32-LABEL: extractelt_v3i64_idx:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 3, e64, m2, ta, ma
; RV32-NEXT:    vle64.v v8, (a0)
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    add a1, a1, a1
; RV32-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32-NEXT:    vslidedown.vx v10, v8, a1
; RV32-NEXT:    vmv.x.s a0, v10
; RV32-NEXT:    addi a1, a1, 1
; RV32-NEXT:    vslidedown.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: extractelt_v3i64_idx:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 3, e64, m2, ta, ma
; RV64-NEXT:    vle64.v v8, (a0)
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT:    vslidedown.vx v8, v8, a1
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %a = load <3 x i64>, ptr %x
  %b = add <3 x i64> %a, %a
  %c = extractelement <3 x i64> %b, i32 %idx
  ret i64 %c
}

define i32 @extractelt_v32i32_idx(ptr %x, i32 zeroext %idx) nounwind {
; RV32NOM-LABEL: extractelt_v32i32_idx:
; RV32NOM:       # %bb.0:
; RV32NOM-NEXT:    addi sp, sp, -256
; RV32NOM-NEXT:    sw ra, 252(sp) # 4-byte Folded Spill
; RV32NOM-NEXT:    sw s0, 248(sp) # 4-byte Folded Spill
; RV32NOM-NEXT:    sw s2, 244(sp) # 4-byte Folded Spill
; RV32NOM-NEXT:    addi s0, sp, 256
; RV32NOM-NEXT:    andi sp, sp, -128
; RV32NOM-NEXT:    mv s2, a0
; RV32NOM-NEXT:    andi a0, a1, 31
; RV32NOM-NEXT:    li a1, 4
; RV32NOM-NEXT:    call __mulsi3@plt
; RV32NOM-NEXT:    li a1, 32
; RV32NOM-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; RV32NOM-NEXT:    vle32.v v8, (s2)
; RV32NOM-NEXT:    mv a1, sp
; RV32NOM-NEXT:    add a0, a1, a0
; RV32NOM-NEXT:    vadd.vv v8, v8, v8
; RV32NOM-NEXT:    vse32.v v8, (a1)
; RV32NOM-NEXT:    lw a0, 0(a0)
; RV32NOM-NEXT:    addi sp, s0, -256
; RV32NOM-NEXT:    lw ra, 252(sp) # 4-byte Folded Reload
; RV32NOM-NEXT:    lw s0, 248(sp) # 4-byte Folded Reload
; RV32NOM-NEXT:    lw s2, 244(sp) # 4-byte Folded Reload
; RV32NOM-NEXT:    addi sp, sp, 256
; RV32NOM-NEXT:    ret
;
; RV32M-LABEL: extractelt_v32i32_idx:
; RV32M:       # %bb.0:
; RV32M-NEXT:    addi sp, sp, -256
; RV32M-NEXT:    sw ra, 252(sp) # 4-byte Folded Spill
; RV32M-NEXT:    sw s0, 248(sp) # 4-byte Folded Spill
; RV32M-NEXT:    addi s0, sp, 256
; RV32M-NEXT:    andi sp, sp, -128
; RV32M-NEXT:    andi a1, a1, 31
; RV32M-NEXT:    li a2, 32
; RV32M-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; RV32M-NEXT:    vle32.v v8, (a0)
; RV32M-NEXT:    slli a1, a1, 2
; RV32M-NEXT:    mv a0, sp
; RV32M-NEXT:    or a1, a0, a1
; RV32M-NEXT:    vadd.vv v8, v8, v8
; RV32M-NEXT:    vse32.v v8, (a0)
; RV32M-NEXT:    lw a0, 0(a1)
; RV32M-NEXT:    addi sp, s0, -256
; RV32M-NEXT:    lw ra, 252(sp) # 4-byte Folded Reload
; RV32M-NEXT:    lw s0, 248(sp) # 4-byte Folded Reload
; RV32M-NEXT:    addi sp, sp, 256
; RV32M-NEXT:    ret
;
; RV64NOM-LABEL: extractelt_v32i32_idx:
; RV64NOM:       # %bb.0:
; RV64NOM-NEXT:    addi sp, sp, -256
; RV64NOM-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
; RV64NOM-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
; RV64NOM-NEXT:    sd s2, 232(sp) # 8-byte Folded Spill
; RV64NOM-NEXT:    addi s0, sp, 256
; RV64NOM-NEXT:    andi sp, sp, -128
; RV64NOM-NEXT:    mv s2, a0
; RV64NOM-NEXT:    andi a0, a1, 31
; RV64NOM-NEXT:    li a1, 4
; RV64NOM-NEXT:    call __muldi3@plt
; RV64NOM-NEXT:    li a1, 32
; RV64NOM-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; RV64NOM-NEXT:    vle32.v v8, (s2)
; RV64NOM-NEXT:    mv a1, sp
; RV64NOM-NEXT:    add a0, a1, a0
; RV64NOM-NEXT:    vadd.vv v8, v8, v8
; RV64NOM-NEXT:    vse32.v v8, (a1)
; RV64NOM-NEXT:    lw a0, 0(a0)
; RV64NOM-NEXT:    addi sp, s0, -256
; RV64NOM-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
; RV64NOM-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
; RV64NOM-NEXT:    ld s2, 232(sp) # 8-byte Folded Reload
; RV64NOM-NEXT:    addi sp, sp, 256
; RV64NOM-NEXT:    ret
;
; RV64M-LABEL: extractelt_v32i32_idx:
; RV64M:       # %bb.0:
; RV64M-NEXT:    addi sp, sp, -256
; RV64M-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
; RV64M-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
; RV64M-NEXT:    addi s0, sp, 256
; RV64M-NEXT:    andi sp, sp, -128
; RV64M-NEXT:    andi a1, a1, 31
; RV64M-NEXT:    li a2, 32
; RV64M-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; RV64M-NEXT:    vle32.v v8, (a0)
; RV64M-NEXT:    slli a1, a1, 2
; RV64M-NEXT:    mv a0, sp
; RV64M-NEXT:    or a1, a0, a1
; RV64M-NEXT:    vadd.vv v8, v8, v8
; RV64M-NEXT:    vse32.v v8, (a0)
; RV64M-NEXT:    lw a0, 0(a1)
; RV64M-NEXT:    addi sp, s0, -256
; RV64M-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
; RV64M-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
; RV64M-NEXT:    addi sp, sp, 256
; RV64M-NEXT:    ret
  %a = load <32 x i32>, ptr %x
  %b = add <32 x i32> %a, %a
  %c = extractelement <32 x i32> %b, i32 %idx
  ret i32 %c
}

define i32 @extractelt_v64i32_idx(ptr %x, i32 zeroext %idx) nounwind {
; RV32-LABEL: extractelt_v64i32_idx:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -384
; RV32-NEXT:    sw ra, 380(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s0, 376(sp) # 4-byte Folded Spill
; RV32-NEXT:    addi s0, sp, 384
; RV32-NEXT:    andi sp, sp, -128
; RV32-NEXT:    andi a1, a1, 63
; RV32-NEXT:    slli a1, a1, 2
; RV32-NEXT:    li a2, 32
; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT:    addi a2, a0, 128
; RV32-NEXT:    vle32.v v8, (a2)
; RV32-NEXT:    vle32.v v16, (a0)
; RV32-NEXT:    mv a0, sp
; RV32-NEXT:    add a1, a0, a1
; RV32-NEXT:    vadd.vv v8, v8, v8
; RV32-NEXT:    vadd.vv v16, v16, v16
; RV32-NEXT:    vse32.v v16, (a0)
; RV32-NEXT:    addi a0, sp, 128
; RV32-NEXT:    vse32.v v8, (a0)
; RV32-NEXT:    lw a0, 0(a1)
; RV32-NEXT:    addi sp, s0, -384
; RV32-NEXT:    lw ra, 380(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s0, 376(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 384
; RV32-NEXT:    ret
;
; RV64-LABEL: extractelt_v64i32_idx:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -384
; RV64-NEXT:    sd ra, 376(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 368(sp) # 8-byte Folded Spill
; RV64-NEXT:    addi s0, sp, 384
; RV64-NEXT:    andi sp, sp, -128
; RV64-NEXT:    andi a1, a1, 63
; RV64-NEXT:    slli a1, a1, 2
; RV64-NEXT:    li a2, 32
; RV64-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; RV64-NEXT:    addi a2, a0, 128
; RV64-NEXT:    vle32.v v8, (a2)
; RV64-NEXT:    vle32.v v16, (a0)
; RV64-NEXT:    mv a0, sp
; RV64-NEXT:    add a1, a0, a1
; RV64-NEXT:    vadd.vv v8, v8, v8
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vse32.v v16, (a0)
; RV64-NEXT:    addi a0, sp, 128
; RV64-NEXT:    vse32.v v8, (a0)
; RV64-NEXT:    lw a0, 0(a1)
; RV64-NEXT:    addi sp, s0, -384
; RV64-NEXT:    ld ra, 376(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 368(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 384
; RV64-NEXT:    ret
  %a = load <64 x i32>, ptr %x
  %b = add <64 x i32> %a, %a
  %c = extractelement <64 x i32> %b, i32 %idx
  ret i32 %c
}

define void @store_extractelt_v16i8(ptr %x, ptr %p) nounwind {
; CHECK-LABEL: store_extractelt_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vslidedown.vi v8, v8, 7
; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT:    vse8.v v8, (a1)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = extractelement <16 x i8> %a, i32 7
  store i8 %b, ptr %p
  ret void
}

define void @store_extractelt_v8i16(ptr %x, ptr %p) nounwind {
; CHECK-LABEL: store_extractelt_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vslidedown.vi v8, v8, 7
; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a1)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = extractelement <8 x i16> %a, i32 7
  store i16 %b, ptr %p
  ret void
}

define void @store_extractelt_v4i32(ptr %x, ptr %p) nounwind {
; CHECK-LABEL: store_extractelt_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vslidedown.vi v8, v8, 2
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vse32.v v8, (a1)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = extractelement <4 x i32> %a, i32 2
  store i32 %b, ptr %p
  ret void
}

; FIXME: Use vse64.v on RV32 to avoid two scalar extracts and two scalar stores.
define void @store_extractelt_v2i64(ptr %x, ptr %p) nounwind {
; RV32-LABEL: store_extractelt_v2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vle64.v v8, (a0)
; RV32-NEXT:    vslidedown.vi v8, v8, 1
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v9, v8, a0
; RV32-NEXT:    vmv.x.s a0, v9
; RV32-NEXT:    vmv.x.s a2, v8
; RV32-NEXT:    sw a2, 0(a1)
; RV32-NEXT:    sw a0, 4(a1)
; RV32-NEXT:    ret
;
; RV64-LABEL: store_extractelt_v2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vle64.v v8, (a0)
; RV64-NEXT:    vslidedown.vi v8, v8, 1
; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT:    vse64.v v8, (a1)
; RV64-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = extractelement <2 x i64> %a, i64 1
  store i64 %b, ptr %p
  ret void
}

define void @store_extractelt_v2f64(ptr %x, ptr %p) nounwind {
; CHECK-LABEL: store_extractelt_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vslidedown.vi v8, v8, 1
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT:    vse64.v v8, (a1)
; CHECK-NEXT:    ret
  %a = load <2 x double>, ptr %x
  %b = extractelement <2 x double> %a, i64 1
  store double %b, ptr %p
  ret void
}

define i32 @extractelt_add_v4i32(<4 x i32> %x) {
; RV32-LABEL: extractelt_add_v4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT:    vslidedown.vi v8, v8, 2
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    addi a0, a0, 13
; RV32-NEXT:    ret
;
; RV64-LABEL: extractelt_add_v4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT:    vslidedown.vi v8, v8, 2
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    addiw a0, a0, 13
; RV64-NEXT:    ret
  %bo = add <4 x i32> %x, <i32 11, i32 12, i32 13, i32 14>
  %ext = extractelement <4 x i32> %bo, i32 2
  ret i32 %ext
}

define i32 @extractelt_sub_v4i32(<4 x i32> %x) {
; RV32-LABEL: extractelt_sub_v4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT:    vslidedown.vi v8, v8, 2
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    li a1, 13
; RV32-NEXT:    sub a0, a1, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: extractelt_sub_v4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT:    vslidedown.vi v8, v8, 2
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    li a1, 13
; RV64-NEXT:    subw a0, a1, a0
; RV64-NEXT:    ret
  %bo = sub <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %x
  %ext = extractelement <4 x i32> %bo, i32 2
  ret i32 %ext
}

define i32 @extractelt_mul_v4i32(<4 x i32> %x) {
; RV32NOM-LABEL: extractelt_mul_v4i32:
; RV32NOM:       # %bb.0:
; RV32NOM-NEXT:    li a0, 13
; RV32NOM-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32NOM-NEXT:    vmul.vx v8, v8, a0
; RV32NOM-NEXT:    vslidedown.vi v8, v8, 2
; RV32NOM-NEXT:    vmv.x.s a0, v8
; RV32NOM-NEXT:    ret
;
; RV32M-LABEL: extractelt_mul_v4i32:
; RV32M:       # %bb.0:
; RV32M-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32M-NEXT:    vslidedown.vi v8, v8, 2
; RV32M-NEXT:    vmv.x.s a0, v8
; RV32M-NEXT:    li a1, 13
; RV32M-NEXT:    mul a0, a0, a1
; RV32M-NEXT:    ret
;
; RV64NOM-LABEL: extractelt_mul_v4i32:
; RV64NOM:       # %bb.0:
; RV64NOM-NEXT:    li a0, 13
; RV64NOM-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV64NOM-NEXT:    vmul.vx v8, v8, a0
; RV64NOM-NEXT:    vslidedown.vi v8, v8, 2
; RV64NOM-NEXT:    vmv.x.s a0, v8
; RV64NOM-NEXT:    ret
;
; RV64M-LABEL: extractelt_mul_v4i32:
; RV64M:       # %bb.0:
; RV64M-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV64M-NEXT:    vslidedown.vi v8, v8, 2
; RV64M-NEXT:    vmv.x.s a0, v8
; RV64M-NEXT:    li a1, 13
; RV64M-NEXT:    mulw a0, a0, a1
; RV64M-NEXT:    ret
  %bo = mul <4 x i32> %x, <i32 11, i32 12, i32 13, i32 14>
  %ext = extractelement <4 x i32> %bo, i32 2
  ret i32 %ext
}

define i32 @extractelt_sdiv_v4i32(<4 x i32> %x) {
; RV32NOM-LABEL: extractelt_sdiv_v4i32:
; RV32NOM:       # %bb.0:
; RV32NOM-NEXT:    lui a0, %hi(.LCPI42_0)
; RV32NOM-NEXT:    addi a0, a0, %lo(.LCPI42_0)
; RV32NOM-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32NOM-NEXT:    vle32.v v9, (a0)
; RV32NOM-NEXT:    vmulh.vv v9, v8, v9
; RV32NOM-NEXT:    lui a0, 1044480
; RV32NOM-NEXT:    vmv.s.x v10, a0
; RV32NOM-NEXT:    vsext.vf4 v11, v10
; RV32NOM-NEXT:    vand.vv v8, v8, v11
; RV32NOM-NEXT:    vadd.vv v8, v9, v8
; RV32NOM-NEXT:    lui a0, 12320
; RV32NOM-NEXT:    addi a0, a0, 257
; RV32NOM-NEXT:    vmv.s.x v9, a0
; RV32NOM-NEXT:    vsext.vf4 v10, v9
; RV32NOM-NEXT:    vsra.vv v9, v8, v10
; RV32NOM-NEXT:    vsrl.vi v8, v8, 31
; RV32NOM-NEXT:    vadd.vv v8, v9, v8
; RV32NOM-NEXT:    vslidedown.vi v8, v8, 2
; RV32NOM-NEXT:    vmv.x.s a0, v8
; RV32NOM-NEXT:    ret
;
; RV32M-LABEL: extractelt_sdiv_v4i32:
; RV32M:       # %bb.0:
; RV32M-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32M-NEXT:    vslidedown.vi v8, v8, 2
; RV32M-NEXT:    vmv.x.s a0, v8
; RV32M-NEXT:    lui a1, 322639
; RV32M-NEXT:    addi a1, a1, -945
; RV32M-NEXT:    mulh a0, a0, a1
; RV32M-NEXT:    srli a1, a0, 31
; RV32M-NEXT:    srai a0, a0, 2
; RV32M-NEXT:    add a0, a0, a1
; RV32M-NEXT:    ret
;
; RV64NOM-LABEL: extractelt_sdiv_v4i32:
; RV64NOM:       # %bb.0:
; RV64NOM-NEXT:    lui a0, %hi(.LCPI42_0)
; RV64NOM-NEXT:    addi a0, a0, %lo(.LCPI42_0)
; RV64NOM-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV64NOM-NEXT:    vle32.v v9, (a0)
; RV64NOM-NEXT:    vmulh.vv v9, v8, v9
; RV64NOM-NEXT:    lui a0, 1044480
; RV64NOM-NEXT:    vmv.s.x v10, a0
; RV64NOM-NEXT:    vsext.vf4 v11, v10
; RV64NOM-NEXT:    vand.vv v8, v8, v11
; RV64NOM-NEXT:    vadd.vv v8, v9, v8
; RV64NOM-NEXT:    lui a0, 12320
; RV64NOM-NEXT:    addi a0, a0, 257
; RV64NOM-NEXT:    vmv.s.x v9, a0
; RV64NOM-NEXT:    vsext.vf4 v10, v9
; RV64NOM-NEXT:    vsra.vv v8, v8, v10
; RV64NOM-NEXT:    vsrl.vi v9, v8, 31
; RV64NOM-NEXT:    vadd.vv v8, v8, v9
; RV64NOM-NEXT:    vslidedown.vi v8, v8, 2
; RV64NOM-NEXT:    vmv.x.s a0, v8
; RV64NOM-NEXT:    ret
;
; RV64M-LABEL: extractelt_sdiv_v4i32:
; RV64M:       # %bb.0:
; RV64M-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV64M-NEXT:    vslidedown.vi v8, v8, 2
; RV64M-NEXT:    vmv.x.s a0, v8
; RV64M-NEXT:    lui a1, 322639
; RV64M-NEXT:    addiw a1, a1, -945
; RV64M-NEXT:    mul a0, a0, a1
; RV64M-NEXT:    srli a1, a0, 63
; RV64M-NEXT:    srai a0, a0, 34
; RV64M-NEXT:    add a0, a0, a1
; RV64M-NEXT:    ret
  %bo = sdiv <4 x i32> %x, <i32 11, i32 12, i32 13, i32 14>
  %ext = extractelement <4 x i32> %bo, i32 2
  ret i32 %ext
}

define i32 @extractelt_udiv_v4i32(<4 x i32> %x) {
; RV32NOM-LABEL: extractelt_udiv_v4i32:
; RV32NOM:       # %bb.0:
; RV32NOM-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32NOM-NEXT:    vsrl.vi v8, v8, 0
; RV32NOM-NEXT:    lui a0, 322639
; RV32NOM-NEXT:    addi a0, a0, -945
; RV32NOM-NEXT:    vmulhu.vx v8, v8, a0
; RV32NOM-NEXT:    vslidedown.vi v8, v8, 2
; RV32NOM-NEXT:    vmv.x.s a0, v8
; RV32NOM-NEXT:    srli a0, a0, 2
; RV32NOM-NEXT:    ret
;
; RV32M-LABEL: extractelt_udiv_v4i32:
; RV32M:       # %bb.0:
; RV32M-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32M-NEXT:    vslidedown.vi v8, v8, 2
; RV32M-NEXT:    vmv.x.s a0, v8
; RV32M-NEXT:    lui a1, 322639
; RV32M-NEXT:    addi a1, a1, -945
; RV32M-NEXT:    mulhu a0, a0, a1
; RV32M-NEXT:    srli a0, a0, 2
; RV32M-NEXT:    ret
;
; RV64NOM-LABEL: extractelt_udiv_v4i32:
; RV64NOM:       # %bb.0:
; RV64NOM-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV64NOM-NEXT:    vsrl.vi v8, v8, 0
; RV64NOM-NEXT:    lui a0, 322639
; RV64NOM-NEXT:    addi a0, a0, -945
; RV64NOM-NEXT:    vmulhu.vx v8, v8, a0
; RV64NOM-NEXT:    vslidedown.vi v8, v8, 2
; RV64NOM-NEXT:    vmv.x.s a0, v8
; RV64NOM-NEXT:    slli a0, a0, 33
; RV64NOM-NEXT:    srli a0, a0, 35
; RV64NOM-NEXT:    ret
;
; RV64M-LABEL: extractelt_udiv_v4i32:
; RV64M:       # %bb.0:
; RV64M-NEXT:    lui a0, 322639
; RV64M-NEXT:    addi a0, a0, -945
; RV64M-NEXT:    slli a0, a0, 32
; RV64M-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV64M-NEXT:    vslidedown.vi v8, v8, 2
; RV64M-NEXT:    vmv.x.s a1, v8
; RV64M-NEXT:    slli a1, a1, 32
; RV64M-NEXT:    mulhu a0, a1, a0
; RV64M-NEXT:    srli a0, a0, 34
; RV64M-NEXT:    ret
  %bo = udiv <4 x i32> %x, <i32 11, i32 12, i32 13, i32 14>
  %ext = extractelement <4 x i32> %bo, i32 2
  ret i32 %ext
}

define float @extractelt_fadd_v4f32(<4 x float> %x) {
; CHECK-LABEL: extractelt_fadd_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 2
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    lui a0, 267520
; CHECK-NEXT:    fmv.w.x fa4, a0
; CHECK-NEXT:    fadd.s fa0, fa5, fa4
; CHECK-NEXT:    ret
  %bo = fadd <4 x float> %x, <float 11.0, float 12.0, float 13.0, float 14.0>
  %ext = extractelement <4 x float> %bo, i32 2
  ret float %ext
}

define float @extractelt_fsub_v4f32(<4 x float> %x) {
; CHECK-LABEL: extractelt_fsub_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 2
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    lui a0, 267520
; CHECK-NEXT:    fmv.w.x fa4, a0
; CHECK-NEXT:    fsub.s fa0, fa4, fa5
; CHECK-NEXT:    ret
  %bo = fsub <4 x float> <float 11.0, float 12.0, float 13.0, float 14.0>, %x
  %ext = extractelement <4 x float> %bo, i32 2
  ret float %ext
}

define float @extractelt_fmul_v4f32(<4 x float> %x) {
; CHECK-LABEL: extractelt_fmul_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 2
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    lui a0, 267520
; CHECK-NEXT:    fmv.w.x fa4, a0
; CHECK-NEXT:    fmul.s fa0, fa5, fa4
; CHECK-NEXT:    ret
  %bo = fmul <4 x float> %x, <float 11.0, float 12.0, float 13.0, float 14.0>
  %ext = extractelement <4 x float> %bo, i32 2
  ret float %ext
}

define float @extractelt_fdiv_v4f32(<4 x float> %x) {
; CHECK-LABEL: extractelt_fdiv_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v8, 2
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    lui a0, 267520
; CHECK-NEXT:    fmv.w.x fa4, a0
; CHECK-NEXT:    fdiv.s fa0, fa5, fa4
; CHECK-NEXT:    ret
  %bo = fdiv <4 x float> %x, <float 11.0, float 12.0, float 13.0, float 14.0>
  %ext = extractelement <4 x float> %bo, i32 2