; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-unknown -mattr=+sse2 < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s --check-prefixes=AVX,AVXONLY
; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-unknown -mattr=+avx512f < %s | FileCheck %s --check-prefixes=AVX,AVX512
; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512bw,+avx512vl < %s | FileCheck %s --check-prefixes=AVX,AVX512

; Verify that fast-isel knows how to select aligned/unaligned vector loads.
; Also verify that the selected load instruction is in the correct domain.
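;
; "Correct domain" here means that integer vector loads are expected to use the
; integer-domain moves (movdqa/movdqu, vmovdqa/vmovdqu, vmovdqa64/vmovdqu64),
; while float/double vector loads are expected to use the FP-domain moves
; (movaps/movapd/movups/movupd and their VEX/EVEX forms).
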
define <16 x i8> @test_v16i8(ptr %V) {
; SSE-LABEL: test_v16i8:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v16i8:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovdqa (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <16 x i8>, ptr %V, align 16
  ret <16 x i8> %0
}

define <8 x i16> @test_v8i16(ptr %V) {
; SSE-LABEL: test_v8i16:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v8i16:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovdqa (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <8 x i16>, ptr %V, align 16
  ret <8 x i16> %0
}

define <4 x i32> @test_v4i32(ptr %V) {
; SSE-LABEL: test_v4i32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4i32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovdqa (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x i32>, ptr %V, align 16
  ret <4 x i32> %0
}

define <2 x i64> @test_v2i64(ptr %V) {
; SSE-LABEL: test_v2i64:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2i64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovdqa (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <2 x i64>, ptr %V, align 16
  ret <2 x i64> %0
}

define <16 x i8> @test_v16i8_unaligned(ptr %V) {
; SSE-LABEL: test_v16i8_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqu (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v16i8_unaligned:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovdqu (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <16 x i8>, ptr %V, align 4
  ret <16 x i8> %0
}

define <8 x i16> @test_v8i16_unaligned(ptr %V) {
; SSE-LABEL: test_v8i16_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqu (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v8i16_unaligned:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovdqu (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <8 x i16>, ptr %V, align 4
  ret <8 x i16> %0
}

define <4 x i32> @test_v4i32_unaligned(ptr %V) {
; SSE-LABEL: test_v4i32_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqu (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4i32_unaligned:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovdqu (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x i32>, ptr %V, align 4
  ret <4 x i32> %0
}

define <2 x i64> @test_v2i64_unaligned(ptr %V) {
; SSE-LABEL: test_v2i64_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqu (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2i64_unaligned:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovdqu (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <2 x i64>, ptr %V, align 4
  ret <2 x i64> %0
}

define <4 x float> @test_v4f32(ptr %V) {
; SSE-LABEL: test_v4f32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovaps (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x float>, ptr %V, align 16
  ret <4 x float> %0
}

define <2 x double> @test_v2f64(ptr %V) {
; SSE-LABEL: test_v2f64:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movapd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovapd (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <2 x double>, ptr %V, align 16
  ret <2 x double> %0
}

define <4 x float> @test_v4f32_unaligned(ptr %V) {
; SSE-LABEL: test_v4f32_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4f32_unaligned:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovups (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x float>, ptr %V, align 4
  ret <4 x float> %0
}

define <2 x double> @test_v2f64_unaligned(ptr %V) {
; SSE-LABEL: test_v2f64_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movupd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2f64_unaligned:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovupd (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <2 x double>, ptr %V, align 4
  ret <2 x double> %0
}

define <16 x i8> @test_v16i8_abi_alignment(ptr %V) {
; SSE-LABEL: test_v16i8_abi_alignment:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v16i8_abi_alignment:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovdqa (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <16 x i8>, ptr %V
  ret <16 x i8> %0
}

define <8 x i16> @test_v8i16_abi_alignment(ptr %V) {
; SSE-LABEL: test_v8i16_abi_alignment:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v8i16_abi_alignment:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovdqa (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <8 x i16>, ptr %V
  ret <8 x i16> %0
}

define <4 x i32> @test_v4i32_abi_alignment(ptr %V) {
; SSE-LABEL: test_v4i32_abi_alignment:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4i32_abi_alignment:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovdqa (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x i32>, ptr %V
  ret <4 x i32> %0
}

define <2 x i64> @test_v2i64_abi_alignment(ptr %V) {
; SSE-LABEL: test_v2i64_abi_alignment:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2i64_abi_alignment:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovdqa (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <2 x i64>, ptr %V
  ret <2 x i64> %0
}

define <4 x float> @test_v4f32_abi_alignment(ptr %V) {
; SSE-LABEL: test_v4f32_abi_alignment:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4f32_abi_alignment:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovaps (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x float>, ptr %V
  ret <4 x float> %0
}

define <2 x double> @test_v2f64_abi_alignment(ptr %V) {
; SSE-LABEL: test_v2f64_abi_alignment:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movapd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2f64_abi_alignment:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovapd (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <2 x double>, ptr %V
  ret <2 x double> %0
}

define <32 x i8> @test_v32i8(ptr %V) {
; SSE-LABEL: test_v32i8:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v32i8:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovdqa (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <32 x i8>, ptr %V, align 32
  ret <32 x i8> %0
}

define <16 x i16> @test_v16i16(ptr %V) {
; SSE-LABEL: test_v16i16:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v16i16:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovdqa (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <16 x i16>, ptr %V, align 32
  ret <16 x i16> %0
}

define <8 x i32> @test_v8i32(ptr %V) {
; SSE-LABEL: test_v8i32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v8i32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovdqa (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <8 x i32>, ptr %V, align 32
  ret <8 x i32> %0
}

define <4 x i64> @test_v4i64(ptr %V) {
; SSE-LABEL: test_v4i64:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4i64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovdqa (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x i64>, ptr %V, align 32
  ret <4 x i64> %0
}

define <32 x i8> @test_v32i8_unaligned(ptr %V) {
; SSE-LABEL: test_v32i8_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v32i8_unaligned:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovdqu (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <32 x i8>, ptr %V, align 4
  ret <32 x i8> %0
}

define <16 x i16> @test_v16i16_unaligned(ptr %V) {
; SSE-LABEL: test_v16i16_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v16i16_unaligned:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovdqu (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <16 x i16>, ptr %V, align 4
  ret <16 x i16> %0
}

define <8 x i32> @test_v8i32_unaligned(ptr %V) {
; SSE-LABEL: test_v8i32_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v8i32_unaligned:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovdqu (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <8 x i32>, ptr %V, align 4
  ret <8 x i32> %0
}

define <4 x i64> @test_v4i64_unaligned(ptr %V) {
; SSE-LABEL: test_v4i64_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4i64_unaligned:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovdqu (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x i64>, ptr %V, align 4
  ret <4 x i64> %0
}

define <8 x float> @test_v8f32(ptr %V) {
; SSE-LABEL: test_v8f32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v8f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovaps (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <8 x float>, ptr %V, align 32
  ret <8 x float> %0
}

define <4 x double> @test_v4f64(ptr %V) {
; SSE-LABEL: test_v4f64:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movapd (%rdi), %xmm0
; SSE-NEXT:    movapd 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovapd (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x double>, ptr %V, align 32
  ret <4 x double> %0
}

define <8 x float> @test_v8f32_unaligned(ptr %V) {
; SSE-LABEL: test_v8f32_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v8f32_unaligned:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovups (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <8 x float>, ptr %V, align 4
  ret <8 x float> %0
}

define <4 x double> @test_v4f64_unaligned(ptr %V) {
; SSE-LABEL: test_v4f64_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movupd (%rdi), %xmm0
; SSE-NEXT:    movupd 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4f64_unaligned:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovupd (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x double>, ptr %V, align 4
  ret <4 x double> %0
}

define <64 x i8> @test_v64i8(ptr %V) {
; SSE-LABEL: test_v64i8:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    movaps 32(%rdi), %xmm2
; SSE-NEXT:    movaps 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v64i8:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovaps (%rdi), %ymm0
; AVXONLY-NEXT:    vmovaps 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v64i8:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovdqa64 (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <64 x i8>, ptr %V, align 64
  ret <64 x i8> %0
}

define <32 x i16> @test_v32i16(ptr %V) {
; SSE-LABEL: test_v32i16:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    movaps 32(%rdi), %xmm2
; SSE-NEXT:    movaps 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v32i16:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovaps (%rdi), %ymm0
; AVXONLY-NEXT:    vmovaps 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v32i16:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovdqa64 (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <32 x i16>, ptr %V, align 64
  ret <32 x i16> %0
}

define <16 x i32> @test_v16i32(ptr %V) {
; SSE-LABEL: test_v16i32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    movaps 32(%rdi), %xmm2
; SSE-NEXT:    movaps 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v16i32:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovaps (%rdi), %ymm0
; AVXONLY-NEXT:    vmovaps 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v16i32:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovdqa64 (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <16 x i32>, ptr %V, align 64
  ret <16 x i32> %0
}

define <8 x i64> @test_v8i64(ptr %V) {
; SSE-LABEL: test_v8i64:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    movaps 32(%rdi), %xmm2
; SSE-NEXT:    movaps 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v8i64:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovaps (%rdi), %ymm0
; AVXONLY-NEXT:    vmovaps 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v8i64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovdqa64 (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <8 x i64>, ptr %V, align 64
  ret <8 x i64> %0
}

define <64 x i8> @test_v64i8_unaligned(ptr %V) {
; SSE-LABEL: test_v64i8_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    movups 32(%rdi), %xmm2
; SSE-NEXT:    movups 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v64i8_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovups (%rdi), %ymm0
; AVXONLY-NEXT:    vmovups 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v64i8_unaligned:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovdqu64 (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <64 x i8>, ptr %V, align 4
  ret <64 x i8> %0
}

define <32 x i16> @test_v32i16_unaligned(ptr %V) {
; SSE-LABEL: test_v32i16_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    movups 32(%rdi), %xmm2
; SSE-NEXT:    movups 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v32i16_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovups (%rdi), %ymm0
; AVXONLY-NEXT:    vmovups 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v32i16_unaligned:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovdqu64 (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <32 x i16>, ptr %V, align 4
  ret <32 x i16> %0
}

define <16 x i32> @test_v16i32_unaligned(ptr %V) {
; SSE-LABEL: test_v16i32_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    movups 32(%rdi), %xmm2
; SSE-NEXT:    movups 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v16i32_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovups (%rdi), %ymm0
; AVXONLY-NEXT:    vmovups 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v16i32_unaligned:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovdqu64 (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <16 x i32>, ptr %V, align 4
  ret <16 x i32> %0
}

define <8 x i64> @test_v8i64_unaligned(ptr %V) {
; SSE-LABEL: test_v8i64_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    movups 32(%rdi), %xmm2
; SSE-NEXT:    movups 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v8i64_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovups (%rdi), %ymm0
; AVXONLY-NEXT:    vmovups 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v8i64_unaligned:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovdqu64 (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <8 x i64>, ptr %V, align 4
  ret <8 x i64> %0
}

define <8 x float> @test_v16f32(ptr %V) {
; SSE-LABEL: test_v16f32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v16f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovaps (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <8 x float>, ptr %V, align 64
  ret <8 x float> %0
}

define <8 x double> @test_v8f64(ptr %V) {
; SSE-LABEL: test_v8f64:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movapd (%rdi), %xmm0
; SSE-NEXT:    movapd 16(%rdi), %xmm1
; SSE-NEXT:    movapd 32(%rdi), %xmm2
; SSE-NEXT:    movapd 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v8f64:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovapd (%rdi), %ymm0
; AVXONLY-NEXT:    vmovapd 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v8f64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovapd (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <8 x double>, ptr %V, align 64
  ret <8 x double> %0
}

define <16 x float> @test_v16f32_unaligned(ptr %V) {
; SSE-LABEL: test_v16f32_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    movups 32(%rdi), %xmm2
; SSE-NEXT:    movups 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v16f32_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovups (%rdi), %ymm0
; AVXONLY-NEXT:    vmovups 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_unaligned:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovups (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <16 x float>, ptr %V, align 4
  ret <16 x float> %0
}

define <8 x double> @test_v8f64_unaligned(ptr %V) {
; SSE-LABEL: test_v8f64_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movupd (%rdi), %xmm0
; SSE-NEXT:    movupd 16(%rdi), %xmm1
; SSE-NEXT:    movupd 32(%rdi), %xmm2
; SSE-NEXT:    movupd 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v8f64_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovupd (%rdi), %ymm0
; AVXONLY-NEXT:    vmovupd 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v8f64_unaligned:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovupd (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <8 x double>, ptr %V, align 4
  ret <8 x double> %0
}