; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-unknown -mattr=+sse2 < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVXONLY
; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-unknown -mattr=+avx512f < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=KNL
; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512bw,+avx512vl < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=SKX

; Verify that fast-isel knows how to select aligned/unaligned vector loads.
; Also verify that the selected load instruction is in the correct domain.

define <16 x i8> @test_v16i8(<16 x i8>* %V) {
; SSE-LABEL: test_v16i8:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v16i8:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqa (%rdi), %xmm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v16i8:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqa (%rdi), %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v16i8:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %xmm0
; SKX-NEXT:    retq
entry:
  %0 = load <16 x i8>, <16 x i8>* %V, align 16
  ret <16 x i8> %0
}

define <8 x i16> @test_v8i16(<8 x i16>* %V) {
; SSE-LABEL: test_v8i16:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v8i16:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqa (%rdi), %xmm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v8i16:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqa (%rdi), %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v8i16:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %xmm0
; SKX-NEXT:    retq
entry:
  %0 = load <8 x i16>, <8 x i16>* %V, align 16
  ret <8 x i16> %0
}

define <4 x i32> @test_v4i32(<4 x i32>* %V) {
; SSE-LABEL: test_v4i32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v4i32:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqa (%rdi), %xmm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v4i32:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqa (%rdi), %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v4i32:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %xmm0
; SKX-NEXT:    retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %V, align 16
  ret <4 x i32> %0
}

define <2 x i64> @test_v2i64(<2 x i64>* %V) {
; SSE-LABEL: test_v2i64:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v2i64:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqa (%rdi), %xmm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v2i64:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqa (%rdi), %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v2i64:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %xmm0
; SKX-NEXT:    retq
entry:
  %0 = load <2 x i64>, <2 x i64>* %V, align 16
  ret <2 x i64> %0
}

define <16 x i8> @test_v16i8_unaligned(<16 x i8>* %V) {
; SSE-LABEL: test_v16i8_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqu (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v16i8_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqu (%rdi), %xmm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v16i8_unaligned:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqu (%rdi), %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v16i8_unaligned:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqu64 (%rdi), %xmm0
; SKX-NEXT:    retq
entry:
  %0 = load <16 x i8>, <16 x i8>* %V, align 4
  ret <16 x i8> %0
}

define <8 x i16> @test_v8i16_unaligned(<8 x i16>* %V) {
; SSE-LABEL: test_v8i16_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqu (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v8i16_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqu (%rdi), %xmm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v8i16_unaligned:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqu (%rdi), %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v8i16_unaligned:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqu64 (%rdi), %xmm0
; SKX-NEXT:    retq
entry:
  %0 = load <8 x i16>, <8 x i16>* %V, align 4
  ret <8 x i16> %0
}

define <4 x i32> @test_v4i32_unaligned(<4 x i32>* %V) {
; SSE-LABEL: test_v4i32_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqu (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v4i32_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqu (%rdi), %xmm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v4i32_unaligned:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqu (%rdi), %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v4i32_unaligned:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqu64 (%rdi), %xmm0
; SKX-NEXT:    retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %V, align 4
  ret <4 x i32> %0
}

define <2 x i64> @test_v2i64_unaligned(<2 x i64>* %V) {
; SSE-LABEL: test_v2i64_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqu (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v2i64_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqu (%rdi), %xmm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v2i64_unaligned:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqu (%rdi), %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v2i64_unaligned:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqu64 (%rdi), %xmm0
; SKX-NEXT:    retq
entry:
  %0 = load <2 x i64>, <2 x i64>* %V, align 4
  ret <2 x i64> %0
}

define <4 x float> @test_v4f32(<4 x float>* %V) {
; SSE-LABEL: test_v4f32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovaps (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x float>, <4 x float>* %V, align 16
  ret <4 x float> %0
}

define <2 x double> @test_v2f64(<2 x double>* %V) {
; SSE-LABEL: test_v2f64:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movapd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovapd (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <2 x double>, <2 x double>* %V, align 16
  ret <2 x double> %0
}

define <4 x float> @test_v4f32_unaligned(<4 x float>* %V) {
; SSE-LABEL: test_v4f32_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4f32_unaligned:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovups (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x float>, <4 x float>* %V, align 4
  ret <4 x float> %0
}

define <2 x double> @test_v2f64_unaligned(<2 x double>* %V) {
; SSE-LABEL: test_v2f64_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movupd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2f64_unaligned:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovupd (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <2 x double>, <2 x double>* %V, align 4
  ret <2 x double> %0
}

define <16 x i8> @test_v16i8_abi_alignment(<16 x i8>* %V) {
; SSE-LABEL: test_v16i8_abi_alignment:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v16i8_abi_alignment:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqa (%rdi), %xmm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v16i8_abi_alignment:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqa (%rdi), %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v16i8_abi_alignment:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %xmm0
; SKX-NEXT:    retq
entry:
  %0 = load <16 x i8>, <16 x i8>* %V
  ret <16 x i8> %0
}

define <8 x i16> @test_v8i16_abi_alignment(<8 x i16>* %V) {
; SSE-LABEL: test_v8i16_abi_alignment:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v8i16_abi_alignment:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqa (%rdi), %xmm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v8i16_abi_alignment:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqa (%rdi), %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v8i16_abi_alignment:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %xmm0
; SKX-NEXT:    retq
entry:
  %0 = load <8 x i16>, <8 x i16>* %V
  ret <8 x i16> %0
}

define <4 x i32> @test_v4i32_abi_alignment(<4 x i32>* %V) {
; SSE-LABEL: test_v4i32_abi_alignment:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v4i32_abi_alignment:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqa (%rdi), %xmm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v4i32_abi_alignment:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqa (%rdi), %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v4i32_abi_alignment:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %xmm0
; SKX-NEXT:    retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %V
  ret <4 x i32> %0
}

define <2 x i64> @test_v2i64_abi_alignment(<2 x i64>* %V) {
; SSE-LABEL: test_v2i64_abi_alignment:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v2i64_abi_alignment:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqa (%rdi), %xmm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v2i64_abi_alignment:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqa (%rdi), %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v2i64_abi_alignment:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %xmm0
; SKX-NEXT:    retq
entry:
  %0 = load <2 x i64>, <2 x i64>* %V
  ret <2 x i64> %0
}

define <4 x float> @test_v4f32_abi_alignment(<4 x float>* %V) {
; SSE-LABEL: test_v4f32_abi_alignment:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4f32_abi_alignment:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovaps (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x float>, <4 x float>* %V
  ret <4 x float> %0
}

define <2 x double> @test_v2f64_abi_alignment(<2 x double>* %V) {
; SSE-LABEL: test_v2f64_abi_alignment:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movapd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2f64_abi_alignment:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovapd (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <2 x double>, <2 x double>* %V
  ret <2 x double> %0
}

define <32 x i8> @test_v32i8(<32 x i8>* %V) {
; SSE-LABEL: test_v32i8:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v32i8:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqa (%rdi), %ymm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v32i8:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqa (%rdi), %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v32i8:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %ymm0
; SKX-NEXT:    retq
entry:
  %0 = load <32 x i8>, <32 x i8>* %V, align 32
  ret <32 x i8> %0
}

define <16 x i16> @test_v16i16(<16 x i16>* %V) {
; SSE-LABEL: test_v16i16:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v16i16:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqa (%rdi), %ymm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v16i16:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqa (%rdi), %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v16i16:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %ymm0
; SKX-NEXT:    retq
entry:
  %0 = load <16 x i16>, <16 x i16>* %V, align 32
  ret <16 x i16> %0
}

define <8 x i32> @test_v8i32(<8 x i32>* %V) {
; SSE-LABEL: test_v8i32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v8i32:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqa (%rdi), %ymm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v8i32:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqa (%rdi), %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v8i32:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %ymm0
; SKX-NEXT:    retq
entry:
  %0 = load <8 x i32>, <8 x i32>* %V, align 32
  ret <8 x i32> %0
}

define <4 x i64> @test_v4i64(<4 x i64>* %V) {
; SSE-LABEL: test_v4i64:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v4i64:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqa (%rdi), %ymm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v4i64:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqa (%rdi), %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v4i64:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %ymm0
; SKX-NEXT:    retq
entry:
  %0 = load <4 x i64>, <4 x i64>* %V, align 32
  ret <4 x i64> %0
}

define <32 x i8> @test_v32i8_unaligned(<32 x i8>* %V) {
; SSE-LABEL: test_v32i8_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v32i8_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqu (%rdi), %ymm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v32i8_unaligned:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqu (%rdi), %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v32i8_unaligned:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqu64 (%rdi), %ymm0
; SKX-NEXT:    retq
entry:
  %0 = load <32 x i8>, <32 x i8>* %V, align 4
  ret <32 x i8> %0
}

define <16 x i16> @test_v16i16_unaligned(<16 x i16>* %V) {
; SSE-LABEL: test_v16i16_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v16i16_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqu (%rdi), %ymm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v16i16_unaligned:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqu (%rdi), %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v16i16_unaligned:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqu64 (%rdi), %ymm0
; SKX-NEXT:    retq
entry:
  %0 = load <16 x i16>, <16 x i16>* %V, align 4
  ret <16 x i16> %0
}

define <8 x i32> @test_v8i32_unaligned(<8 x i32>* %V) {
; SSE-LABEL: test_v8i32_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v8i32_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqu (%rdi), %ymm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v8i32_unaligned:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqu (%rdi), %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v8i32_unaligned:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqu64 (%rdi), %ymm0
; SKX-NEXT:    retq
entry:
  %0 = load <8 x i32>, <8 x i32>* %V, align 4
  ret <8 x i32> %0
}

define <4 x i64> @test_v4i64_unaligned(<4 x i64>* %V) {
; SSE-LABEL: test_v4i64_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v4i64_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqu (%rdi), %ymm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v4i64_unaligned:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqu (%rdi), %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v4i64_unaligned:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqu64 (%rdi), %ymm0
; SKX-NEXT:    retq
entry:
  %0 = load <4 x i64>, <4 x i64>* %V, align 4
  ret <4 x i64> %0
}

define <8 x float> @test_v8f32(<8 x float>* %V) {
; SSE-LABEL: test_v8f32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v8f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovaps (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <8 x float>, <8 x float>* %V, align 32
  ret <8 x float> %0
}

define <4 x double> @test_v4f64(<4 x double>* %V) {
; SSE-LABEL: test_v4f64:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movapd (%rdi), %xmm0
; SSE-NEXT:    movapd 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovapd (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x double>, <4 x double>* %V, align 32
  ret <4 x double> %0
}

define <8 x float> @test_v8f32_unaligned(<8 x float>* %V) {
; SSE-LABEL: test_v8f32_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v8f32_unaligned:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovups (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <8 x float>, <8 x float>* %V, align 4
  ret <8 x float> %0
}

define <4 x double> @test_v4f64_unaligned(<4 x double>* %V) {
; SSE-LABEL: test_v4f64_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movupd (%rdi), %xmm0
; SSE-NEXT:    movupd 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4f64_unaligned:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovupd (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x double>, <4 x double>* %V, align 4
  ret <4 x double> %0
}

define <64 x i8> @test_v64i8(<64 x i8>* %V) {
; SSE-LABEL: test_v64i8:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    movaps 32(%rdi), %xmm2
; SSE-NEXT:    movaps 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v64i8:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovaps (%rdi), %ymm0
; AVXONLY-NEXT:    vmovaps 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v64i8:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovdqa64 (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <64 x i8>, <64 x i8>* %V, align 64
  ret <64 x i8> %0
}

define <32 x i16> @test_v32i16(<32 x i16>* %V) {
; SSE-LABEL: test_v32i16:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    movaps 32(%rdi), %xmm2
; SSE-NEXT:    movaps 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v32i16:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovaps (%rdi), %ymm0
; AVXONLY-NEXT:    vmovaps 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v32i16:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovdqa64 (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <32 x i16>, <32 x i16>* %V, align 64
  ret <32 x i16> %0
}

define <16 x i32> @test_v16i32(<16 x i32>* %V) {
; SSE-LABEL: test_v16i32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    movaps 32(%rdi), %xmm2
; SSE-NEXT:    movaps 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v16i32:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovaps (%rdi), %ymm0
; AVXONLY-NEXT:    vmovaps 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v16i32:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovdqa64 (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <16 x i32>, <16 x i32>* %V, align 64
  ret <16 x i32> %0
}

define <8 x i64> @test_v8i64(<8 x i64>* %V) {
; SSE-LABEL: test_v8i64:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    movaps 32(%rdi), %xmm2
; SSE-NEXT:    movaps 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v8i64:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovaps (%rdi), %ymm0
; AVXONLY-NEXT:    vmovaps 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v8i64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovdqa64 (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <8 x i64>, <8 x i64>* %V, align 64
  ret <8 x i64> %0
}

define <64 x i8> @test_v64i8_unaligned(<64 x i8>* %V) {
; SSE-LABEL: test_v64i8_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    movups 32(%rdi), %xmm2
; SSE-NEXT:    movups 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v64i8_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovups (%rdi), %ymm0
; AVXONLY-NEXT:    vmovups 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v64i8_unaligned:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovdqu64 (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <64 x i8>, <64 x i8>* %V, align 4
  ret <64 x i8> %0
}

define <32 x i16> @test_v32i16_unaligned(<32 x i16>* %V) {
; SSE-LABEL: test_v32i16_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    movups 32(%rdi), %xmm2
; SSE-NEXT:    movups 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v32i16_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovups (%rdi), %ymm0
; AVXONLY-NEXT:    vmovups 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v32i16_unaligned:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovdqu64 (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <32 x i16>, <32 x i16>* %V, align 4
  ret <32 x i16> %0
}

define <16 x i32> @test_v16i32_unaligned(<16 x i32>* %V) {
; SSE-LABEL: test_v16i32_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    movups 32(%rdi), %xmm2
; SSE-NEXT:    movups 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v16i32_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovups (%rdi), %ymm0
; AVXONLY-NEXT:    vmovups 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v16i32_unaligned:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovdqu64 (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <16 x i32>, <16 x i32>* %V, align 4
  ret <16 x i32> %0
}

define <8 x i64> @test_v8i64_unaligned(<8 x i64>* %V) {
; SSE-LABEL: test_v8i64_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    movups 32(%rdi), %xmm2
; SSE-NEXT:    movups 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v8i64_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovups (%rdi), %ymm0
; AVXONLY-NEXT:    vmovups 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v8i64_unaligned:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovdqu64 (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <8 x i64>, <8 x i64>* %V, align 4
  ret <8 x i64> %0
}

define <8 x float> @test_v16f32(<8 x float>* %V) {
; SSE-LABEL: test_v16f32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v16f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovaps (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <8 x float>, <8 x float>* %V, align 64
  ret <8 x float> %0
}

define <8 x double> @test_v8f64(<8 x double>* %V) {
; SSE-LABEL: test_v8f64:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movapd (%rdi), %xmm0
; SSE-NEXT:    movapd 16(%rdi), %xmm1
; SSE-NEXT:    movapd 32(%rdi), %xmm2
; SSE-NEXT:    movapd 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v8f64:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovapd (%rdi), %ymm0
; AVXONLY-NEXT:    vmovapd 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v8f64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovapd (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <8 x double>, <8 x double>* %V, align 64
  ret <8 x double> %0
}

define <16 x float> @test_v16f32_unaligned(<16 x float>* %V) {
; SSE-LABEL: test_v16f32_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    movups 32(%rdi), %xmm2
; SSE-NEXT:    movups 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v16f32_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovups (%rdi), %ymm0
; AVXONLY-NEXT:    vmovups 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_unaligned:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovups (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <16 x float>, <16 x float>* %V, align 4
  ret <16 x float> %0
}

define <8 x double> @test_v8f64_unaligned(<8 x double>* %V) {
; SSE-LABEL: test_v8f64_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movupd (%rdi), %xmm0
; SSE-NEXT:    movupd 16(%rdi), %xmm1
; SSE-NEXT:    movupd 32(%rdi), %xmm2
; SSE-NEXT:    movupd 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v8f64_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovupd (%rdi), %ymm0
; AVXONLY-NEXT:    vmovupd 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v8f64_unaligned:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovupd (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <8 x double>, <8 x double>* %V, align 4
  ret <8 x double> %0
}