; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
; Splat byte element 5 of a <32 x i8> across all 32 lanes.
; With AVX1 only, the expected lowering shuffles the low 128 bits with vpshufb
; and then copies that half into the upper 128 bits with vinsertf128.
define <32 x i8> @funcA(<32 x i8> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcA:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <32 x i8> %shuffle
}
; Splat i16 element 5 of a <16 x i16> across all 16 lanes.
; Expected AVX1 lowering: vpshufhw + vpshufd build the splat in the low
; 128 bits, then vinsertf128 duplicates it into the upper 128 bits.
define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcB:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7]
; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i16> %shuffle
}
; Build a <4 x i64> whose four lanes all hold the scalar argument %q
; (a splat expressed as a chain of insertelement instructions).
; Expected AVX1 lowering: move the GPR into xmm0, duplicate the low quadword
; with vpshufd, then vinsertf128 the result into the upper 128 bits.
define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcC:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmovq %rdi, %xmm0
; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
  %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2
  %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3
  ret <4 x i64> %vecinit6.i
}
; Build a <4 x double> whose four lanes all hold the scalar argument %q
; (a splat expressed as a chain of insertelement instructions).
; Expected AVX1 lowering: vmovddup duplicates the low double within xmm0,
; then vinsertf128 copies that half into the upper 128 bits.
define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcD:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %vecinit.i = insertelement <4 x double> undef, double %q, i32 0
  %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1
  %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2
  %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3
  ret <4 x double> %vecinit6.i
}
; Test that this pattern turns into a broadcast:
;   shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
; Load one i32 from element [1][1] of a 32-byte-aligned stack array, insert it
; into lanes 6 and 7 of an otherwise-undef <8 x i32>, and return the vector
; bitcast to <8 x float> through a phi whose other incoming value is undef.
; The assertions expect the load and inserts to fold into a single
; vbroadcastss from the stack slot. The branch conditions are undef on purpose;
; only the shape of the control flow matters to this test.
define <8 x float> @funcE() nounwind {
; CHECK-LABEL: funcE:
; CHECK:       # %bb.0: # %allocas
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    testb %al, %al
; CHECK-NEXT:    # implicit-def: $ymm0
; CHECK-NEXT:    jne .LBB4_2
; CHECK-NEXT:  # %bb.1: # %load.i1247
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    movq %rsp, %rbp
; CHECK-NEXT:    andq $-32, %rsp
; CHECK-NEXT:    subq $1312, %rsp # imm = 0x520
; CHECK-NEXT:    vbroadcastss {{[0-9]+}}(%rsp), %ymm0
; CHECK-NEXT:    movq %rbp, %rsp
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:  .LBB4_2: # %__load_and_broadcast_32.exit1249
; CHECK-NEXT:    retq
allocas:
  %udx495 = alloca [18 x [18 x float]], align 32
  br label %for_test505.preheader

for_test505.preheader:                            ; preds = %for_test505.preheader, %allocas
  br i1 undef, label %for_exit499, label %for_test505.preheader

for_exit499:                                      ; preds = %for_test505.preheader
  br i1 undef, label %__load_and_broadcast_32.exit1249, label %load.i1247

load.i1247:                                       ; preds = %for_exit499
  %ptr1227 = getelementptr [18 x [18 x float]], [18 x [18 x float]]* %udx495, i64 0, i64 1, i64 1
  %ptr.i1237 = bitcast float* %ptr1227 to i32*
  %val.i1238 = load i32, i32* %ptr.i1237, align 4
  %ret6.i1245 = insertelement <8 x i32> undef, i32 %val.i1238, i32 6
  %ret7.i1246 = insertelement <8 x i32> %ret6.i1245, i32 %val.i1238, i32 7
  %phitmp = bitcast <8 x i32> %ret7.i1246 to <8 x float>
  br label %__load_and_broadcast_32.exit1249

__load_and_broadcast_32.exit1249:                 ; preds = %load.i1247, %for_exit499
  %load_broadcast12281250 = phi <8 x float> [ %phitmp, %load.i1247 ], [ undef, %for_exit499 ]
  ret <8 x float> %load_broadcast12281250
}
; Insert the scalar argument %val into lanes 6 and 7 of an otherwise-undef
; <8 x i32> and return it bitcast to <8 x float>.
; Expected AVX1 lowering: vmovd the GPR into xmm0, shuffle it with vpshufd,
; then vinsertf128 the result into the upper 128 bits.
define <8 x float> @funcF(i32 %val) nounwind {
; CHECK-LABEL: funcF:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovd %edi, %xmm0
; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,0]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %ret6 = insertelement <8 x i32> undef, i32 %val, i32 6
  %ret7 = insertelement <8 x i32> %ret6, i32 %val, i32 7
  %tmp = bitcast <8 x i32> %ret7 to <8 x float>
  ret <8 x float> %tmp
}
; Splat float element 0 (in the low 128-bit half) of a <8 x float> across
; all 8 lanes.
; Expected AVX1 lowering: vpermilps splats within xmm0, then vinsertf128
; copies that half into the upper 128 bits.
define <8 x float> @funcG(<8 x float> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcG:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x float> %shuffle
}
; Splat float element 5 (in the upper 128-bit half) of a <8 x float> across
; all 8 lanes.
; Expected AVX1 lowering: vpermilps splats element 1 of each 128-bit half
; in place, then vperm2f128 selects the upper half for both halves.
define <8 x float> @funcH(<8 x float> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcH:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,1,1,5,5,5,5]
; CHECK-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; CHECK-NEXT:    retq
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x float> %shuffle
}
; Load a <2 x double> and splat its element 1 into both lanes.
; The assertions expect the load to fold into a single vmovddup with a
; memory operand.
define <2 x double> @splat_load_2f64_11(<2 x double>* %ptr) {
; CHECK-LABEL: splat_load_2f64_11:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; CHECK-NEXT:    retq
  %x = load <2 x double>, <2 x double>* %ptr
  %x1 = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x double> %x1
}
; Load a <4 x double> and splat its element 2 into all four lanes.
; The assertions expect the load to fold into one vbroadcastsd that reads
; the double at byte offset 16 from the pointer.
define <4 x double> @splat_load_4f64_2222(<4 x double>* %ptr) {
; CHECK-LABEL: splat_load_4f64_2222:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastsd 16(%rdi), %ymm0
; CHECK-NEXT:    retq
  %x = load <4 x double>, <4 x double>* %ptr
  %x1 = shufflevector <4 x double> %x, <4 x double> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x double> %x1
}
; Load a <4 x float> and splat its element 0 into all four lanes.
; The assertions expect the load to fold into one vbroadcastss that reads
; the float directly at the pointer.
define <4 x float> @splat_load_4f32_0000(<4 x float>* %ptr) {
; CHECK-LABEL: splat_load_4f32_0000:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastss (%rdi), %xmm0
; CHECK-NEXT:    retq
  %x = load <4 x float>, <4 x float>* %ptr
  %x1 = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x float> %x1
}
165 define <8 x float> @splat_load_8f32_77777777(<8 x float>* %ptr) {
166 ; CHECK-LABEL: splat_load_8f32_77777777:
168 ; CHECK-NEXT: vbroadcastss 28(%rdi), %ymm0
170 %x = load <8 x float>, <8 x float>* %ptr
171 %x1 = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>