1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64
; funcA: splat byte element 5 of a <32 x i8> vector into all 32 lanes.
; Both -mattr=+avx configurations share the same expected code: vpshufb
; replicates byte 5 across the low 128 bits, then vinsertf128 $1 places that
; xmm result into the upper 128 bits of ymm0 as well.
5 define <32 x i8> @funcA(<32 x i8> %a) nounwind uwtable readnone ssp {
7 ; CHECK: # %bb.0: # %entry
8 ; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
9 ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
10 ; CHECK-NEXT: ret{{[l|q]}}
12 %shuffle = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
13 ret <32 x i8> %shuffle
; funcB: splat 16-bit element 5 of a <16 x i16> vector into all 16 lanes.
; Expected sequence (same for both targets): vpshufhw copies word 5 into
; word 4 so that dword 2 holds the pair (5,5), vpshufd then replicates dword 2
; across the xmm register, and vinsertf128 $1 duplicates it into the upper
; 128 bits of ymm0.
16 define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp {
18 ; CHECK: # %bb.0: # %entry
19 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7]
20 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
21 ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
22 ; CHECK-NEXT: ret{{[l|q]}}
24 %shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
25 ret <16 x i16> %shuffle
; funcC: build a <4 x i64> splat by inserting the scalar argument %q into
; each of the four lanes.
; i686 expectation: %q is read from the stack, so a single vbroadcastsd with
; a memory operand produces the whole ymm result.
; x86_64 expectation: %q arrives in %rdi; it is moved into xmm0 with vmovq,
; the low qword is duplicated with vpshufd [0,1,0,1], and vinsertf128 $1
; copies the xmm value into the upper 128 bits.
28 define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp {
30 ; X86: # %bb.0: # %entry
31 ; X86-NEXT: vbroadcastsd {{[0-9]+}}(%esp), %ymm0
35 ; X64: # %bb.0: # %entry
36 ; X64-NEXT: vmovq %rdi, %xmm0
37 ; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
38 ; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
41 %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
42 %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
43 %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2
44 %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3
45 ret <4 x i64> %vecinit6.i
; funcD: floating-point counterpart of funcC - build a <4 x double> splat by
; inserting the scalar argument %q into each of the four lanes.
; i686 expectation: a single vbroadcastsd from the stack slot holding %q.
; x86_64 expectation: the value is already in xmm0, so vmovddup duplicates
; the low double and vinsertf128 $1 copies the pair into the upper 128 bits.
48 define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
50 ; X86: # %bb.0: # %entry
51 ; X86-NEXT: vbroadcastsd {{[0-9]+}}(%esp), %ymm0
55 ; X64: # %bb.0: # %entry
56 ; X64-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
57 ; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
60 %vecinit.i = insertelement <4 x double> undef, double %q, i32 0
61 %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1
62 %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2
63 %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3
64 ret <4 x double> %vecinit6.i
67 ; Test this turns into a broadcast:
68 ; shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
; funcE: a 32-bit value is loaded from element [1][1] of a 32-byte-aligned
; stack array of 18 x 18 floats and inserted only into lanes 6 and 7 of an
; otherwise undef <8 x i32>, which is then bitcast to <8 x float>. Because the
; remaining lanes are undef, both targets are expected to emit one
; vbroadcastss with a memory operand inside the load block.
; All branch conditions are undef and the result is merged with an undef
; incoming value through the phi in the exit block.
; The and-with-minus-32 in the expected prologue matches the align 32 alloca.
; NOTE(review): the 1312-byte frame is presumably the 1296-byte array
; (18 * 18 * 4) rounded up to a multiple of 32 - confirm if the frame layout
; changes.
70 define <8 x float> @funcE() nounwind {
72 ; X86: # %bb.0: # %allocas
73 ; X86-NEXT: xorl %eax, %eax
74 ; X86-NEXT: testb %al, %al
75 ; X86-NEXT: # implicit-def: $ymm0
76 ; X86-NEXT: jne .LBB4_2
77 ; X86-NEXT: # %bb.1: # %load.i1247
78 ; X86-NEXT: pushl %ebp
79 ; X86-NEXT: movl %esp, %ebp
80 ; X86-NEXT: andl $-32, %esp
81 ; X86-NEXT: subl $1312, %esp # imm = 0x520
82 ; X86-NEXT: vbroadcastss {{[0-9]+}}(%esp), %ymm0
83 ; X86-NEXT: movl %ebp, %esp
85 ; X86-NEXT: .LBB4_2: # %__load_and_broadcast_32.exit1249
89 ; X64: # %bb.0: # %allocas
90 ; X64-NEXT: xorl %eax, %eax
91 ; X64-NEXT: testb %al, %al
92 ; X64-NEXT: # implicit-def: $ymm0
93 ; X64-NEXT: jne .LBB4_2
94 ; X64-NEXT: # %bb.1: # %load.i1247
95 ; X64-NEXT: pushq %rbp
96 ; X64-NEXT: movq %rsp, %rbp
97 ; X64-NEXT: andq $-32, %rsp
98 ; X64-NEXT: subq $1312, %rsp # imm = 0x520
99 ; X64-NEXT: vbroadcastss {{[0-9]+}}(%rsp), %ymm0
100 ; X64-NEXT: movq %rbp, %rsp
101 ; X64-NEXT: popq %rbp
102 ; X64-NEXT: .LBB4_2: # %__load_and_broadcast_32.exit1249
105 %udx495 = alloca [18 x [18 x float]], align 32
106 br label %for_test505.preheader
108 for_test505.preheader: ; preds = %for_test505.preheader, %allocas
109 br i1 undef, label %for_exit499, label %for_test505.preheader
111 for_exit499: ; preds = %for_test505.preheader
112 br i1 undef, label %__load_and_broadcast_32.exit1249, label %load.i1247
114 load.i1247: ; preds = %for_exit499
115 %ptr1227 = getelementptr [18 x [18 x float]], [18 x [18 x float]]* %udx495, i64 0, i64 1, i64 1
116 %ptr.i1237 = bitcast float* %ptr1227 to i32*
117 %val.i1238 = load i32, i32* %ptr.i1237, align 4
118 %ret6.i1245 = insertelement <8 x i32> undef, i32 %val.i1238, i32 6
119 %ret7.i1246 = insertelement <8 x i32> %ret6.i1245, i32 %val.i1238, i32 7
120 %phitmp = bitcast <8 x i32> %ret7.i1246 to <8 x float>
121 br label %__load_and_broadcast_32.exit1249
123 __load_and_broadcast_32.exit1249: ; preds = %load.i1247, %for_exit499
124 %load_broadcast12281250 = phi <8 x float> [ %phitmp, %load.i1247 ], [ undef, %for_exit499 ]
125 ret <8 x float> %load_broadcast12281250
; funcF: the scalar argument %val is inserted only into lanes 6 and 7 of an
; otherwise undef <8 x i32>, then bitcast to <8 x float>.
; i686 expectation: a single vbroadcastss from the stack slot holding %val.
; x86_64 expectation: %val is moved from %edi into xmm0, vpshufd [0,1,0,0]
; places element 0 in dwords 2 and 3, and vinsertf128 $1 moves that xmm value
; into the upper 128 bits, which makes those dwords lanes 6 and 7 of ymm0.
128 define <8 x float> @funcF(i32 %val) nounwind {
131 ; X86-NEXT: vbroadcastss {{[0-9]+}}(%esp), %ymm0
136 ; X64-NEXT: vmovd %edi, %xmm0
137 ; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,0]
138 ; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
140 %ret6 = insertelement <8 x i32> undef, i32 %val, i32 6
141 %ret7 = insertelement <8 x i32> %ret6, i32 %val, i32 7
142 %tmp = bitcast <8 x i32> %ret7 to <8 x float>
; funcG: splat element 0 of an <8 x float> register operand into all 8 lanes.
; Expected code (same for both targets): vpermilps replicates element 0 across
; the low 128 bits, then vinsertf128 $1 copies that xmm value into the upper
; 128 bits of ymm0.
146 define <8 x float> @funcG(<8 x float> %a) nounwind uwtable readnone ssp {
147 ; CHECK-LABEL: funcG:
148 ; CHECK: # %bb.0: # %entry
149 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
150 ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
151 ; CHECK-NEXT: ret{{[l|q]}}
153 %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
154 ret <8 x float> %shuffle
; funcH: splat element 5 of an <8 x float> register operand into all 8 lanes.
; The source element lives in the upper 128 bits, so the expected code first
; uses a ymm-wide vpermilps to replicate element 1 of each 128-bit half within
; that half (the upper half then holds element 5 four times), and vperm2f128
; [2,3,2,3] copies the upper half into both halves of ymm0.
157 define <8 x float> @funcH(<8 x float> %a) nounwind uwtable readnone ssp {
158 ; CHECK-LABEL: funcH:
159 ; CHECK: # %bb.0: # %entry
160 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,1,1,1,5,5,5,5]
161 ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
162 ; CHECK-NEXT: ret{{[l|q]}}
164 %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
165 ret <8 x float> %shuffle
; splat_load_2f64_11: load a <2 x double> from %ptr and splat element 1.
; The load and the shuffle are expected to fold into one vmovddup with a
; memory operand. On i686 the pointer argument is first fetched from the stack
; into %eax.
168 define <2 x double> @splat_load_2f64_11(<2 x double>* %ptr) {
169 ; X86-LABEL: splat_load_2f64_11:
171 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
172 ; X86-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
175 ; X64-LABEL: splat_load_2f64_11:
177 ; X64-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
179 %x = load <2 x double>, <2 x double>* %ptr
180 %x1 = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 1, i32 1>
; splat_load_4f64_2222: load a <4 x double> from %ptr and splat element 2.
; The load and the shuffle are expected to fold into one vbroadcastsd whose
; memory operand is at byte offset 16 (element 2 * 8 bytes) from the pointer.
; On i686 the pointer argument is first fetched from the stack into %eax.
184 define <4 x double> @splat_load_4f64_2222(<4 x double>* %ptr) {
185 ; X86-LABEL: splat_load_4f64_2222:
187 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
188 ; X86-NEXT: vbroadcastsd 16(%eax), %ymm0
191 ; X64-LABEL: splat_load_4f64_2222:
193 ; X64-NEXT: vbroadcastsd 16(%rdi), %ymm0
195 %x = load <4 x double>, <4 x double>* %ptr
196 %x1 = shufflevector <4 x double> %x, <4 x double> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
; splat_load_4f32_0000: load a <4 x float> from %ptr and splat element 0.
; The load and the shuffle are expected to fold into one 128-bit vbroadcastss
; reading directly from the pointer (offset 0). On i686 the pointer argument
; is first fetched from the stack into %eax.
200 define <4 x float> @splat_load_4f32_0000(<4 x float>* %ptr) {
201 ; X86-LABEL: splat_load_4f32_0000:
203 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
204 ; X86-NEXT: vbroadcastss (%eax), %xmm0
207 ; X64-LABEL: splat_load_4f32_0000:
209 ; X64-NEXT: vbroadcastss (%rdi), %xmm0
211 %x = load <4 x float>, <4 x float>* %ptr
212 %x1 = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
; splat_load_8f32_77777777: load an <8 x float> from %ptr and splat element 7.
; The load and the shuffle are expected to fold into one 256-bit vbroadcastss
; whose memory operand is at byte offset 28 (element 7 * 4 bytes) from the
; pointer. On i686 the pointer argument is first fetched from the stack into
; %eax.
216 define <8 x float> @splat_load_8f32_77777777(<8 x float>* %ptr) {
217 ; X86-LABEL: splat_load_8f32_77777777:
219 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
220 ; X86-NEXT: vbroadcastss 28(%eax), %ymm0
223 ; X64-LABEL: splat_load_8f32_77777777:
225 ; X64-NEXT: vbroadcastss 28(%rdi), %ymm0
227 %x = load <8 x float>, <8 x float>* %ptr
228 %x1 = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>