1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_mem_shuffle
2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64
; funcA: splat byte element 5 of a <32 x i8> vector across all 32 lanes.
; The run lines enable only +avx, and the assertions below expect a 128-bit
; vpshufb that replicates byte 5 within xmm0, followed by vinsertf128 $1 to
; place that same 128-bit value in the upper half of ymm0.
5 define <32 x i8> @funcA(<32 x i8> %a) nounwind uwtable readnone ssp {
7 ; CHECK: # %bb.0: # %entry
8 ; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
9 ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
10 ; CHECK-NEXT: ret{{[l|q]}}
; Every mask index is 5, so the second (undef) operand is never selected.
12 %shuffle = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
13 ret <32 x i8> %shuffle
; funcB: splat 16-bit element 5 of a <16 x i16> vector across all 16 lanes.
; The assertions below expect a three-step sequence: vpshufhw replicates
; word 5 through the high four words of xmm0, vpshufd then replicates dword 2
; (which holds two copies of that word) across xmm0, and vinsertf128 $1 places
; the 128-bit result in the upper half of ymm0.
16 define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp {
18 ; CHECK: # %bb.0: # %entry
19 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
20 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
21 ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
22 ; CHECK-NEXT: ret{{[l|q]}}
; Every mask index is 5, so the second (undef) operand is never selected.
24 %shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
25 ret <16 x i16> %shuffle
; funcC: build a <4 x i64> whose four lanes all hold the scalar argument %q,
; using a chain of insertelement instructions.
; Expected output differs per target:
;   - i686 run: a single vbroadcastsd from a stack slot.
;   - x86_64 run: vmovq moves %rdi into xmm0, vpshufd [0,1,0,1] duplicates the
;     low 64 bits, and vinsertf128 $1 places the result in the upper half of ymm0.
28 define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp {
30 ; X86: # %bb.0: # %entry
31 ; X86-NEXT: vbroadcastsd {{[0-9]+}}(%esp), %ymm0
35 ; X64: # %bb.0: # %entry
36 ; X64-NEXT: vmovq %rdi, %xmm0
37 ; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
38 ; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; Insert %q into lanes 0, 1, 2 and 3 in turn, starting from an undef vector.
41 %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
42 %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
43 %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2
44 %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3
45 ret <4 x i64> %vecinit6.i
; funcD: build a <4 x double> whose four lanes all hold the scalar argument %q,
; using a chain of insertelement instructions.
; Expected output differs per target:
;   - i686 run: a single vbroadcastsd from a stack slot.
;   - x86_64 run: vmovddup duplicates the low double of xmm0, and
;     vinsertf128 $1 places the result in the upper half of ymm0.
48 define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
50 ; X86: # %bb.0: # %entry
51 ; X86-NEXT: vbroadcastsd {{[0-9]+}}(%esp), %ymm0
55 ; X64: # %bb.0: # %entry
56 ; X64-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
57 ; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; Insert %q into lanes 0, 1, 2 and 3 in turn, starting from an undef vector.
60 %vecinit.i = insertelement <4 x double> undef, double %q, i32 0
61 %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1
62 %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2
63 %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3
64 ret <4 x double> %vecinit6.i
67 ; Test that this pattern turns into a broadcast:
68 ; shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
; funcE: load a 32-bit value from a 32-byte-aligned stack array and insert it
; into lanes 6 and 7 of an otherwise-undef <8 x i32>, then return it as
; <8 x float> through a phi whose other incoming value is undef.
; On both targets the assertions expect the load block to realign the stack
; to 32 bytes, reserve 1312 bytes, and perform a single vbroadcastss from a
; stack slot into ymm0.
70 define <8 x float> @funcE() nounwind {
72 ; X86: # %bb.0: # %allocas
73 ; X86-NEXT: xorl %eax, %eax
74 ; X86-NEXT: testb %al, %al
75 ; X86-NEXT: # implicit-def: $ymm0
76 ; X86-NEXT: jne .LBB4_2
77 ; X86-NEXT: # %bb.1: # %load.i1247
78 ; X86-NEXT: pushl %ebp
79 ; X86-NEXT: movl %esp, %ebp
80 ; X86-NEXT: andl $-32, %esp
81 ; X86-NEXT: subl $1312, %esp # imm = 0x520
82 ; X86-NEXT: vbroadcastss {{[0-9]+}}(%esp), %ymm0
83 ; X86-NEXT: movl %ebp, %esp
85 ; X86-NEXT: .LBB4_2: # %__load_and_broadcast_32.exit1249
89 ; X64: # %bb.0: # %allocas
90 ; X64-NEXT: xorl %eax, %eax
91 ; X64-NEXT: testb %al, %al
92 ; X64-NEXT: # implicit-def: $ymm0
93 ; X64-NEXT: jne .LBB4_2
94 ; X64-NEXT: # %bb.1: # %load.i1247
95 ; X64-NEXT: pushq %rbp
96 ; X64-NEXT: movq %rsp, %rbp
97 ; X64-NEXT: andq $-32, %rsp
98 ; X64-NEXT: subq $1312, %rsp # imm = 0x520
99 ; X64-NEXT: vbroadcastss {{[0-9]+}}(%rsp), %ymm0
100 ; X64-NEXT: movq %rbp, %rsp
101 ; X64-NEXT: popq %rbp
102 ; X64-NEXT: .LBB4_2: # %__load_and_broadcast_32.exit1249
; 18 x 18 array of floats on the stack, aligned to 32 bytes.
105 %udx495 = alloca [18 x [18 x float]], align 32
106 br label %for_test505.preheader
; Both conditional branches below use an undef condition.
108 for_test505.preheader: ; preds = %for_test505.preheader, %allocas
109 br i1 undef, label %for_exit499, label %for_test505.preheader
111 for_exit499: ; preds = %for_test505.preheader
112 br i1 undef, label %__load_and_broadcast_32.exit1249, label %load.i1247
114 load.i1247: ; preds = %for_exit499
; Address of element [1][1] of the stack array, loaded as an i32.
115 %ptr1227 = getelementptr [18 x [18 x float]], ptr %udx495, i64 0, i64 1, i64 1
116 %val.i1238 = load i32, ptr %ptr1227, align 4
; Only lanes 6 and 7 receive the loaded value; lanes 0-5 stay undef. The
; expected output above nevertheless broadcasts the value to every lane.
117 %ret6.i1245 = insertelement <8 x i32> undef, i32 %val.i1238, i32 6
118 %ret7.i1246 = insertelement <8 x i32> %ret6.i1245, i32 %val.i1238, i32 7
119 %phitmp = bitcast <8 x i32> %ret7.i1246 to <8 x float>
120 br label %__load_and_broadcast_32.exit1249
122 __load_and_broadcast_32.exit1249: ; preds = %load.i1247, %for_exit499
; The result is undef when the load block was skipped.
123 %load_broadcast12281250 = phi <8 x float> [ %phitmp, %load.i1247 ], [ undef, %for_exit499 ]
124 ret <8 x float> %load_broadcast12281250
; funcF: insert the i32 argument %val into lanes 6 and 7 of an otherwise-undef
; <8 x i32> and reinterpret the vector as <8 x float>.
; Expected output differs per target:
;   - i686 run: a single vbroadcastss from a stack slot.
;   - x86_64 run: vmovd moves %edi into xmm0, vpshufd [0,0,0,0] replicates the
;     low dword, and vinsertf128 $1 places the result in the upper half of ymm0.
127 define <8 x float> @funcF(i32 %val) nounwind {
130 ; X86-NEXT: vbroadcastss {{[0-9]+}}(%esp), %ymm0
135 ; X64-NEXT: vmovd %edi, %xmm0
136 ; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
137 ; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; Only lanes 6 and 7 are written; lanes 0-5 stay undef.
139 %ret6 = insertelement <8 x i32> undef, i32 %val, i32 6
140 %ret7 = insertelement <8 x i32> %ret6, i32 %val, i32 7
141 %tmp = bitcast <8 x i32> %ret7 to <8 x float>
; funcG: splat element 0 of an <8 x float> vector across all eight lanes.
; The assertions below expect a 128-bit vshufps that replicates element 0
; within xmm0, followed by vinsertf128 $1 to place the result in the upper
; half of ymm0.
145 define <8 x float> @funcG(<8 x float> %a) nounwind uwtable readnone ssp {
146 ; CHECK-LABEL: funcG:
147 ; CHECK: # %bb.0: # %entry
148 ; CHECK-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
149 ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
150 ; CHECK-NEXT: ret{{[l|q]}}
; Every mask index is 0, so the second (undef) operand is never selected.
152 %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
153 ret <8 x float> %shuffle
; funcH: splat element 5 of an <8 x float> vector across all eight lanes.
; Element 5 lives in the upper 128-bit half of the source, so the assertions
; below expect vperm2f128 [2,3,2,3] to copy the upper half into both halves
; of ymm0, followed by a 256-bit vshufps that replicates element 1 of each
; half (shown as indices 1 and 5).
156 define <8 x float> @funcH(<8 x float> %a) nounwind uwtable readnone ssp {
157 ; CHECK-LABEL: funcH:
158 ; CHECK: # %bb.0: # %entry
159 ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
160 ; CHECK-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,1,1,1,5,5,5,5]
161 ; CHECK-NEXT: ret{{[l|q]}}
; Every mask index is 5, so the second (undef) operand is never selected.
163 %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
164 ret <8 x float> %shuffle
; splat_load_2f64_11: load a <2 x double> from %ptr and splat element 1.
; The assertions below expect the vector load and shuffle to be replaced by a
; single vmovddup that reads the scalar directly from byte offset 8 of the
; pointer (element 1 of a vector of 8-byte doubles).
167 define <2 x double> @splat_load_2f64_11(ptr %ptr) {
168 ; X86-LABEL: splat_load_2f64_11:
170 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
171 ; X86-NEXT: vmovddup 8(%eax), %xmm0 # xmm0 = mem[0,0]
174 ; X64-LABEL: splat_load_2f64_11:
176 ; X64-NEXT: vmovddup 8(%rdi), %xmm0 # xmm0 = mem[0,0]
178 %x = load <2 x double>, ptr %ptr
179 %x1 = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 1, i32 1>
; splat_load_4f64_2222: load a <4 x double> from %ptr and splat element 2.
; The assertions below expect the vector load and shuffle to be replaced by a
; single vbroadcastsd that reads the scalar directly from byte offset 16 of
; the pointer (element 2 of a vector of 8-byte doubles).
183 define <4 x double> @splat_load_4f64_2222(ptr %ptr) {
184 ; X86-LABEL: splat_load_4f64_2222:
186 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
187 ; X86-NEXT: vbroadcastsd 16(%eax), %ymm0
190 ; X64-LABEL: splat_load_4f64_2222:
192 ; X64-NEXT: vbroadcastsd 16(%rdi), %ymm0
194 %x = load <4 x double>, ptr %ptr
195 %x1 = shufflevector <4 x double> %x, <4 x double> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
; splat_load_4f32_0000: load a <4 x float> from %ptr and splat element 0.
; The assertions below expect the vector load and shuffle to be replaced by a
; single 128-bit vbroadcastss that reads the scalar from offset 0 of the
; pointer.
199 define <4 x float> @splat_load_4f32_0000(ptr %ptr) {
200 ; X86-LABEL: splat_load_4f32_0000:
202 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
203 ; X86-NEXT: vbroadcastss (%eax), %xmm0
206 ; X64-LABEL: splat_load_4f32_0000:
208 ; X64-NEXT: vbroadcastss (%rdi), %xmm0
210 %x = load <4 x float>, ptr %ptr
211 %x1 = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
; splat_load_8f32_77777777: load an <8 x float> from %ptr and splat element 7.
; The assertions below expect the vector load and shuffle to be replaced by a
; single 256-bit vbroadcastss that reads the scalar directly from byte
; offset 28 of the pointer (element 7 of a vector of 4-byte floats).
215 define <8 x float> @splat_load_8f32_77777777(ptr %ptr) {
216 ; X86-LABEL: splat_load_8f32_77777777:
218 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
219 ; X86-NEXT: vbroadcastss 28(%eax), %ymm0
222 ; X64-LABEL: splat_load_8f32_77777777:
224 ; X64-NEXT: vbroadcastss 28(%rdi), %ymm0
226 %x = load <8 x float>, ptr %ptr
227 %x1 = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>