1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ;RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl | FileCheck %s --check-prefix=KNL
3 ;RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefix=SKX
; hadd_16: integer horizontal-add reduction on a <16 x i32> input.
; Step 1 moves lanes 2,3 onto lanes 0,1 and adds; step 2 moves lane 1 onto
; lane 0 and adds, so the extracted element 0 is x[0]+x[1]+x[2]+x[3].
; Every other lane of both shuffle masks is undef. The KNL and SKX checks
; expect each step as a vpshufd/vpaddd pair on xmm registers followed by
; vmovd to %eax; the SKX checks additionally expect vzeroupper.
5 define i32 @hadd_16(<16 x i32> %x225) {
8 ; KNL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
9 ; KNL-NEXT: vpaddd %xmm1, %xmm0, %xmm0
10 ; KNL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
11 ; KNL-NEXT: vpaddd %xmm1, %xmm0, %xmm0
12 ; KNL-NEXT: vmovd %xmm0, %eax
17 ; SKX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
18 ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
19 ; SKX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
20 ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
21 ; SKX-NEXT: vmovd %xmm0, %eax
22 ; SKX-NEXT: vzeroupper
24 %x226 = shufflevector <16 x i32> %x225, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
25 %x227 = add <16 x i32> %x225, %x226
26 %x228 = shufflevector <16 x i32> %x227, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
27 %x229 = add <16 x i32> %x227, %x228
28 %x230 = extractelement <16 x i32> %x229, i32 0
; hsub_16: same shape as the integer add reduction, but the second step
; subtracts. Step 1 adds lanes 2,3 onto lanes 0,1; step 2 subtracts lane 1 of
; that sum from lane 0, so element 0 is (x[0]+x[2]) - (x[1]+x[3]).
; The checks expect vpshufd/vpaddd followed by vpshufd/vpsubd on xmm
; registers and vmovd to %eax; the SKX checks additionally expect vzeroupper.
32 define i32 @hsub_16(<16 x i32> %x225) {
35 ; KNL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
36 ; KNL-NEXT: vpaddd %xmm1, %xmm0, %xmm0
37 ; KNL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
38 ; KNL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
39 ; KNL-NEXT: vmovd %xmm0, %eax
44 ; SKX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
45 ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
46 ; SKX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
47 ; SKX-NEXT: vpsubd %xmm1, %xmm0, %xmm0
48 ; SKX-NEXT: vmovd %xmm0, %eax
49 ; SKX-NEXT: vzeroupper
51 %x226 = shufflevector <16 x i32> %x225, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
52 %x227 = add <16 x i32> %x225, %x226
53 %x228 = shufflevector <16 x i32> %x227, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
54 %x229 = sub <16 x i32> %x227, %x228
55 %x230 = extractelement <16 x i32> %x229, i32 0
; fhadd_16: floating-point counterpart of the integer add reduction, on a
; <16 x float> input. Lanes 2,3 are added onto lanes 0,1, then lane 1 of the
; sum is added onto lane 0, and element 0 is extracted.
; The checks expect vpermilpd + vaddps for the first step and
; vmovshdup + vaddss (scalar add) for the second, all on xmm registers;
; the SKX checks additionally expect vzeroupper.
59 define float @fhadd_16(<16 x float> %x225) {
60 ; KNL-LABEL: fhadd_16:
62 ; KNL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
63 ; KNL-NEXT: vaddps %xmm1, %xmm0, %xmm0
64 ; KNL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
65 ; KNL-NEXT: vaddss %xmm1, %xmm0, %xmm0
68 ; SKX-LABEL: fhadd_16:
70 ; SKX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
71 ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0
72 ; SKX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
73 ; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0
74 ; SKX-NEXT: vzeroupper
76 %x226 = shufflevector <16 x float> %x225, <16 x float> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
77 %x227 = fadd <16 x float> %x225, %x226
78 %x228 = shufflevector <16 x float> %x227, <16 x float> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
79 %x229 = fadd <16 x float> %x227, %x228
80 %x230 = extractelement <16 x float> %x229, i32 0
; fhsub_16: floating-point add-then-subtract reduction on <16 x float>.
; Lanes 2,3 are added onto lanes 0,1 (fadd), then lane 1 of that sum is
; subtracted from lane 0 (fsub), and element 0 is extracted.
; The checks expect vpermilpd + vaddps followed by vmovshdup + vsubss on xmm
; registers; the SKX checks additionally expect vzeroupper.
84 define float @fhsub_16(<16 x float> %x225) {
85 ; KNL-LABEL: fhsub_16:
87 ; KNL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
88 ; KNL-NEXT: vaddps %xmm1, %xmm0, %xmm0
89 ; KNL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
90 ; KNL-NEXT: vsubss %xmm1, %xmm0, %xmm0
93 ; SKX-LABEL: fhsub_16:
95 ; SKX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
96 ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0
97 ; SKX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
98 ; SKX-NEXT: vsubss %xmm1, %xmm0, %xmm0
99 ; SKX-NEXT: vzeroupper
101 %x226 = shufflevector <16 x float> %x225, <16 x float> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
102 %x227 = fadd <16 x float> %x225, %x226
103 %x228 = shufflevector <16 x float> %x227, <16 x float> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
104 %x229 = fsub <16 x float> %x227, %x228
105 %x230 = extractelement <16 x float> %x229, i32 0
; hadd_16_3: two-operand integer horizontal add where only the low 8 result
; lanes are defined. %x226 gathers even-indexed elements (0,2 / 4,6 of %x225
; and 16,18 / 20,22, i.e. elements 0,2 / 4,6 of %x227); %x228 gathers the
; neighbouring odd-indexed elements. The upper 8 lanes of both masks are
; undef. The checks expect the add of the two shuffles to be emitted as
; vphaddd on ymm registers for both KNL and SKX.
109 define <16 x i32> @hadd_16_3(<16 x i32> %x225, <16 x i32> %x227) {
110 ; KNL-LABEL: hadd_16_3:
112 ; KNL-NEXT: vphaddd %ymm1, %ymm0, %ymm0
115 ; SKX-LABEL: hadd_16_3:
117 ; SKX-NEXT: vphaddd %ymm1, %ymm0, %ymm0
119 %x226 = shufflevector <16 x i32> %x225, <16 x i32> %x227, <16 x i32> <i32 0, i32 2, i32 16, i32 18
120 , i32 4, i32 6, i32 20, i32 22, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
121 %x228 = shufflevector <16 x i32> %x225, <16 x i32> %x227, <16 x i32> <i32 1, i32 3, i32 17, i32 19
122 , i32 5 , i32 7, i32 21, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
123 i32 undef, i32 undef>
124 %x229 = add <16 x i32> %x226, %x228
; fhadd_16_3: floating-point version of the two-operand horizontal add with
; only the low 8 result lanes defined. %x226 gathers even-indexed elements of
; %x225 and %x227 (mask 0,2,16,18,4,6,20,22), %x228 the neighbouring
; odd-indexed elements; the upper 8 lanes of both masks are undef.
; The checks expect the fadd of the two shuffles to be emitted as vhaddps on
; ymm registers for both KNL and SKX.
128 define <16 x float> @fhadd_16_3(<16 x float> %x225, <16 x float> %x227) {
129 ; KNL-LABEL: fhadd_16_3:
131 ; KNL-NEXT: vhaddps %ymm1, %ymm0, %ymm0
134 ; SKX-LABEL: fhadd_16_3:
136 ; SKX-NEXT: vhaddps %ymm1, %ymm0, %ymm0
138 %x226 = shufflevector <16 x float> %x225, <16 x float> %x227, <16 x i32> <i32 0, i32 2, i32 16, i32 18
139 , i32 4, i32 6, i32 20, i32 22, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
140 %x228 = shufflevector <16 x float> %x225, <16 x float> %x227, <16 x i32> <i32 1, i32 3, i32 17, i32 19
141 , i32 5 , i32 7, i32 21, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
142 %x229 = fadd <16 x float> %x226, %x228
143 ret <16 x float> %x229
; fhadd_16_4: two-operand horizontal add on <8 x double> with only the low 4
; result lanes defined. %x226 takes elements 0,2 of each input (mask
; 0,8,2,10), %x228 takes elements 1,3 of each input (mask 1,9,3,11); the upper
; 4 lanes of both masks are undef. The checks expect the fadd of the two
; shuffles to be emitted as vhaddpd on ymm registers for both KNL and SKX.
146 define <8 x double> @fhadd_16_4(<8 x double> %x225, <8 x double> %x227) {
147 ; KNL-LABEL: fhadd_16_4:
149 ; KNL-NEXT: vhaddpd %ymm1, %ymm0, %ymm0
152 ; SKX-LABEL: fhadd_16_4:
154 ; SKX-NEXT: vhaddpd %ymm1, %ymm0, %ymm0
156 %x226 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 undef, i32 undef, i32 undef, i32 undef>
157 %x228 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 undef ,i32 undef, i32 undef, i32 undef>
158 %x229 = fadd <8 x double> %x226, %x228
159 ret <8 x double> %x229
; fadd_noundef_low: the even/odd shuffle masks are fully defined (no undef
; lanes) across all 8 doubles, but only the low 4 lanes of the fadd result
; are returned (final shuffle selects indices 0..3).
; The checks expect vhaddpd on ymm registers for both KNL and SKX.
162 define <4 x double> @fadd_noundef_low(<8 x double> %x225, <8 x double> %x227) {
163 ; KNL-LABEL: fadd_noundef_low:
165 ; KNL-NEXT: vhaddpd %ymm1, %ymm0, %ymm0
168 ; SKX-LABEL: fadd_noundef_low:
170 ; SKX-NEXT: vhaddpd %ymm1, %ymm0, %ymm0
172 %x226 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
173 %x228 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5 ,i32 13, i32 7, i32 15>
174 %x229 = fadd <8 x double> %x226, %x228
175 %x230 = shufflevector <8 x double> %x229, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
176 ret <4 x double> %x230
; fadd_noundef_high: same fully-defined even/odd shuffles and fadd as the
; low-half variant, but the upper 4 lanes of the result are returned (final
; shuffle selects indices 4..7).
; The checks here do not expect a horizontal-add instruction: they expect
; vunpcklpd/vunpckhpd on zmm registers, a vextractf64x4 $1 of each to get the
; upper halves, and a plain vaddpd on ymm registers.
179 define <4 x double> @fadd_noundef_high(<8 x double> %x225, <8 x double> %x227) {
180 ; KNL-LABEL: fadd_noundef_high:
182 ; KNL-NEXT: vunpcklpd {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
183 ; KNL-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
184 ; KNL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
185 ; KNL-NEXT: vextractf64x4 $1, %zmm2, %ymm1
186 ; KNL-NEXT: vaddpd %ymm0, %ymm1, %ymm0
189 ; SKX-LABEL: fadd_noundef_high:
191 ; SKX-NEXT: vunpcklpd {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
192 ; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
193 ; SKX-NEXT: vextractf64x4 $1, %zmm0, %ymm0
194 ; SKX-NEXT: vextractf64x4 $1, %zmm2, %ymm1
195 ; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0
197 %x226 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
198 %x228 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5 ,i32 13, i32 7, i32 15>
199 %x229 = fadd <8 x double> %x226, %x228
200 %x230 = shufflevector <8 x double> %x229, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
201 ret <4 x double> %x230
; hadd_16_3_sv: two-operand integer horizontal add whose even-element mask
; (%x226) is fully defined across all 16 lanes; %x228 pairs it with
; odd-indexed elements (1, 3, 17, 19, ...). Only the low 8 lanes of the add
; are kept by the final shufflevector (indices 0..7).
; The checks expect vphaddd on ymm registers for both KNL and SKX.
205 define <8 x i32> @hadd_16_3_sv(<16 x i32> %x225, <16 x i32> %x227) {
206 ; KNL-LABEL: hadd_16_3_sv:
208 ; KNL-NEXT: vphaddd %ymm1, %ymm0, %ymm0
211 ; SKX-LABEL: hadd_16_3_sv:
213 ; SKX-NEXT: vphaddd %ymm1, %ymm0, %ymm0
215 %x226 = shufflevector <16 x i32> %x225, <16 x i32> %x227, <16 x i32> <i32 0, i32 2, i32 16, i32 18
216 , i32 4, i32 6, i32 20, i32 22, i32 8, i32 10, i32 24, i32 26, i32 12, i32 14, i32 28, i32 30>
217 %x228 = shufflevector <16 x i32> %x225, <16 x i32> %x227, <16 x i32> <i32 1, i32 3, i32 17, i32 19
218 , i32 5 , i32 7, i32 21, i32 23, i32 9, i32 11, i32 25, i32 27, i32 13, i32 15,
220 %x229 = add <16 x i32> %x226, %x228
221 %x230 = shufflevector <16 x i32> %x229, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4 ,i32 5, i32 6, i32 7>
; fadd_noundef_eel: fully-defined even/odd shuffles of two <8 x double>
; inputs are added, but only element 0 of the result is used
; (extractelement index 0), i.e. %x225[0] + %x225[1].
; The checks expect a vpermilpd swap plus a scalar vaddsd on xmm registers
; rather than a packed horizontal add; the SKX checks additionally expect
; vzeroupper.
226 define double @fadd_noundef_eel(<8 x double> %x225, <8 x double> %x227) {
227 ; KNL-LABEL: fadd_noundef_eel:
229 ; KNL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
230 ; KNL-NEXT: vaddsd %xmm1, %xmm0, %xmm0
233 ; SKX-LABEL: fadd_noundef_eel:
235 ; SKX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
236 ; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
237 ; SKX-NEXT: vzeroupper
239 %x226 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
240 %x228 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5 ,i32 13, i32 7, i32 15>
241 %x229 = fadd <8 x double> %x226, %x228
242 %x230 = extractelement <8 x double> %x229, i32 0
248 define double @fsub_noundef_ee (<8 x double> %x225, <8 x double> %x227) {
249 ; KNL-LABEL: fsub_noundef_ee:
251 ; KNL-NEXT: vextractf32x4 $2, %zmm1, %xmm0
252 ; KNL-NEXT: vmovddup {{.*#+}} xmm1 = xmm0[0,0]
253 ; KNL-NEXT: vsubpd %xmm0, %xmm1, %xmm0
254 ; KNL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
257 ; SKX-LABEL: fsub_noundef_ee:
259 ; SKX-NEXT: vextractf32x4 $2, %zmm1, %xmm0
260 ; SKX-NEXT: vmovddup {{.*#+}} xmm1 = xmm0[0,0]
261 ; SKX-NEXT: vsubpd %xmm0, %xmm1, %xmm0
262 ; SKX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
263 ; SKX-NEXT: vzeroupper
265 %x226 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
266 %x228 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5 ,i32 13, i32 7, i32 15>
267 %x229 = fsub <8 x double> %x226, %x228
268 %x230 = extractelement <8 x double> %x229, i32 5