1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ;RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl | FileCheck %s --check-prefixes=CHECK,KNL
3 ;RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefixes=CHECK,SKX
; hadd_16: integer add-reduction of the four lowest i32 elements of %x225.
; Lane 0 of the final vector is (x[0] + x[2]) + (x[1] + x[3]); only that lane
; is extracted, every other shuffle index is undef.
; The KNL and SKX assertions both expect the sequence to be narrowed to xmm
; registers (vpshufd + vpaddd twice, then vmovd into eax); the SKX assertions
; additionally list vzeroupper.
5 define i32 @hadd_16(<16 x i32> %x225) {
8 ; KNL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
9 ; KNL-NEXT: vpaddd %xmm1, %xmm0, %xmm0
10 ; KNL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
11 ; KNL-NEXT: vpaddd %xmm1, %xmm0, %xmm0
12 ; KNL-NEXT: vmovd %xmm0, %eax
17 ; SKX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
18 ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
19 ; SKX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
20 ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
21 ; SKX-NEXT: vmovd %xmm0, %eax
22 ; SKX-NEXT: vzeroupper
; Bring elements 2 and 3 down to lanes 0 and 1; the other 14 mask entries are undef.
24 %x226 = shufflevector <16 x i32> %x225, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; Lanes 0 and 1 now hold x[0]+x[2] and x[1]+x[3].
25 %x227 = add <16 x i32> %x225, %x226
; Move lane 1 of the partial sums into lane 0.
26 %x228 = shufflevector <16 x i32> %x227, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
27 %x229 = add <16 x i32> %x227, %x228
; Only lane 0 is read back as the scalar result.
28 %x230 = extractelement <16 x i32> %x229, i32 0
; hsub_16: same shuffle pattern as an add-reduction of the four lowest i32
; elements, but the final combining step is a subtraction.
; Lane 0 of the final vector is (x[0] + x[2]) - (x[1] + x[3]); only that lane
; is extracted.
; The KNL and SKX assertions expect xmm-width code: vpshufd + vpaddd, then
; vpshufd + vpsubd, then vmovd into eax; the SKX assertions also list vzeroupper.
32 define i32 @hsub_16(<16 x i32> %x225) {
35 ; KNL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
36 ; KNL-NEXT: vpaddd %xmm1, %xmm0, %xmm0
37 ; KNL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
38 ; KNL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
39 ; KNL-NEXT: vmovd %xmm0, %eax
44 ; SKX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
45 ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
46 ; SKX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
47 ; SKX-NEXT: vpsubd %xmm1, %xmm0, %xmm0
48 ; SKX-NEXT: vmovd %xmm0, %eax
49 ; SKX-NEXT: vzeroupper
; Bring elements 2 and 3 down to lanes 0 and 1; the other 14 mask entries are undef.
51 %x226 = shufflevector <16 x i32> %x225, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; Lanes 0 and 1 now hold x[0]+x[2] and x[1]+x[3].
52 %x227 = add <16 x i32> %x225, %x226
; Move lane 1 of the partial sums into lane 0.
53 %x228 = shufflevector <16 x i32> %x227, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; The second step subtracts instead of adding.
54 %x229 = sub <16 x i32> %x227, %x228
; Only lane 0 is read back as the scalar result.
55 %x230 = extractelement <16 x i32> %x229, i32 0
; fhadd_16: floating-point counterpart of the integer add-reduction test.
; Lane 0 of the final vector is (x[0] + x[2]) + (x[1] + x[3]) computed with
; fadd; only that lane is extracted.
; The KNL and SKX assertions expect xmm-width code: vpermilpd + vaddps, then
; vmovshdup + vaddss; the SKX assertions also list vzeroupper.
59 define float @fhadd_16(<16 x float> %x225) {
60 ; KNL-LABEL: fhadd_16:
62 ; KNL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
63 ; KNL-NEXT: vaddps %xmm1, %xmm0, %xmm0
64 ; KNL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
65 ; KNL-NEXT: vaddss %xmm1, %xmm0, %xmm0
68 ; SKX-LABEL: fhadd_16:
70 ; SKX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
71 ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0
72 ; SKX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
73 ; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0
74 ; SKX-NEXT: vzeroupper
; Bring elements 2 and 3 down to lanes 0 and 1; the other 14 mask entries are undef.
76 %x226 = shufflevector <16 x float> %x225, <16 x float> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; Lanes 0 and 1 now hold x[0]+x[2] and x[1]+x[3].
77 %x227 = fadd <16 x float> %x225, %x226
; Move lane 1 of the partial sums into lane 0.
78 %x228 = shufflevector <16 x float> %x227, <16 x float> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
79 %x229 = fadd <16 x float> %x227, %x228
; Only lane 0 is read back as the scalar result.
80 %x230 = extractelement <16 x float> %x229, i32 0
; fhsub_16: floating-point variant whose first step adds and whose second
; step subtracts. Lane 0 of the final vector is
; (x[0] + x[2]) - (x[1] + x[3]); only that lane is extracted.
; The KNL and SKX assertions expect xmm-width code: vpermilpd + vaddps, then
; vmovshdup + vsubss; the SKX assertions also list vzeroupper.
84 define float @fhsub_16(<16 x float> %x225) {
85 ; KNL-LABEL: fhsub_16:
87 ; KNL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
88 ; KNL-NEXT: vaddps %xmm1, %xmm0, %xmm0
89 ; KNL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
90 ; KNL-NEXT: vsubss %xmm1, %xmm0, %xmm0
93 ; SKX-LABEL: fhsub_16:
95 ; SKX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
96 ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0
97 ; SKX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
98 ; SKX-NEXT: vsubss %xmm1, %xmm0, %xmm0
99 ; SKX-NEXT: vzeroupper
; Bring elements 2 and 3 down to lanes 0 and 1; the other 14 mask entries are undef.
101 %x226 = shufflevector <16 x float> %x225, <16 x float> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; Lanes 0 and 1 now hold x[0]+x[2] and x[1]+x[3].
102 %x227 = fadd <16 x float> %x225, %x226
; Move lane 1 of the partial sums into lane 0.
103 %x228 = shufflevector <16 x float> %x227, <16 x float> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; The second step subtracts instead of adding.
104 %x229 = fsub <16 x float> %x227, %x228
; Only lane 0 is read back as the scalar result.
105 %x230 = extractelement <16 x float> %x229, i32 0
; hadd_16_3: two-operand integer horizontal-add pattern on 512-bit vectors
; where only the low eight result lanes are defined.
; Shuffle indices 0-15 select from %x225 and 16-31 from %x227, so the first
; mask gathers the even elements (0,2 | 0,2 | 4,6 | 4,6 of the two inputs) and
; the second mask gathers the matching odd elements; the upper eight mask
; entries of both shuffles are undef.
; The shared-prefix assertion expects a single ymm-width vphaddd.
109 define <16 x i32> @hadd_16_3(<16 x i32> %x225, <16 x i32> %x227) {
110 ; CHECK-LABEL: hadd_16_3:
112 ; CHECK-NEXT: vphaddd %ymm1, %ymm0, %ymm0
; Even-indexed elements of each adjacent pair.
114 %x226 = shufflevector <16 x i32> %x225, <16 x i32> %x227, <16 x i32> <i32 0, i32 2, i32 16, i32 18
115 , i32 4, i32 6, i32 20, i32 22, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; Odd-indexed elements of each adjacent pair.
116 %x228 = shufflevector <16 x i32> %x225, <16 x i32> %x227, <16 x i32> <i32 1, i32 3, i32 17, i32 19
117 , i32 5 , i32 7, i32 21, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
118 i32 undef, i32 undef>
; Pairwise sums: lane i is even[i] + odd[i].
119 %x229 = add <16 x i32> %x226, %x228
; fhadd_16_3: two-operand float horizontal-add pattern on 512-bit vectors
; where only the low eight result lanes are defined.
; Shuffle indices 0-15 select from %x225 and 16-31 from %x227, so the first
; mask gathers the even elements of each adjacent pair and the second mask the
; odd elements; the upper eight mask entries of both shuffles are undef.
; The shared-prefix assertion expects a single ymm-width vhaddps.
123 define <16 x float> @fhadd_16_3(<16 x float> %x225, <16 x float> %x227) {
124 ; CHECK-LABEL: fhadd_16_3:
126 ; CHECK-NEXT: vhaddps %ymm1, %ymm0, %ymm0
; Even-indexed elements of each adjacent pair.
128 %x226 = shufflevector <16 x float> %x225, <16 x float> %x227, <16 x i32> <i32 0, i32 2, i32 16, i32 18
129 , i32 4, i32 6, i32 20, i32 22, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; Odd-indexed elements of each adjacent pair.
130 %x228 = shufflevector <16 x float> %x225, <16 x float> %x227, <16 x i32> <i32 1, i32 3, i32 17, i32 19
131 , i32 5 , i32 7, i32 21, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; Pairwise sums: lane i is even[i] + odd[i].
132 %x229 = fadd <16 x float> %x226, %x228
133 ret <16 x float> %x229
; fhadd_16_4: two-operand double horizontal-add pattern on 512-bit vectors
; where only the low four result lanes are defined.
; Shuffle indices 0-7 select from %x225 and 8-15 from %x227; the first mask
; takes elements 0 and 2 of each input interleaved, the second takes elements
; 1 and 3, and the upper four mask entries of both shuffles are undef.
; The shared-prefix assertion expects a single ymm-width vhaddpd.
136 define <8 x double> @fhadd_16_4(<8 x double> %x225, <8 x double> %x227) {
137 ; CHECK-LABEL: fhadd_16_4:
139 ; CHECK-NEXT: vhaddpd %ymm1, %ymm0, %ymm0
; Even-indexed element of each adjacent pair.
141 %x226 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 undef, i32 undef, i32 undef, i32 undef>
; Odd-indexed element of each adjacent pair.
142 %x228 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 undef ,i32 undef, i32 undef, i32 undef>
143 %x229 = fadd <8 x double> %x226, %x228
144 ret <8 x double> %x229
; fadd_noundef_low: the even/odd shuffle masks are fully specified (no undef
; entries), but only the low four lanes of the 8-lane sum are returned.
; Shuffle indices 0-7 select from %x225 and 8-15 from %x227.
; The shared-prefix assertion expects a single ymm-width vhaddpd.
147 define <4 x double> @fadd_noundef_low(<8 x double> %x225, <8 x double> %x227) {
148 ; CHECK-LABEL: fadd_noundef_low:
150 ; CHECK-NEXT: vhaddpd %ymm1, %ymm0, %ymm0
; Even-indexed element of each adjacent pair, interleaving the two inputs.
152 %x226 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
; Odd-indexed element of each adjacent pair, interleaving the two inputs.
153 %x228 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5 ,i32 13, i32 7, i32 15>
154 %x229 = fadd <8 x double> %x226, %x228
; Keep lanes 0-3 only.
155 %x230 = shufflevector <8 x double> %x229, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
156 ret <4 x double> %x230
; fadd_noundef_high: same fully specified even/odd shuffles and fadd as the
; low-half variant, but the upper four lanes (4-7) of the sum are returned.
; Shuffle indices 0-7 select from %x225 and 8-15 from %x227.
; The shared-prefix assertions list no horizontal-add instruction here: they
; expect zmm vunpcklpd/vunpckhpd, two vextractf64x4 $1 to take the upper
; halves, and a ymm vaddpd.
159 define <4 x double> @fadd_noundef_high(<8 x double> %x225, <8 x double> %x227) {
160 ; CHECK-LABEL: fadd_noundef_high:
162 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
163 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
164 ; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm0
165 ; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm1
166 ; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0
; Even-indexed element of each adjacent pair, interleaving the two inputs.
168 %x226 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
; Odd-indexed element of each adjacent pair, interleaving the two inputs.
169 %x228 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5 ,i32 13, i32 7, i32 15>
170 %x229 = fadd <8 x double> %x226, %x228
; Keep lanes 4-7 only.
171 %x230 = shufflevector <8 x double> %x229, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
172 ret <4 x double> %x230
; hadd_16_3_sv: integer even/odd shuffle + add on 512-bit inputs, followed by
; a shufflevector that keeps only lanes 0-7 of the sum (the function's
; declared return type is <8 x i32>).
; Shuffle indices 0-15 select from %x225 and 16-31 from %x227; the first mask
; is fully specified with no undef entries.
; The shared-prefix assertion expects a single ymm-width vphaddd.
; NOTE(review): in this view the second mask shows only 14 of its 16 indices
; and ends with a trailing comma; the final two entries (presumably
; i32 29, i32 31 by analogy with the first mask) are not visible here.
; Confirm against the complete file.
176 define <8 x i32> @hadd_16_3_sv(<16 x i32> %x225, <16 x i32> %x227) {
177 ; CHECK-LABEL: hadd_16_3_sv:
179 ; CHECK-NEXT: vphaddd %ymm1, %ymm0, %ymm0
; Even-indexed elements of each adjacent pair.
181 %x226 = shufflevector <16 x i32> %x225, <16 x i32> %x227, <16 x i32> <i32 0, i32 2, i32 16, i32 18
182 , i32 4, i32 6, i32 20, i32 22, i32 8, i32 10, i32 24, i32 26, i32 12, i32 14, i32 28, i32 30>
; Odd-indexed elements of each adjacent pair.
183 %x228 = shufflevector <16 x i32> %x225, <16 x i32> %x227, <16 x i32> <i32 1, i32 3, i32 17, i32 19
184 , i32 5 , i32 7, i32 21, i32 23, i32 9, i32 11, i32 25, i32 27, i32 13, i32 15,
186 %x229 = add <16 x i32> %x226, %x228
; Keep lanes 0-7 only.
187 %x230 = shufflevector <16 x i32> %x229, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4 ,i32 5, i32 6, i32 7>
; fadd_noundef_eel: fully specified even/odd shuffles and fadd on <8 x double>,
; after which only element 0 of the sum is extracted.
; Element 0 of the first shuffle is %x225[0] and element 0 of the second is
; %x225[1], so the extracted scalar is %x225[0] + %x225[1].
; The KNL and SKX assertions expect xmm-width code: vpermilpd to swap the two
; low doubles, then a scalar vaddsd; the SKX assertions also list vzeroupper.
192 define double @fadd_noundef_eel(<8 x double> %x225, <8 x double> %x227) {
193 ; KNL-LABEL: fadd_noundef_eel:
195 ; KNL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
196 ; KNL-NEXT: vaddsd %xmm1, %xmm0, %xmm0
199 ; SKX-LABEL: fadd_noundef_eel:
201 ; SKX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
202 ; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
203 ; SKX-NEXT: vzeroupper
; Even-indexed element of each adjacent pair, interleaving the two inputs.
205 %x226 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
; Odd-indexed element of each adjacent pair, interleaving the two inputs.
206 %x228 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5 ,i32 13, i32 7, i32 15>
207 %x229 = fadd <8 x double> %x226, %x228
; Only lane 0 is read back as the scalar result.
208 %x230 = extractelement <8 x double> %x229, i32 0
214 define double @fsub_noundef_ee (<8 x double> %x225, <8 x double> %x227) {
215 ; KNL-LABEL: fsub_noundef_ee:
217 ; KNL-NEXT: vextractf32x4 $2, %zmm1, %xmm0
218 ; KNL-NEXT: vmovddup {{.*#+}} xmm1 = xmm0[0,0]
219 ; KNL-NEXT: vsubpd %xmm0, %xmm1, %xmm0
220 ; KNL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
223 ; SKX-LABEL: fsub_noundef_ee:
225 ; SKX-NEXT: vextractf32x4 $2, %zmm1, %xmm0
226 ; SKX-NEXT: vmovddup {{.*#+}} xmm1 = xmm0[0,0]
227 ; SKX-NEXT: vsubpd %xmm0, %xmm1, %xmm0
228 ; SKX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
229 ; SKX-NEXT: vzeroupper
231 %x226 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
232 %x228 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5 ,i32 13, i32 7, i32 15>
233 %x229 = fsub <8 x double> %x226, %x228
234 %x230 = extractelement <8 x double> %x229, i32 5