1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
; Broadcast of a 128-bit integer vector from memory: the <4 x i32> loaded from
; %a1 is repeated into both halves of an <8 x i32> by the shufflevector mask
; <0,1,2,3,0,1,2,3>, and the result is added to %a0. Every run configuration
; expects a memory-form vbroadcasti128 followed by vpaddd.
9 define <8 x i32> @test_broadcasti128(<8 x i32> %a0, <4 x i32> *%a1) {
10 ; GENERIC-LABEL: test_broadcasti128:
12 ; GENERIC-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [6:1.00]
13 ; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
14 ; GENERIC-NEXT: retq # sched: [1:1.00]
16 ; HASWELL-LABEL: test_broadcasti128:
18 ; HASWELL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [7:0.50]
19 ; HASWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
20 ; HASWELL-NEXT: retq # sched: [7:1.00]
22 ; BROADWELL-LABEL: test_broadcasti128:
24 ; BROADWELL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [6:0.50]
25 ; BROADWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
26 ; BROADWELL-NEXT: retq # sched: [7:1.00]
28 ; SKYLAKE-LABEL: test_broadcasti128:
30 ; SKYLAKE-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [7:0.50]
31 ; SKYLAKE-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
32 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
34 ; SKX-LABEL: test_broadcasti128:
36 ; SKX-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [7:0.50]
37 ; SKX-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
38 ; SKX-NEXT: retq # sched: [7:1.00]
40 ; ZNVER1-LABEL: test_broadcasti128:
42 ; ZNVER1-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [8:0.50]
43 ; ZNVER1-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
44 ; ZNVER1-NEXT: retq # sched: [1:0.50]
45 %1 = load <4 x i32>, <4 x i32> *%a1, align 16
46 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
47 %3 = add <8 x i32> %2, %a0
; Splat of element 0 of a <2 x double> into all four lanes of a <4 x double>
; (shufflevector with a zeroinitializer mask), followed by an fadd of the splat
; with itself. Every run configuration expects vbroadcastsd from %xmm0 into
; %ymm0 and then vaddpd.
51 define <4 x double> @test_broadcastsd_ymm(<2 x double> %a0) {
52 ; GENERIC-LABEL: test_broadcastsd_ymm:
54 ; GENERIC-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [1:1.00]
55 ; GENERIC-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
56 ; GENERIC-NEXT: retq # sched: [1:1.00]
58 ; HASWELL-LABEL: test_broadcastsd_ymm:
60 ; HASWELL-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [3:1.00]
61 ; HASWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
62 ; HASWELL-NEXT: retq # sched: [7:1.00]
64 ; BROADWELL-LABEL: test_broadcastsd_ymm:
66 ; BROADWELL-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [3:1.00]
67 ; BROADWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
68 ; BROADWELL-NEXT: retq # sched: [7:1.00]
70 ; SKYLAKE-LABEL: test_broadcastsd_ymm:
72 ; SKYLAKE-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [3:1.00]
73 ; SKYLAKE-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
74 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
76 ; SKX-LABEL: test_broadcastsd_ymm:
78 ; SKX-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [3:1.00]
79 ; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
80 ; SKX-NEXT: retq # sched: [7:1.00]
82 ; ZNVER1-LABEL: test_broadcastsd_ymm:
84 ; ZNVER1-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [100:0.25]
85 ; ZNVER1-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
86 ; ZNVER1-NEXT: retq # sched: [1:0.50]
87 %1 = shufflevector <2 x double> %a0, <2 x double> undef, <4 x i32> zeroinitializer
88 %2 = fadd <4 x double> %1, %1
; Splat of element 0 of a <4 x float> across all four lanes (shufflevector with
; a zeroinitializer mask), followed by an fadd of the splat with itself. Every
; run configuration expects the 128-bit register form of vbroadcastss and then
; vaddps on %xmm0.
92 define <4 x float> @test_broadcastss(<4 x float> %a0) {
93 ; GENERIC-LABEL: test_broadcastss:
95 ; GENERIC-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00]
96 ; GENERIC-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
97 ; GENERIC-NEXT: retq # sched: [1:1.00]
99 ; HASWELL-LABEL: test_broadcastss:
101 ; HASWELL-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00]
102 ; HASWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
103 ; HASWELL-NEXT: retq # sched: [7:1.00]
105 ; BROADWELL-LABEL: test_broadcastss:
106 ; BROADWELL: # %bb.0:
107 ; BROADWELL-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00]
108 ; BROADWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
109 ; BROADWELL-NEXT: retq # sched: [7:1.00]
111 ; SKYLAKE-LABEL: test_broadcastss:
113 ; SKYLAKE-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00]
114 ; SKYLAKE-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
115 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
117 ; SKX-LABEL: test_broadcastss:
119 ; SKX-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00]
120 ; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
121 ; SKX-NEXT: retq # sched: [7:1.00]
123 ; ZNVER1-LABEL: test_broadcastss:
125 ; ZNVER1-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:0.50]
126 ; ZNVER1-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
127 ; ZNVER1-NEXT: retq # sched: [1:0.50]
128 %1 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> zeroinitializer
129 %2 = fadd <4 x float> %1, %1
; Splat of element 0 of a <4 x float> across all eight lanes of an <8 x float>
; (shufflevector with a zeroinitializer mask), followed by an fadd of the splat
; with itself. Every run configuration expects vbroadcastss from %xmm0 into
; %ymm0 and then the 256-bit vaddps.
133 define <8 x float> @test_broadcastss_ymm(<4 x float> %a0) {
134 ; GENERIC-LABEL: test_broadcastss_ymm:
136 ; GENERIC-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [1:1.00]
137 ; GENERIC-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
138 ; GENERIC-NEXT: retq # sched: [1:1.00]
140 ; HASWELL-LABEL: test_broadcastss_ymm:
142 ; HASWELL-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [3:1.00]
143 ; HASWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
144 ; HASWELL-NEXT: retq # sched: [7:1.00]
146 ; BROADWELL-LABEL: test_broadcastss_ymm:
147 ; BROADWELL: # %bb.0:
148 ; BROADWELL-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [3:1.00]
149 ; BROADWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
150 ; BROADWELL-NEXT: retq # sched: [7:1.00]
152 ; SKYLAKE-LABEL: test_broadcastss_ymm:
154 ; SKYLAKE-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [3:1.00]
155 ; SKYLAKE-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
156 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
158 ; SKX-LABEL: test_broadcastss_ymm:
160 ; SKX-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [3:1.00]
161 ; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
162 ; SKX-NEXT: retq # sched: [7:1.00]
164 ; ZNVER1-LABEL: test_broadcastss_ymm:
166 ; ZNVER1-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [100:0.25]
167 ; ZNVER1-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
168 ; ZNVER1-NEXT: retq # sched: [1:0.50]
169 %1 = shufflevector <4 x float> %a0, <4 x float> undef, <8 x i32> zeroinitializer
170 %2 = fadd <8 x float> %1, %1
; Extraction of the upper 128 bits of a 256-bit integer vector: %a0 is
; zero-extended from <8 x i16> to <8 x i32>, elements 4..7 are selected by the
; shufflevector, and that <4 x i32> is stored to %a1. The expected output has
; vpmovzxwd, then both a register-destination and a memory-destination
; vextracti128 $1, and a vzeroupper before the return.
174 define <4 x i32> @test_extracti128(<8 x i16> %a0, <4 x i32> *%a1) {
175 ; GENERIC-LABEL: test_extracti128:
177 ; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
178 ; GENERIC-NEXT: vextracti128 $1, %ymm1, %xmm0 # sched: [1:1.00]
179 ; GENERIC-NEXT: vextracti128 $1, %ymm1, (%rdi) # sched: [1:1.00]
180 ; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
181 ; GENERIC-NEXT: retq # sched: [1:1.00]
183 ; HASWELL-LABEL: test_extracti128:
185 ; HASWELL-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
186 ; HASWELL-NEXT: vextracti128 $1, %ymm1, %xmm0 # sched: [3:1.00]
187 ; HASWELL-NEXT: vextracti128 $1, %ymm1, (%rdi) # sched: [1:1.00]
188 ; HASWELL-NEXT: vzeroupper # sched: [0:1.00]
189 ; HASWELL-NEXT: retq # sched: [7:1.00]
191 ; BROADWELL-LABEL: test_extracti128:
192 ; BROADWELL: # %bb.0:
193 ; BROADWELL-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
194 ; BROADWELL-NEXT: vextracti128 $1, %ymm1, %xmm0 # sched: [3:1.00]
195 ; BROADWELL-NEXT: vextracti128 $1, %ymm1, (%rdi) # sched: [1:1.00]
196 ; BROADWELL-NEXT: vzeroupper # sched: [0:1.00]
197 ; BROADWELL-NEXT: retq # sched: [7:1.00]
199 ; SKYLAKE-LABEL: test_extracti128:
201 ; SKYLAKE-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
202 ; SKYLAKE-NEXT: vextracti128 $1, %ymm1, %xmm0 # sched: [3:1.00]
203 ; SKYLAKE-NEXT: vextracti128 $1, %ymm1, (%rdi) # sched: [1:1.00]
204 ; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67]
205 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
207 ; SKX-LABEL: test_extracti128:
209 ; SKX-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
210 ; SKX-NEXT: vextracti128 $1, %ymm1, %xmm0 # sched: [3:1.00]
211 ; SKX-NEXT: vextracti128 $1, %ymm1, (%rdi) # sched: [1:1.00]
212 ; SKX-NEXT: vzeroupper # sched: [0:0.67]
213 ; SKX-NEXT: retq # sched: [7:1.00]
215 ; ZNVER1-LABEL: test_extracti128:
217 ; ZNVER1-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50]
218 ; ZNVER1-NEXT: vextracti128 $1, %ymm1, %xmm0 # sched: [2:0.25]
219 ; ZNVER1-NEXT: vextracti128 $1, %ymm1, (%rdi) # sched: [1:0.50]
220 ; ZNVER1-NEXT: vzeroupper # sched: [100:0.25]
221 ; ZNVER1-NEXT: retq # sched: [1:0.50]
222 %z = zext <8 x i16> %a0 to <8 x i32>
223 %ext = shufflevector <8 x i32> %z, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
224 store <4 x i32> %ext, <4 x i32> *%a1
; 128-bit gather of doubles with 32-bit indices: calls the
; llvm.x86.avx2.gather.d.pd intrinsic with %a0, base pointer %a1, index vector
; %a2, %a3, and a final i8 operand of 2, which shows up as the scale in the
; expected address (%rdi,%xmm1,2). Every run configuration expects a single
; vgatherdpd on xmm registers.
228 define <2 x double> @test_gatherdpd(<2 x double> %a0, i8* %a1, <4 x i32> %a2, <2 x double> %a3) {
229 ; GENERIC-LABEL: test_gatherdpd:
231 ; GENERIC-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
232 ; GENERIC-NEXT: retq # sched: [1:1.00]
234 ; HASWELL-LABEL: test_gatherdpd:
236 ; HASWELL-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [26:2.67]
237 ; HASWELL-NEXT: retq # sched: [7:1.00]
239 ; BROADWELL-LABEL: test_gatherdpd:
240 ; BROADWELL: # %bb.0:
241 ; BROADWELL-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [25:3.00]
242 ; BROADWELL-NEXT: retq # sched: [7:1.00]
244 ; SKYLAKE-LABEL: test_gatherdpd:
246 ; SKYLAKE-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
247 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
249 ; SKX-LABEL: test_gatherdpd:
251 ; SKX-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
252 ; SKX-NEXT: retq # sched: [7:1.00]
254 ; ZNVER1-LABEL: test_gatherdpd:
256 ; ZNVER1-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:0.25]
257 ; ZNVER1-NEXT: retq # sched: [1:0.50]
258 %1 = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> %a0, i8* %a1, <4 x i32> %a2, <2 x double> %a3, i8 2)
261 declare <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double>, i8*, <4 x i32>, <2 x double>, i8) nounwind readonly
; 256-bit gather of doubles with 32-bit indices: calls the
; llvm.x86.avx2.gather.d.pd.256 intrinsic with a <4 x i32> index vector and a
; final i8 operand of 8, which shows up as the scale in the expected address
; (%rdi,%xmm1,8). Every run configuration expects a single vgatherdpd with ymm
; data registers and an xmm index register.
263 define <4 x double> @test_gatherdpd_ymm(<4 x double> %a0, i8* %a1, <4 x i32> %a2, <4 x double> %a3) {
264 ; GENERIC-LABEL: test_gatherdpd_ymm:
266 ; GENERIC-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [5:0.50]
267 ; GENERIC-NEXT: retq # sched: [1:1.00]
269 ; HASWELL-LABEL: test_gatherdpd_ymm:
271 ; HASWELL-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [27:4.00]
272 ; HASWELL-NEXT: retq # sched: [7:1.00]
274 ; BROADWELL-LABEL: test_gatherdpd_ymm:
275 ; BROADWELL: # %bb.0:
276 ; BROADWELL-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [26:5.00]
277 ; BROADWELL-NEXT: retq # sched: [7:1.00]
279 ; SKYLAKE-LABEL: test_gatherdpd_ymm:
281 ; SKYLAKE-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [25:1.00]
282 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
284 ; SKX-LABEL: test_gatherdpd_ymm:
286 ; SKX-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [25:1.00]
287 ; SKX-NEXT: retq # sched: [7:1.00]
289 ; ZNVER1-LABEL: test_gatherdpd_ymm:
291 ; ZNVER1-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [100:0.25]
292 ; ZNVER1-NEXT: retq # sched: [1:0.50]
293 %1 = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> %a0, i8* %a1, <4 x i32> %a2, <4 x double> %a3, i8 8)
296 declare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double>, i8*, <4 x i32>, <4 x double>, i8) nounwind readonly
; 128-bit gather of floats with 32-bit indices: calls the
; llvm.x86.avx2.gather.d.ps intrinsic with a <4 x i32> index vector and a final
; i8 operand of 2, which shows up as the scale in the expected address
; (%rdi,%xmm1,2). Every run configuration expects a single vgatherdps on xmm
; registers.
298 define <4 x float> @test_gatherdps(<4 x float> %a0, i8* %a1, <4 x i32> %a2, <4 x float> %a3) {
299 ; GENERIC-LABEL: test_gatherdps:
301 ; GENERIC-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
302 ; GENERIC-NEXT: retq # sched: [1:1.00]
304 ; HASWELL-LABEL: test_gatherdps:
306 ; HASWELL-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [25:3.67]
307 ; HASWELL-NEXT: retq # sched: [7:1.00]
309 ; BROADWELL-LABEL: test_gatherdps:
310 ; BROADWELL: # %bb.0:
311 ; BROADWELL-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [25:3.00]
312 ; BROADWELL-NEXT: retq # sched: [7:1.00]
314 ; SKYLAKE-LABEL: test_gatherdps:
316 ; SKYLAKE-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
317 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
319 ; SKX-LABEL: test_gatherdps:
321 ; SKX-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
322 ; SKX-NEXT: retq # sched: [7:1.00]
324 ; ZNVER1-LABEL: test_gatherdps:
326 ; ZNVER1-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:0.25]
327 ; ZNVER1-NEXT: retq # sched: [1:0.50]
328 %1 = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> %a0, i8* %a1, <4 x i32> %a2, <4 x float> %a3, i8 2)
331 declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float>, i8*, <4 x i32>, <4 x float>, i8) nounwind readonly
; 256-bit gather of floats with 32-bit indices: calls the
; llvm.x86.avx2.gather.d.ps.256 intrinsic with an <8 x i32> index vector and a
; final i8 operand of 4, which shows up as the scale in the expected address
; (%rdi,%ymm1,4). Every run configuration expects a single vgatherdps on ymm
; registers.
333 define <8 x float> @test_gatherdps_ymm(<8 x float> %a0, i8* %a1, <8 x i32> %a2, <8 x float> %a3) {
334 ; GENERIC-LABEL: test_gatherdps_ymm:
336 ; GENERIC-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [5:0.50]
337 ; GENERIC-NEXT: retq # sched: [1:1.00]
339 ; HASWELL-LABEL: test_gatherdps_ymm:
341 ; HASWELL-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [27:6.50]
342 ; HASWELL-NEXT: retq # sched: [7:1.00]
344 ; BROADWELL-LABEL: test_gatherdps_ymm:
345 ; BROADWELL: # %bb.0:
346 ; BROADWELL-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [26:4.00]
347 ; BROADWELL-NEXT: retq # sched: [7:1.00]
349 ; SKYLAKE-LABEL: test_gatherdps_ymm:
351 ; SKYLAKE-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [25:1.00]
352 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
354 ; SKX-LABEL: test_gatherdps_ymm:
356 ; SKX-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [25:1.00]
357 ; SKX-NEXT: retq # sched: [7:1.00]
359 ; ZNVER1-LABEL: test_gatherdps_ymm:
361 ; ZNVER1-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [100:0.25]
362 ; ZNVER1-NEXT: retq # sched: [1:0.50]
363 %1 = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0, i8* %a1, <8 x i32> %a2, <8 x float> %a3, i8 4)
366 declare <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float>, i8*, <8 x i32>, <8 x float>, i8) nounwind readonly
; 128-bit gather of doubles with 64-bit indices: calls the
; llvm.x86.avx2.gather.q.pd intrinsic with a <2 x i64> index vector and a final
; i8 operand of 2, which shows up as the scale in the expected address
; (%rdi,%xmm1,2). Every run configuration expects a single vgatherqpd on xmm
; registers.
368 define <2 x double> @test_gatherqpd(<2 x double> %a0, i8* %a1, <2 x i64> %a2, <2 x double> %a3) {
369 ; GENERIC-LABEL: test_gatherqpd:
371 ; GENERIC-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
372 ; GENERIC-NEXT: retq # sched: [1:1.00]
374 ; HASWELL-LABEL: test_gatherqpd:
376 ; HASWELL-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [23:3.33]
377 ; HASWELL-NEXT: retq # sched: [7:1.00]
379 ; BROADWELL-LABEL: test_gatherqpd:
380 ; BROADWELL: # %bb.0:
381 ; BROADWELL-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:3.00]
382 ; BROADWELL-NEXT: retq # sched: [7:1.00]
384 ; SKYLAKE-LABEL: test_gatherqpd:
386 ; SKYLAKE-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
387 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
389 ; SKX-LABEL: test_gatherqpd:
391 ; SKX-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
392 ; SKX-NEXT: retq # sched: [7:1.00]
394 ; ZNVER1-LABEL: test_gatherqpd:
396 ; ZNVER1-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:0.25]
397 ; ZNVER1-NEXT: retq # sched: [1:0.50]
398 %1 = call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> %a0, i8* %a1, <2 x i64> %a2, <2 x double> %a3, i8 2)
401 declare <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double>, i8*, <2 x i64>, <2 x double>, i8) nounwind readonly
; 256-bit gather of doubles with 64-bit indices: calls the
; llvm.x86.avx2.gather.q.pd.256 intrinsic with a <4 x i64> index vector and a
; final i8 operand of 8, which shows up as the scale in the expected address
; (%rdi,%ymm1,8). Every run configuration expects a single vgatherqpd on ymm
; registers.
403 define <4 x double> @test_gatherqpd_ymm(<4 x double> %a0, i8* %a1, <4 x i64> %a2, <4 x double> %a3) {
404 ; GENERIC-LABEL: test_gatherqpd_ymm:
406 ; GENERIC-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [5:0.50]
407 ; GENERIC-NEXT: retq # sched: [1:1.00]
409 ; HASWELL-LABEL: test_gatherqpd_ymm:
411 ; HASWELL-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [24:5.00]
412 ; HASWELL-NEXT: retq # sched: [7:1.00]
414 ; BROADWELL-LABEL: test_gatherqpd_ymm:
415 ; BROADWELL: # %bb.0:
416 ; BROADWELL-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [23:3.00]
417 ; BROADWELL-NEXT: retq # sched: [7:1.00]
419 ; SKYLAKE-LABEL: test_gatherqpd_ymm:
421 ; SKYLAKE-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [25:1.00]
422 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
424 ; SKX-LABEL: test_gatherqpd_ymm:
426 ; SKX-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [25:1.00]
427 ; SKX-NEXT: retq # sched: [7:1.00]
429 ; ZNVER1-LABEL: test_gatherqpd_ymm:
431 ; ZNVER1-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [100:0.25]
432 ; ZNVER1-NEXT: retq # sched: [1:0.50]
433 %1 = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> %a0, i8* %a1, <4 x i64> %a2, <4 x double> %a3, i8 8)
436 declare <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double>, i8*, <4 x i64>, <4 x double>, i8) nounwind readonly
; 128-bit gather of floats with 64-bit indices: calls the
; llvm.x86.avx2.gather.q.ps intrinsic with a <2 x i64> index vector and a final
; i8 operand of 2, which shows up as the scale in the expected address
; (%rdi,%xmm1,2). Every run configuration expects a single vgatherqps on xmm
; registers.
438 define <4 x float> @test_gatherqps(<4 x float> %a0, i8* %a1, <2 x i64> %a2, <4 x float> %a3) {
439 ; GENERIC-LABEL: test_gatherqps:
441 ; GENERIC-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
442 ; GENERIC-NEXT: retq # sched: [1:1.00]
444 ; HASWELL-LABEL: test_gatherqps:
446 ; HASWELL-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [25:3.67]
447 ; HASWELL-NEXT: retq # sched: [7:1.00]
449 ; BROADWELL-LABEL: test_gatherqps:
450 ; BROADWELL: # %bb.0:
451 ; BROADWELL-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [27:5.00]
452 ; BROADWELL-NEXT: retq # sched: [7:1.00]
454 ; SKYLAKE-LABEL: test_gatherqps:
456 ; SKYLAKE-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
457 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
459 ; SKX-LABEL: test_gatherqps:
461 ; SKX-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
462 ; SKX-NEXT: retq # sched: [7:1.00]
464 ; ZNVER1-LABEL: test_gatherqps:
466 ; ZNVER1-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:0.25]
467 ; ZNVER1-NEXT: retq # sched: [1:0.50]
468 %1 = call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> %a0, i8* %a1, <2 x i64> %a2, <4 x float> %a3, i8 2)
471 declare <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float>, i8*, <2 x i64>, <4 x float>, i8) nounwind readonly
; Gather of floats with a 256-bit vector of 64-bit indices: calls the
; llvm.x86.avx2.gather.q.ps.256 intrinsic with a <4 x i64> index vector, a
; <4 x float> result, and a final i8 operand of 4, which shows up as the scale
; in the expected address (%rdi,%ymm1,4). The expected output is a vgatherqps
; with xmm data registers and a ymm index register, then a vzeroupper before
; the return.
473 define <4 x float> @test_gatherqps_ymm(<4 x float> %a0, i8* %a1, <4 x i64> %a2, <4 x float> %a3) {
474 ; GENERIC-LABEL: test_gatherqps_ymm:
476 ; GENERIC-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [5:0.50]
477 ; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
478 ; GENERIC-NEXT: retq # sched: [1:1.00]
480 ; HASWELL-LABEL: test_gatherqps_ymm:
482 ; HASWELL-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [28:3.67]
483 ; HASWELL-NEXT: vzeroupper # sched: [0:1.00]
484 ; HASWELL-NEXT: retq # sched: [7:1.00]
486 ; BROADWELL-LABEL: test_gatherqps_ymm:
487 ; BROADWELL: # %bb.0:
488 ; BROADWELL-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [24:5.00]
489 ; BROADWELL-NEXT: vzeroupper # sched: [0:1.00]
490 ; BROADWELL-NEXT: retq # sched: [7:1.00]
492 ; SKYLAKE-LABEL: test_gatherqps_ymm:
494 ; SKYLAKE-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [25:1.00]
495 ; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67]
496 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
498 ; SKX-LABEL: test_gatherqps_ymm:
500 ; SKX-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [25:1.00]
501 ; SKX-NEXT: vzeroupper # sched: [0:0.67]
502 ; SKX-NEXT: retq # sched: [7:1.00]
504 ; ZNVER1-LABEL: test_gatherqps_ymm:
506 ; ZNVER1-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [100:0.25]
507 ; ZNVER1-NEXT: vzeroupper # sched: [100:0.25]
508 ; ZNVER1-NEXT: retq # sched: [1:0.50]
509 %1 = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> %a0, i8* %a1, <4 x i64> %a2, <4 x float> %a3, i8 4)
512 declare <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float>, i8*, <4 x i64>, <4 x float>, i8) nounwind readonly
; Insertion of a 128-bit integer vector into the upper half of a 256-bit
; vector, from a register and from memory. Each source (%a1, and the <4 x i32>
; loaded from %a2) is first widened to eight lanes with undef upper elements,
; then a second shufflevector keeps lanes 0..3 of %a0 and takes lanes 8..11
; from the widened value. The two results are added. The expected output has a
; register-form and a memory-form vinserti128 $1, followed by vpaddd.
514 define <8 x i32> @test_inserti128(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
515 ; GENERIC-LABEL: test_inserti128:
517 ; GENERIC-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:1.00]
518 ; GENERIC-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
519 ; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
520 ; GENERIC-NEXT: retq # sched: [1:1.00]
522 ; HASWELL-LABEL: test_inserti128:
524 ; HASWELL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00]
525 ; HASWELL-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
526 ; HASWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
527 ; HASWELL-NEXT: retq # sched: [7:1.00]
529 ; BROADWELL-LABEL: test_inserti128:
530 ; BROADWELL: # %bb.0:
531 ; BROADWELL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00]
532 ; BROADWELL-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [6:0.50]
533 ; BROADWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
534 ; BROADWELL-NEXT: retq # sched: [7:1.00]
536 ; SKYLAKE-LABEL: test_inserti128:
538 ; SKYLAKE-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00]
539 ; SKYLAKE-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
540 ; SKYLAKE-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
541 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
543 ; SKX-LABEL: test_inserti128:
545 ; SKX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00]
546 ; SKX-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
547 ; SKX-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
548 ; SKX-NEXT: retq # sched: [7:1.00]
550 ; ZNVER1-LABEL: test_inserti128:
552 ; ZNVER1-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [2:0.25]
553 ; ZNVER1-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [9:0.50]
554 ; ZNVER1-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
555 ; ZNVER1-NEXT: retq # sched: [1:0.50]
556 %1 = shufflevector <4 x i32> %a1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
557 %2 = shufflevector <8 x i32> %a0, <8 x i32> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
558 %3 = load <4 x i32>, <4 x i32> *%a2, align 16
559 %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
560 %5 = shufflevector <8 x i32> %a0, <8 x i32> %4, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
561 %6 = add <8 x i32> %2, %5
; 256-bit vmovntdqa load: calls the llvm.x86.avx2.movntdqa intrinsic on the
; pointer %a0. Every run configuration expects a single vmovntdqa from (%rdi)
; into %ymm0.
565 define <4 x i64> @test_movntdqa(i8* %a0) {
566 ; GENERIC-LABEL: test_movntdqa:
568 ; GENERIC-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [7:0.50]
569 ; GENERIC-NEXT: retq # sched: [1:1.00]
571 ; HASWELL-LABEL: test_movntdqa:
573 ; HASWELL-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [7:0.50]
574 ; HASWELL-NEXT: retq # sched: [7:1.00]
576 ; BROADWELL-LABEL: test_movntdqa:
577 ; BROADWELL: # %bb.0:
578 ; BROADWELL-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [6:0.50]
579 ; BROADWELL-NEXT: retq # sched: [7:1.00]
581 ; SKYLAKE-LABEL: test_movntdqa:
583 ; SKYLAKE-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [7:0.50]
584 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
586 ; SKX-LABEL: test_movntdqa:
588 ; SKX-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [7:0.50]
589 ; SKX-NEXT: retq # sched: [7:1.00]
591 ; ZNVER1-LABEL: test_movntdqa:
593 ; ZNVER1-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [8:0.50]
594 ; ZNVER1-NEXT: retq # sched: [1:0.50]
595 %1 = call <4 x i64> @llvm.x86.avx2.movntdqa(i8* %a0)
598 declare <4 x i64> @llvm.x86.avx2.movntdqa(i8*) nounwind readonly
; 256-bit vmpsadbw in register and memory forms: the llvm.x86.avx2.mpsadbw
; intrinsic is called on %a0 and %a1 with immediate 7, its result is bitcast
; back to <32 x i8>, and the intrinsic is called again with the <32 x i8>
; loaded from %a2 as the second operand. The expected output is a
; register-form vmpsadbw $7 followed by a memory-form vmpsadbw $7.
600 define <16 x i16> @test_mpsadbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
601 ; GENERIC-LABEL: test_mpsadbw:
603 ; GENERIC-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [7:1.00]
604 ; GENERIC-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [14:1.00]
605 ; GENERIC-NEXT: retq # sched: [1:1.00]
607 ; HASWELL-LABEL: test_mpsadbw:
609 ; HASWELL-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [7:2.00]
610 ; HASWELL-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [14:2.00]
611 ; HASWELL-NEXT: retq # sched: [7:1.00]
613 ; BROADWELL-LABEL: test_mpsadbw:
614 ; BROADWELL: # %bb.0:
615 ; BROADWELL-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [7:2.00]
616 ; BROADWELL-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [13:2.00]
617 ; BROADWELL-NEXT: retq # sched: [7:1.00]
619 ; SKYLAKE-LABEL: test_mpsadbw:
621 ; SKYLAKE-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [4:2.00]
622 ; SKYLAKE-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [11:2.00]
623 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
625 ; SKX-LABEL: test_mpsadbw:
627 ; SKX-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [4:2.00]
628 ; SKX-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [11:2.00]
629 ; SKX-NEXT: retq # sched: [7:1.00]
631 ; ZNVER1-LABEL: test_mpsadbw:
633 ; ZNVER1-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [100:0.25]
634 ; ZNVER1-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [100:0.25]
635 ; ZNVER1-NEXT: retq # sched: [1:0.50]
636 %1 = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i8 7)
637 %2 = bitcast <16 x i16> %1 to <32 x i8>
638 %3 = load <32 x i8>, <32 x i8> *%a2, align 32
639 %4 = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %2, <32 x i8> %3, i8 7)
642 declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i8) nounwind readnone
; 256-bit vpabsb in register and memory forms: the llvm.x86.avx2.pabs.b
; intrinsic is applied to %a0 and to the <32 x i8> loaded from %a1, and the two
; results are combined with an or. The expected output has both vpabsb forms
; followed by vpor; in the znver1 expectations the memory form comes first.
644 define <32 x i8> @test_pabsb(<32 x i8> %a0, <32 x i8> *%a1) {
645 ; GENERIC-LABEL: test_pabsb:
647 ; GENERIC-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50]
648 ; GENERIC-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:0.50]
649 ; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
650 ; GENERIC-NEXT: retq # sched: [1:1.00]
652 ; HASWELL-LABEL: test_pabsb:
654 ; HASWELL-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50]
655 ; HASWELL-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:0.50]
656 ; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
657 ; HASWELL-NEXT: retq # sched: [7:1.00]
659 ; BROADWELL-LABEL: test_pabsb:
660 ; BROADWELL: # %bb.0:
661 ; BROADWELL-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50]
662 ; BROADWELL-NEXT: vpabsb (%rdi), %ymm1 # sched: [7:0.50]
663 ; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
664 ; BROADWELL-NEXT: retq # sched: [7:1.00]
666 ; SKYLAKE-LABEL: test_pabsb:
668 ; SKYLAKE-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50]
669 ; SKYLAKE-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:0.50]
670 ; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
671 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
673 ; SKX-LABEL: test_pabsb:
675 ; SKX-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50]
676 ; SKX-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:0.50]
677 ; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
678 ; SKX-NEXT: retq # sched: [7:1.00]
680 ; ZNVER1-LABEL: test_pabsb:
682 ; ZNVER1-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:0.50]
683 ; ZNVER1-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.25]
684 ; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
685 ; ZNVER1-NEXT: retq # sched: [1:0.50]
686 %1 = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %a0)
687 %2 = load <32 x i8>, <32 x i8> *%a1, align 32
688 %3 = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %2)
689 %4 = or <32 x i8> %1, %3
692 declare <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8>) nounwind readnone
; 256-bit vpabsd in register and memory forms: the llvm.x86.avx2.pabs.d
; intrinsic is applied to %a0 and to the <8 x i32> loaded from %a1, and the two
; results are combined with an or. The expected output has both vpabsd forms
; followed by vpor; in the znver1 expectations the memory form comes first.
694 define <8 x i32> @test_pabsd(<8 x i32> %a0, <8 x i32> *%a1) {
695 ; GENERIC-LABEL: test_pabsd:
697 ; GENERIC-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50]
698 ; GENERIC-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:0.50]
699 ; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
700 ; GENERIC-NEXT: retq # sched: [1:1.00]
702 ; HASWELL-LABEL: test_pabsd:
704 ; HASWELL-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50]
705 ; HASWELL-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:0.50]
706 ; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
707 ; HASWELL-NEXT: retq # sched: [7:1.00]
709 ; BROADWELL-LABEL: test_pabsd:
710 ; BROADWELL: # %bb.0:
711 ; BROADWELL-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50]
712 ; BROADWELL-NEXT: vpabsd (%rdi), %ymm1 # sched: [7:0.50]
713 ; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
714 ; BROADWELL-NEXT: retq # sched: [7:1.00]
716 ; SKYLAKE-LABEL: test_pabsd:
718 ; SKYLAKE-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50]
719 ; SKYLAKE-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:0.50]
720 ; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
721 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
723 ; SKX-LABEL: test_pabsd:
725 ; SKX-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50]
726 ; SKX-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:0.50]
727 ; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
728 ; SKX-NEXT: retq # sched: [7:1.00]
730 ; ZNVER1-LABEL: test_pabsd:
732 ; ZNVER1-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:0.50]
733 ; ZNVER1-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.25]
734 ; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
735 ; ZNVER1-NEXT: retq # sched: [1:0.50]
736 %1 = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %a0)
737 %2 = load <8 x i32>, <8 x i32> *%a1, align 32
738 %3 = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %2)
739 %4 = or <8 x i32> %1, %3
742 declare <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32>) nounwind readnone
; 256-bit vpabsw in register and memory forms: the llvm.x86.avx2.pabs.w
; intrinsic is applied to %a0 and to the <16 x i16> loaded from %a1, and the
; two results are combined with an or. The expected output has both vpabsw
; forms followed by vpor; in the znver1 expectations the memory form comes
; first.
744 define <16 x i16> @test_pabsw(<16 x i16> %a0, <16 x i16> *%a1) {
745 ; GENERIC-LABEL: test_pabsw:
747 ; GENERIC-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50]
748 ; GENERIC-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:0.50]
749 ; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
750 ; GENERIC-NEXT: retq # sched: [1:1.00]
752 ; HASWELL-LABEL: test_pabsw:
754 ; HASWELL-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50]
755 ; HASWELL-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:0.50]
756 ; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
757 ; HASWELL-NEXT: retq # sched: [7:1.00]
759 ; BROADWELL-LABEL: test_pabsw:
760 ; BROADWELL: # %bb.0:
761 ; BROADWELL-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50]
762 ; BROADWELL-NEXT: vpabsw (%rdi), %ymm1 # sched: [7:0.50]
763 ; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
764 ; BROADWELL-NEXT: retq # sched: [7:1.00]
766 ; SKYLAKE-LABEL: test_pabsw:
768 ; SKYLAKE-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50]
769 ; SKYLAKE-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:0.50]
770 ; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
771 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
773 ; SKX-LABEL: test_pabsw:
775 ; SKX-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50]
776 ; SKX-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:0.50]
777 ; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
778 ; SKX-NEXT: retq # sched: [7:1.00]
780 ; ZNVER1-LABEL: test_pabsw:
782 ; ZNVER1-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:0.50]
783 ; ZNVER1-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.25]
784 ; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
785 ; ZNVER1-NEXT: retq # sched: [1:0.50]
786 %1 = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %a0)
787 %2 = load <16 x i16>, <16 x i16> *%a1, align 32
788 %3 = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %2)
789 %4 = or <16 x i16> %1, %3
792 declare <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16>) nounwind readnone
; 256-bit vpackssdw in register and memory forms: the llvm.x86.avx2.packssdw
; intrinsic is called on %a0 and %a1, its result is bitcast back to <8 x i32>,
; and the intrinsic is called again with the <8 x i32> loaded from %a2 as the
; second operand. The expected output is a register-form vpackssdw followed by
; a memory-form vpackssdw.
794 define <16 x i16> @test_packssdw(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
795 ; GENERIC-LABEL: test_packssdw:
797 ; GENERIC-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
798 ; GENERIC-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
799 ; GENERIC-NEXT: retq # sched: [1:1.00]
801 ; HASWELL-LABEL: test_packssdw:
803 ; HASWELL-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
804 ; HASWELL-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
805 ; HASWELL-NEXT: retq # sched: [7:1.00]
807 ; BROADWELL-LABEL: test_packssdw:
808 ; BROADWELL: # %bb.0:
809 ; BROADWELL-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
810 ; BROADWELL-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
811 ; BROADWELL-NEXT: retq # sched: [7:1.00]
813 ; SKYLAKE-LABEL: test_packssdw:
815 ; SKYLAKE-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
816 ; SKYLAKE-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
817 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
819 ; SKX-LABEL: test_packssdw:
821 ; SKX-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
822 ; SKX-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
823 ; SKX-NEXT: retq # sched: [7:1.00]
825 ; ZNVER1-LABEL: test_packssdw:
827 ; ZNVER1-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
828 ; ZNVER1-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
829 ; ZNVER1-NEXT: retq # sched: [1:0.50]
830 %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a0, <8 x i32> %a1)
831 %2 = bitcast <16 x i16> %1 to <8 x i32>
832 %3 = load <8 x i32>, <8 x i32> *%a2, align 32
833 %4 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %2, <8 x i32> %3)
836 declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>) nounwind readnone
; test_packsswb - calls @llvm.x86.avx2.packsswb twice: first on the two
; register arguments, then on that result (bitcast back to <16 x i16>) together
; with a vector loaded from %a2. For every -mcpu prefix the autogenerated check
; lines expect one register-register vpacksswb followed by one vpacksswb with a
; (%rdi) memory operand, each with its "# sched" annotation.
838 define <32 x i8> @test_packsswb(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
839 ; GENERIC-LABEL: test_packsswb:
841 ; GENERIC-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
842 ; GENERIC-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
843 ; GENERIC-NEXT: retq # sched: [1:1.00]
845 ; HASWELL-LABEL: test_packsswb:
847 ; HASWELL-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
848 ; HASWELL-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
849 ; HASWELL-NEXT: retq # sched: [7:1.00]
851 ; BROADWELL-LABEL: test_packsswb:
852 ; BROADWELL: # %bb.0:
853 ; BROADWELL-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
854 ; BROADWELL-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
855 ; BROADWELL-NEXT: retq # sched: [7:1.00]
857 ; SKYLAKE-LABEL: test_packsswb:
859 ; SKYLAKE-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
860 ; SKYLAKE-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
861 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
863 ; SKX-LABEL: test_packsswb:
865 ; SKX-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
866 ; SKX-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
867 ; SKX-NEXT: retq # sched: [7:1.00]
869 ; ZNVER1-LABEL: test_packsswb:
871 ; ZNVER1-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
872 ; ZNVER1-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
873 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; The bitcast is needed because the intrinsic returns <32 x i8> but takes
; <16 x i16> inputs; the second call consumes the loaded vector %3.
874 %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a0, <16 x i16> %a1)
875 %2 = bitcast <32 x i8> %1 to <16 x i16>
876 %3 = load <16 x i16>, <16 x i16> *%a2, align 32
877 %4 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %2, <16 x i16> %3)
880 declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) nounwind readnone
; test_packusdw - calls @llvm.x86.avx2.packusdw twice: first on the two
; register arguments, then on that result (bitcast back to <8 x i32>) together
; with a vector loaded from %a2. For every -mcpu prefix the autogenerated check
; lines expect one register-register vpackusdw followed by one vpackusdw with a
; (%rdi) memory operand, each with its "# sched" annotation.
882 define <16 x i16> @test_packusdw(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
883 ; GENERIC-LABEL: test_packusdw:
885 ; GENERIC-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
886 ; GENERIC-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
887 ; GENERIC-NEXT: retq # sched: [1:1.00]
889 ; HASWELL-LABEL: test_packusdw:
891 ; HASWELL-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
892 ; HASWELL-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
893 ; HASWELL-NEXT: retq # sched: [7:1.00]
895 ; BROADWELL-LABEL: test_packusdw:
896 ; BROADWELL: # %bb.0:
897 ; BROADWELL-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
898 ; BROADWELL-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
899 ; BROADWELL-NEXT: retq # sched: [7:1.00]
901 ; SKYLAKE-LABEL: test_packusdw:
903 ; SKYLAKE-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
904 ; SKYLAKE-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
905 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
907 ; SKX-LABEL: test_packusdw:
909 ; SKX-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
910 ; SKX-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
911 ; SKX-NEXT: retq # sched: [7:1.00]
913 ; ZNVER1-LABEL: test_packusdw:
915 ; ZNVER1-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
916 ; ZNVER1-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
917 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; The bitcast is needed because the intrinsic returns <16 x i16> but takes
; <8 x i32> inputs; the second call consumes the loaded vector %3.
918 %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a0, <8 x i32> %a1)
919 %2 = bitcast <16 x i16> %1 to <8 x i32>
920 %3 = load <8 x i32>, <8 x i32> *%a2, align 32
921 %4 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %2, <8 x i32> %3)
924 declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readnone
; test_packuswb - calls @llvm.x86.avx2.packuswb twice: first on the two
; register arguments, then on that result (bitcast back to <16 x i16>) together
; with a vector loaded from %a2. For every -mcpu prefix the autogenerated check
; lines expect one register-register vpackuswb followed by one vpackuswb with a
; (%rdi) memory operand, each with its "# sched" annotation.
926 define <32 x i8> @test_packuswb(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
927 ; GENERIC-LABEL: test_packuswb:
929 ; GENERIC-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
930 ; GENERIC-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
931 ; GENERIC-NEXT: retq # sched: [1:1.00]
933 ; HASWELL-LABEL: test_packuswb:
935 ; HASWELL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
936 ; HASWELL-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
937 ; HASWELL-NEXT: retq # sched: [7:1.00]
939 ; BROADWELL-LABEL: test_packuswb:
940 ; BROADWELL: # %bb.0:
941 ; BROADWELL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
942 ; BROADWELL-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
943 ; BROADWELL-NEXT: retq # sched: [7:1.00]
945 ; SKYLAKE-LABEL: test_packuswb:
947 ; SKYLAKE-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
948 ; SKYLAKE-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
949 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
951 ; SKX-LABEL: test_packuswb:
953 ; SKX-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
954 ; SKX-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
955 ; SKX-NEXT: retq # sched: [7:1.00]
957 ; ZNVER1-LABEL: test_packuswb:
959 ; ZNVER1-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
960 ; ZNVER1-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
961 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; The bitcast is needed because the intrinsic returns <32 x i8> but takes
; <16 x i16> inputs; the second call consumes the loaded vector %3.
962 %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a0, <16 x i16> %a1)
963 %2 = bitcast <32 x i8> %1 to <16 x i16>
964 %3 = load <16 x i16>, <16 x i16> *%a2, align 32
965 %4 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %2, <16 x i16> %3)
968 declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readnone
; test_paddb - two `add <32 x i8>` operations: %a0 + %a1, then that sum plus a
; vector loaded from %a2. For every -mcpu prefix the autogenerated check lines
; expect a register-register vpaddb followed by a vpaddb with a (%rdi) memory
; operand, each with its "# sched" annotation.
970 define <32 x i8> @test_paddb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
971 ; GENERIC-LABEL: test_paddb:
973 ; GENERIC-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
974 ; GENERIC-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
975 ; GENERIC-NEXT: retq # sched: [1:1.00]
977 ; HASWELL-LABEL: test_paddb:
979 ; HASWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
980 ; HASWELL-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
981 ; HASWELL-NEXT: retq # sched: [7:1.00]
983 ; BROADWELL-LABEL: test_paddb:
984 ; BROADWELL: # %bb.0:
985 ; BROADWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
986 ; BROADWELL-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
987 ; BROADWELL-NEXT: retq # sched: [7:1.00]
989 ; SKYLAKE-LABEL: test_paddb:
991 ; SKYLAKE-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
992 ; SKYLAKE-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
993 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
995 ; SKX-LABEL: test_paddb:
997 ; SKX-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
998 ; SKX-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
999 ; SKX-NEXT: retq # sched: [7:1.00]
1001 ; ZNVER1-LABEL: test_paddb:
1003 ; ZNVER1-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1004 ; ZNVER1-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1005 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-register add first, then an add whose second operand is the load.
1006 %1 = add <32 x i8> %a0, %a1
1007 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
1008 %3 = add <32 x i8> %1, %2
; test_paddd - two `add <8 x i32>` operations: %a0 + %a1, then that sum plus a
; vector loaded from %a2. For every -mcpu prefix the autogenerated check lines
; expect a register-register vpaddd followed by a vpaddd with a (%rdi) memory
; operand, each with its "# sched" annotation.
1012 define <8 x i32> @test_paddd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
1013 ; GENERIC-LABEL: test_paddd:
1015 ; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1016 ; GENERIC-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1017 ; GENERIC-NEXT: retq # sched: [1:1.00]
1019 ; HASWELL-LABEL: test_paddd:
1021 ; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1022 ; HASWELL-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1023 ; HASWELL-NEXT: retq # sched: [7:1.00]
1025 ; BROADWELL-LABEL: test_paddd:
1026 ; BROADWELL: # %bb.0:
1027 ; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1028 ; BROADWELL-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
1029 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1031 ; SKYLAKE-LABEL: test_paddd:
1033 ; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1034 ; SKYLAKE-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1035 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1037 ; SKX-LABEL: test_paddd:
1039 ; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1040 ; SKX-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1041 ; SKX-NEXT: retq # sched: [7:1.00]
1043 ; ZNVER1-LABEL: test_paddd:
1045 ; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1046 ; ZNVER1-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1047 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-register add first, then an add whose second operand is the load.
1048 %1 = add <8 x i32> %a0, %a1
1049 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
1050 %3 = add <8 x i32> %1, %2
; test_paddq - two `add <4 x i64>` operations: %a0 + %a1, then that sum plus a
; vector loaded from %a2. For every -mcpu prefix the autogenerated check lines
; expect a register-register vpaddq followed by a vpaddq with a (%rdi) memory
; operand, each with its "# sched" annotation.
1054 define <4 x i64> @test_paddq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
1055 ; GENERIC-LABEL: test_paddq:
1057 ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1058 ; GENERIC-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1059 ; GENERIC-NEXT: retq # sched: [1:1.00]
1061 ; HASWELL-LABEL: test_paddq:
1063 ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1064 ; HASWELL-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1065 ; HASWELL-NEXT: retq # sched: [7:1.00]
1067 ; BROADWELL-LABEL: test_paddq:
1068 ; BROADWELL: # %bb.0:
1069 ; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1070 ; BROADWELL-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
1071 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1073 ; SKYLAKE-LABEL: test_paddq:
1075 ; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1076 ; SKYLAKE-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1077 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1079 ; SKX-LABEL: test_paddq:
1081 ; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1082 ; SKX-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1083 ; SKX-NEXT: retq # sched: [7:1.00]
1085 ; ZNVER1-LABEL: test_paddq:
1087 ; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1088 ; ZNVER1-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1089 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-register add first, then an add whose second operand is the load.
1090 %1 = add <4 x i64> %a0, %a1
1091 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
1092 %3 = add <4 x i64> %1, %2
; test_paddsb - calls @llvm.sadd.sat.v32i8 twice: on %a0/%a1, then on that
; result and a vector loaded from %a2. For every -mcpu prefix the autogenerated
; check lines expect a register-register vpaddsb followed by a vpaddsb with a
; (%rdi) memory operand, each with its "# sched" annotation.
1096 define <32 x i8> @test_paddsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
1097 ; GENERIC-LABEL: test_paddsb:
1099 ; GENERIC-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1100 ; GENERIC-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1101 ; GENERIC-NEXT: retq # sched: [1:1.00]
1103 ; HASWELL-LABEL: test_paddsb:
1105 ; HASWELL-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1106 ; HASWELL-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1107 ; HASWELL-NEXT: retq # sched: [7:1.00]
1109 ; BROADWELL-LABEL: test_paddsb:
1110 ; BROADWELL: # %bb.0:
1111 ; BROADWELL-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1112 ; BROADWELL-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
1113 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1115 ; SKYLAKE-LABEL: test_paddsb:
1117 ; SKYLAKE-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1118 ; SKYLAKE-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1119 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1121 ; SKX-LABEL: test_paddsb:
1123 ; SKX-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1124 ; SKX-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1125 ; SKX-NEXT: retq # sched: [7:1.00]
1127 ; ZNVER1-LABEL: test_paddsb:
1129 ; ZNVER1-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1130 ; ZNVER1-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1131 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; First call takes the two register inputs; second call takes the loaded %2.
1132 %1 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %a0, <32 x i8> %a1)
1133 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
1134 %3 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %1, <32 x i8> %2)
1137 declare <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8>, <32 x i8>) nounwind readnone
; test_paddsw - calls @llvm.sadd.sat.v16i16 twice: on %a0/%a1, then on that
; result and a vector loaded from %a2. For every -mcpu prefix the autogenerated
; check lines expect a register-register vpaddsw followed by a vpaddsw with a
; (%rdi) memory operand, each with its "# sched" annotation.
1139 define <16 x i16> @test_paddsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
1140 ; GENERIC-LABEL: test_paddsw:
1142 ; GENERIC-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1143 ; GENERIC-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1144 ; GENERIC-NEXT: retq # sched: [1:1.00]
1146 ; HASWELL-LABEL: test_paddsw:
1148 ; HASWELL-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1149 ; HASWELL-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1150 ; HASWELL-NEXT: retq # sched: [7:1.00]
1152 ; BROADWELL-LABEL: test_paddsw:
1153 ; BROADWELL: # %bb.0:
1154 ; BROADWELL-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1155 ; BROADWELL-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
1156 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1158 ; SKYLAKE-LABEL: test_paddsw:
1160 ; SKYLAKE-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1161 ; SKYLAKE-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1162 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1164 ; SKX-LABEL: test_paddsw:
1166 ; SKX-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1167 ; SKX-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1168 ; SKX-NEXT: retq # sched: [7:1.00]
1170 ; ZNVER1-LABEL: test_paddsw:
1172 ; ZNVER1-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1173 ; ZNVER1-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1174 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; First call takes the two register inputs; second call takes the loaded %2.
1175 %1 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %a0, <16 x i16> %a1)
1176 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
1177 %3 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %1, <16 x i16> %2)
1180 declare <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16>, <16 x i16>) nounwind readnone
; test_paddusb - calls @llvm.uadd.sat.v32i8 twice: on %a0/%a1, then on that
; result and a vector loaded from %a2. For every -mcpu prefix the autogenerated
; check lines expect a register-register vpaddusb followed by a vpaddusb with a
; (%rdi) memory operand, each with its "# sched" annotation.
1182 define <32 x i8> @test_paddusb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
1183 ; GENERIC-LABEL: test_paddusb:
1185 ; GENERIC-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1186 ; GENERIC-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1187 ; GENERIC-NEXT: retq # sched: [1:1.00]
1189 ; HASWELL-LABEL: test_paddusb:
1191 ; HASWELL-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1192 ; HASWELL-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1193 ; HASWELL-NEXT: retq # sched: [7:1.00]
1195 ; BROADWELL-LABEL: test_paddusb:
1196 ; BROADWELL: # %bb.0:
1197 ; BROADWELL-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1198 ; BROADWELL-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
1199 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1201 ; SKYLAKE-LABEL: test_paddusb:
1203 ; SKYLAKE-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1204 ; SKYLAKE-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1205 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1207 ; SKX-LABEL: test_paddusb:
1209 ; SKX-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1210 ; SKX-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1211 ; SKX-NEXT: retq # sched: [7:1.00]
1213 ; ZNVER1-LABEL: test_paddusb:
1215 ; ZNVER1-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1216 ; ZNVER1-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1217 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; First call takes the two register inputs; second call takes the loaded %2.
1218 %1 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> %a0, <32 x i8> %a1)
1219 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
1220 %3 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> %1, <32 x i8> %2)
1223 declare <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8>, <32 x i8>) nounwind readnone
; test_paddusw - calls @llvm.uadd.sat.v16i16 twice: on %a0/%a1, then on that
; result and a vector loaded from %a2. For every -mcpu prefix the autogenerated
; check lines expect a register-register vpaddusw followed by a vpaddusw with a
; (%rdi) memory operand, each with its "# sched" annotation.
1225 define <16 x i16> @test_paddusw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
1226 ; GENERIC-LABEL: test_paddusw:
1228 ; GENERIC-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1229 ; GENERIC-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1230 ; GENERIC-NEXT: retq # sched: [1:1.00]
1232 ; HASWELL-LABEL: test_paddusw:
1234 ; HASWELL-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1235 ; HASWELL-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1236 ; HASWELL-NEXT: retq # sched: [7:1.00]
1238 ; BROADWELL-LABEL: test_paddusw:
1239 ; BROADWELL: # %bb.0:
1240 ; BROADWELL-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1241 ; BROADWELL-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
1242 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1244 ; SKYLAKE-LABEL: test_paddusw:
1246 ; SKYLAKE-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1247 ; SKYLAKE-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1248 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1250 ; SKX-LABEL: test_paddusw:
1252 ; SKX-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1253 ; SKX-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1254 ; SKX-NEXT: retq # sched: [7:1.00]
1256 ; ZNVER1-LABEL: test_paddusw:
1258 ; ZNVER1-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1259 ; ZNVER1-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1260 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; First call takes the two register inputs; second call takes the loaded %2.
1261 %1 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %a0, <16 x i16> %a1)
1262 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
1263 %3 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %1, <16 x i16> %2)
1266 declare <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16>, <16 x i16>) nounwind readnone
; test_paddw - two `add <16 x i16>` operations: %a0 + %a1, then that sum plus a
; vector loaded from %a2. For every -mcpu prefix the autogenerated check lines
; expect a register-register vpaddw followed by a vpaddw with a (%rdi) memory
; operand, each with its "# sched" annotation.
1268 define <16 x i16> @test_paddw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
1269 ; GENERIC-LABEL: test_paddw:
1271 ; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1272 ; GENERIC-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1273 ; GENERIC-NEXT: retq # sched: [1:1.00]
1275 ; HASWELL-LABEL: test_paddw:
1277 ; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1278 ; HASWELL-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1279 ; HASWELL-NEXT: retq # sched: [7:1.00]
1281 ; BROADWELL-LABEL: test_paddw:
1282 ; BROADWELL: # %bb.0:
1283 ; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1284 ; BROADWELL-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
1285 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1287 ; SKYLAKE-LABEL: test_paddw:
1289 ; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1290 ; SKYLAKE-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1291 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1293 ; SKX-LABEL: test_paddw:
1295 ; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1296 ; SKX-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1297 ; SKX-NEXT: retq # sched: [7:1.00]
1299 ; ZNVER1-LABEL: test_paddw:
1301 ; ZNVER1-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1302 ; ZNVER1-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1303 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-register add first, then an add whose second operand is the load.
1304 %1 = add <16 x i16> %a0, %a1
1305 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
1306 %3 = add <16 x i16> %1, %2
; test_palignr - two shufflevector operations with the same mask (elements
; 1-15 of the first operand followed by element 0 of the second, per 16-byte
; half), the second one consuming the result of the first, followed by an add
; of the two shuffle results. For every -mcpu prefix the autogenerated check
; lines expect two register-register vpalignr instructions and a vpaddb, each
; with its "# sched" annotation.
;
; NOTE(review): %2 (the load from %a2) is not used by any instruction visible
; in this function, and the expected assembly contains no vpalignr with a
; memory operand, so the load form does not appear to be exercised here.
; Confirm whether %3 was meant to take %2 as an operand; if the IR is changed,
; regenerate the assertions with the update script rather than editing them by
; hand.
1310 define <32 x i8> @test_palignr(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
1311 ; GENERIC-LABEL: test_palignr:
1313 ; GENERIC-NEXT: vpalignr {{.*#+}} ymm1 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00]
1314 ; GENERIC-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0],ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16] sched: [1:1.00]
1315 ; GENERIC-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
1316 ; GENERIC-NEXT: retq # sched: [1:1.00]
1318 ; HASWELL-LABEL: test_palignr:
1320 ; HASWELL-NEXT: vpalignr {{.*#+}} ymm1 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00]
1321 ; HASWELL-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0],ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16] sched: [1:1.00]
1322 ; HASWELL-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
1323 ; HASWELL-NEXT: retq # sched: [7:1.00]
1325 ; BROADWELL-LABEL: test_palignr:
1326 ; BROADWELL: # %bb.0:
1327 ; BROADWELL-NEXT: vpalignr {{.*#+}} ymm1 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00]
1328 ; BROADWELL-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0],ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16] sched: [1:1.00]
1329 ; BROADWELL-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
1330 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1332 ; SKYLAKE-LABEL: test_palignr:
1334 ; SKYLAKE-NEXT: vpalignr {{.*#+}} ymm1 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00]
1335 ; SKYLAKE-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0],ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16] sched: [1:1.00]
1336 ; SKYLAKE-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
1337 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1339 ; SKX-LABEL: test_palignr:
1341 ; SKX-NEXT: vpalignr {{.*#+}} ymm1 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00]
1342 ; SKX-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0],ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16] sched: [1:1.00]
1343 ; SKX-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
1344 ; SKX-NEXT: retq # sched: [7:1.00]
1346 ; ZNVER1-LABEL: test_palignr:
1348 ; ZNVER1-NEXT: vpalignr {{.*#+}} ymm1 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:0.25]
1349 ; ZNVER1-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0],ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16] sched: [1:0.25]
1350 ; ZNVER1-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
1351 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; %1 shuffles (%a1, %a0); %3 shuffles (%a0, %1); %4 adds the two results.
1352 %1 = shufflevector <32 x i8> %a1, <32 x i8> %a0, <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48>
1353 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
1354 %3 = shufflevector <32 x i8> %a0, <32 x i8> %1, <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48>
1355 %4 = add <32 x i8> %1, %3
; test_pand - `and` of %a0 and %a1, `and` of that result with a vector loaded
; from %a2, then an `add` with %a1. For every -mcpu prefix the autogenerated
; check lines expect a register-register vpand, a vpand with a (%rdi) memory
; operand, and a vpaddq, each with its "# sched" annotation.
; NOTE(review): the trailing add is presumably there to keep the logic ops
; selected as integer-domain instructions - confirm.
1359 define <4 x i64> @test_pand(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
1360 ; GENERIC-LABEL: test_pand:
1362 ; GENERIC-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1363 ; GENERIC-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1364 ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1365 ; GENERIC-NEXT: retq # sched: [1:1.00]
1367 ; HASWELL-LABEL: test_pand:
1369 ; HASWELL-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1370 ; HASWELL-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1371 ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1372 ; HASWELL-NEXT: retq # sched: [7:1.00]
1374 ; BROADWELL-LABEL: test_pand:
1375 ; BROADWELL: # %bb.0:
1376 ; BROADWELL-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1377 ; BROADWELL-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
1378 ; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1379 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1381 ; SKYLAKE-LABEL: test_pand:
1383 ; SKYLAKE-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1384 ; SKYLAKE-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1385 ; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1386 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1388 ; SKX-LABEL: test_pand:
1390 ; SKX-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1391 ; SKX-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1392 ; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1393 ; SKX-NEXT: retq # sched: [7:1.00]
1395 ; ZNVER1-LABEL: test_pand:
1397 ; ZNVER1-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1398 ; ZNVER1-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1399 ; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1400 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-register and, and with the loaded vector, then add %a1.
1401 %1 = and <4 x i64> %a0, %a1
1402 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
1403 %3 = and <4 x i64> %1, %2
1404 %4 = add <4 x i64> %3, %a1
; test_pandn - two "xor with all-ones, then and" sequences: the first inverts
; %a0 and ands it with %a1; the second inverts that result and ands it with a
; vector loaded from %a2. The two and results are then added. For every -mcpu
; prefix the autogenerated check lines expect a register-register vpandn, a
; vpandn with a (%rdi) memory operand, and a vpaddq, each with its "# sched"
; annotation.
1408 define <4 x i64> @test_pandn(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
1409 ; GENERIC-LABEL: test_pandn:
1411 ; GENERIC-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1412 ; GENERIC-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [8:0.50]
1413 ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1414 ; GENERIC-NEXT: retq # sched: [1:1.00]
1416 ; HASWELL-LABEL: test_pandn:
1418 ; HASWELL-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1419 ; HASWELL-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [8:0.50]
1420 ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1421 ; HASWELL-NEXT: retq # sched: [7:1.00]
1423 ; BROADWELL-LABEL: test_pandn:
1424 ; BROADWELL: # %bb.0:
1425 ; BROADWELL-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1426 ; BROADWELL-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [7:0.50]
1427 ; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1428 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1430 ; SKYLAKE-LABEL: test_pandn:
1432 ; SKYLAKE-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1433 ; SKYLAKE-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [8:0.50]
1434 ; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1435 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1437 ; SKX-LABEL: test_pandn:
1439 ; SKX-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1440 ; SKX-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [8:0.50]
1441 ; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1442 ; SKX-NEXT: retq # sched: [7:1.00]
1444 ; ZNVER1-LABEL: test_pandn:
1446 ; ZNVER1-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1447 ; ZNVER1-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [8:0.50]
1448 ; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1449 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; %2 = ~%a0 & %a1; %5 = ~%2 & load(%a2); %6 = %2 + %5.
1450 %1 = xor <4 x i64> %a0, <i64 -1, i64 -1, i64 -1, i64 -1>
1451 %2 = and <4 x i64> %a1, %1
1452 %3 = load <4 x i64>, <4 x i64> *%a2, align 32
1453 %4 = xor <4 x i64> %2, <i64 -1, i64 -1, i64 -1, i64 -1>
1454 %5 = and <4 x i64> %3, %4
1455 %6 = add <4 x i64> %2, %5
; test_pavgb - spells out the sequence "zext to i16, add, add 1, lshr 1, trunc
; to i8" twice: first on %a0 and %a1, then on that result and a vector loaded
; from %a2. For every -mcpu prefix the autogenerated check lines expect a
; register-register vpavgb followed by a vpavgb with a (%rdi) memory operand,
; each with its "# sched" annotation.
1459 define <32 x i8> @test_pavgb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
1460 ; GENERIC-LABEL: test_pavgb:
1462 ; GENERIC-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1463 ; GENERIC-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1464 ; GENERIC-NEXT: retq # sched: [1:1.00]
1466 ; HASWELL-LABEL: test_pavgb:
1468 ; HASWELL-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1469 ; HASWELL-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1470 ; HASWELL-NEXT: retq # sched: [7:1.00]
1472 ; BROADWELL-LABEL: test_pavgb:
1473 ; BROADWELL: # %bb.0:
1474 ; BROADWELL-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1475 ; BROADWELL-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
1476 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1478 ; SKYLAKE-LABEL: test_pavgb:
1480 ; SKYLAKE-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1481 ; SKYLAKE-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1482 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1484 ; SKX-LABEL: test_pavgb:
1486 ; SKX-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1487 ; SKX-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1488 ; SKX-NEXT: retq # sched: [7:1.00]
1490 ; ZNVER1-LABEL: test_pavgb:
1492 ; ZNVER1-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1493 ; ZNVER1-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1494 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; First sequence (%1-%6) uses the two register inputs; second sequence
; (%8-%13) uses the first result %6 and the loaded vector %7.
1495 %1 = zext <32 x i8> %a0 to <32 x i16>
1496 %2 = zext <32 x i8> %a1 to <32 x i16>
1497 %3 = add <32 x i16> %1, %2
1498 %4 = add <32 x i16> %3, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1499 %5 = lshr <32 x i16> %4, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1500 %6 = trunc <32 x i16> %5 to <32 x i8>
1501 %7 = load <32 x i8>, <32 x i8> *%a2, align 32
1502 %8 = zext <32 x i8> %6 to <32 x i16>
1503 %9 = zext <32 x i8> %7 to <32 x i16>
1504 %10 = add <32 x i16> %8, %9
1505 %11 = add <32 x i16> %10, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1506 %12 = lshr <32 x i16> %11, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1507 %13 = trunc <32 x i16> %12 to <32 x i8>
; test_pavgw - spells out the sequence "zext to i32, add, add 1, lshr 1, trunc
; to i16" twice: first on %a0 and %a1, then on that result and a vector loaded
; from %a2. For every -mcpu prefix the autogenerated check lines expect a
; register-register vpavgw followed by a vpavgw with a (%rdi) memory operand,
; each with its "# sched" annotation.
1511 define <16 x i16> @test_pavgw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
1512 ; GENERIC-LABEL: test_pavgw:
1514 ; GENERIC-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1515 ; GENERIC-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1516 ; GENERIC-NEXT: retq # sched: [1:1.00]
1518 ; HASWELL-LABEL: test_pavgw:
1520 ; HASWELL-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1521 ; HASWELL-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1522 ; HASWELL-NEXT: retq # sched: [7:1.00]
1524 ; BROADWELL-LABEL: test_pavgw:
1525 ; BROADWELL: # %bb.0:
1526 ; BROADWELL-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1527 ; BROADWELL-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
1528 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1530 ; SKYLAKE-LABEL: test_pavgw:
1532 ; SKYLAKE-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1533 ; SKYLAKE-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1534 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1536 ; SKX-LABEL: test_pavgw:
1538 ; SKX-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1539 ; SKX-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1540 ; SKX-NEXT: retq # sched: [7:1.00]
1542 ; ZNVER1-LABEL: test_pavgw:
1544 ; ZNVER1-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1545 ; ZNVER1-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1546 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; First sequence (%1-%6) uses the two register inputs; second sequence
; (%8-%13) uses the first result %6 and the loaded vector %7.
1547 %1 = zext <16 x i16> %a0 to <16 x i32>
1548 %2 = zext <16 x i16> %a1 to <16 x i32>
1549 %3 = add <16 x i32> %1, %2
1550 %4 = add <16 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1551 %5 = lshr <16 x i32> %4, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1552 %6 = trunc <16 x i32> %5 to <16 x i16>
1553 %7 = load <16 x i16>, <16 x i16> *%a2, align 32
1554 %8 = zext <16 x i16> %6 to <16 x i32>
1555 %9 = zext <16 x i16> %7 to <16 x i32>
1556 %10 = add <16 x i32> %8, %9
1557 %11 = add <16 x i32> %10, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1558 %12 = lshr <16 x i32> %11, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1559 %13 = trunc <16 x i32> %12 to <16 x i16>
; Scheduling test for the 128-bit vpblendd instruction.
; Exercises the register-register form and the form with the second source
; folded from memory (%a2); the two blends are summed so both stay live.
1563 define <4 x i32> @test_pblendd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
1564 ; GENERIC-LABEL: test_pblendd:
1566 ; GENERIC-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] sched: [1:0.50]
1567 ; GENERIC-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [7:0.50]
1568 ; GENERIC-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1569 ; GENERIC-NEXT: retq # sched: [1:1.00]
1571 ; HASWELL-LABEL: test_pblendd:
1573 ; HASWELL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] sched: [1:0.33]
1574 ; HASWELL-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [7:0.50]
1575 ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1576 ; HASWELL-NEXT: retq # sched: [7:1.00]
1578 ; BROADWELL-LABEL: test_pblendd:
1579 ; BROADWELL: # %bb.0:
1580 ; BROADWELL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] sched: [1:0.33]
1581 ; BROADWELL-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [6:0.50]
1582 ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1583 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1585 ; SKYLAKE-LABEL: test_pblendd:
1587 ; SKYLAKE-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] sched: [1:0.33]
1588 ; SKYLAKE-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [7:0.50]
1589 ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
1590 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1592 ; SKX-LABEL: test_pblendd:
1594 ; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] sched: [1:0.33]
1595 ; SKX-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [7:0.50]
1596 ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
1597 ; SKX-NEXT: retq # sched: [7:1.00]
1599 ; ZNVER1-LABEL: test_pblendd:
1601 ; ZNVER1-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] sched: [1:0.50]
1602 ; ZNVER1-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [8:1.00]
1603 ; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
1604 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register blend: elements 0-2 come from %a1, element 3 from %a0.
1605 %1 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
1606 %2 = load <4 x i32>, <4 x i32> *%a2, align 16
; Memory blend: elements 0 and 2 come from the loaded vector, 1 and 3 from %a1.
1607 %3 = shufflevector <4 x i32> %a1, <4 x i32> %2, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
1608 %4 = add <4 x i32> %1, %3
; Scheduling test for the 256-bit vpblendd instruction.
; Exercises the register-register form and the form with the second source
; folded from memory (%a2); the two blends are summed so both stay live.
1612 define <8 x i32> @test_pblendd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
1613 ; GENERIC-LABEL: test_pblendd_ymm:
1615 ; GENERIC-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.50]
1616 ; GENERIC-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [8:0.50]
1617 ; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1618 ; GENERIC-NEXT: retq # sched: [1:1.00]
1620 ; HASWELL-LABEL: test_pblendd_ymm:
1622 ; HASWELL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.33]
1623 ; HASWELL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [8:0.50]
1624 ; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1625 ; HASWELL-NEXT: retq # sched: [7:1.00]
1627 ; BROADWELL-LABEL: test_pblendd_ymm:
1628 ; BROADWELL: # %bb.0:
1629 ; BROADWELL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.33]
1630 ; BROADWELL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [7:0.50]
1631 ; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1632 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1634 ; SKYLAKE-LABEL: test_pblendd_ymm:
1636 ; SKYLAKE-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.33]
1637 ; SKYLAKE-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [8:0.50]
1638 ; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1639 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1641 ; SKX-LABEL: test_pblendd_ymm:
1643 ; SKX-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.33]
1644 ; SKX-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [8:0.50]
1645 ; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1646 ; SKX-NEXT: retq # sched: [7:1.00]
1648 ; ZNVER1-LABEL: test_pblendd_ymm:
1650 ; ZNVER1-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.50]
1651 ; ZNVER1-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [9:1.50]
1652 ; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1653 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register blend: elements 0-2 and 7 come from %a1, elements 3-6 from %a0.
1654 %1 = shufflevector <8 x i32> %a0, <8 x i32> %a1, <8 x i32> <i32 8, i32 9, i32 10, i32 3, i32 4, i32 5, i32 6, i32 15>
1655 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
; Memory blend: elements 1 and 2 come from the loaded vector, the rest from %a1.
1656 %3 = shufflevector <8 x i32> %a1, <8 x i32> %2, <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 5, i32 6, i32 7>
1657 %4 = add <8 x i32> %1, %3
; Scheduling test for the 256-bit vpblendvb instruction.
; The intrinsic is called twice: once with all-register operands and once with
; the second operand loaded from %a3, so both the register and the
; memory-folded forms appear in the expected output.
1661 define <32 x i8> @test_pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2, <32 x i8> *%a3, <32 x i8> %a4) {
1662 ; GENERIC-LABEL: test_pblendvb:
1664 ; GENERIC-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
1665 ; GENERIC-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
1666 ; GENERIC-NEXT: retq # sched: [1:1.00]
1668 ; HASWELL-LABEL: test_pblendvb:
1670 ; HASWELL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
1671 ; HASWELL-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
1672 ; HASWELL-NEXT: retq # sched: [7:1.00]
1674 ; BROADWELL-LABEL: test_pblendvb:
1675 ; BROADWELL: # %bb.0:
1676 ; BROADWELL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
1677 ; BROADWELL-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
1678 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1680 ; SKYLAKE-LABEL: test_pblendvb:
1682 ; SKYLAKE-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67]
1683 ; SKYLAKE-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [8:0.67]
1684 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1686 ; SKX-LABEL: test_pblendvb:
1688 ; SKX-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67]
1689 ; SKX-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [8:0.67]
1690 ; SKX-NEXT: retq # sched: [7:1.00]
1692 ; ZNVER1-LABEL: test_pblendvb:
1694 ; ZNVER1-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
1695 ; ZNVER1-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
1696 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; First blend uses register operands only.
1697 %1 = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2)
1698 %2 = load <32 x i8>, <32 x i8> *%a3, align 32
; Second blend consumes the first result and the loaded vector, with %a4 as mask.
1699 %3 = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %1, <32 x i8> %2, <32 x i8> %a4)
1702 declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>) nounwind readnone
; Scheduling test for the 256-bit vpblendw instruction.
; Exercises the register-register form and the form with the second source
; folded from memory (%a2); the two blends are summed so both stay live.
1704 define <16 x i16> @test_pblendw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
1705 ; GENERIC-LABEL: test_pblendw:
1707 ; GENERIC-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:0.50]
1708 ; GENERIC-NEXT: vpblendw {{.*#+}} ymm1 = mem[0],ymm1[1],mem[2],ymm1[3],mem[4],ymm1[5],mem[6],ymm1[7],mem[8],ymm1[9],mem[10],ymm1[11],mem[12],ymm1[13],mem[14],ymm1[15] sched: [8:0.50]
1709 ; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1710 ; GENERIC-NEXT: retq # sched: [1:1.00]
1712 ; HASWELL-LABEL: test_pblendw:
1714 ; HASWELL-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:1.00]
1715 ; HASWELL-NEXT: vpblendw {{.*#+}} ymm1 = mem[0],ymm1[1],mem[2],ymm1[3],mem[4],ymm1[5],mem[6],ymm1[7],mem[8],ymm1[9],mem[10],ymm1[11],mem[12],ymm1[13],mem[14],ymm1[15] sched: [8:1.00]
1716 ; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1717 ; HASWELL-NEXT: retq # sched: [7:1.00]
1719 ; BROADWELL-LABEL: test_pblendw:
1720 ; BROADWELL: # %bb.0:
1721 ; BROADWELL-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:1.00]
1722 ; BROADWELL-NEXT: vpblendw {{.*#+}} ymm1 = mem[0],ymm1[1],mem[2],ymm1[3],mem[4],ymm1[5],mem[6],ymm1[7],mem[8],ymm1[9],mem[10],ymm1[11],mem[12],ymm1[13],mem[14],ymm1[15] sched: [7:1.00]
1723 ; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1724 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1726 ; SKYLAKE-LABEL: test_pblendw:
1728 ; SKYLAKE-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:1.00]
1729 ; SKYLAKE-NEXT: vpblendw {{.*#+}} ymm1 = mem[0],ymm1[1],mem[2],ymm1[3],mem[4],ymm1[5],mem[6],ymm1[7],mem[8],ymm1[9],mem[10],ymm1[11],mem[12],ymm1[13],mem[14],ymm1[15] sched: [8:1.00]
1730 ; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1731 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1733 ; SKX-LABEL: test_pblendw:
1735 ; SKX-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:1.00]
1736 ; SKX-NEXT: vpblendw {{.*#+}} ymm1 = mem[0],ymm1[1],mem[2],ymm1[3],mem[4],ymm1[5],mem[6],ymm1[7],mem[8],ymm1[9],mem[10],ymm1[11],mem[12],ymm1[13],mem[14],ymm1[15] sched: [8:1.00]
1737 ; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1738 ; SKX-NEXT: retq # sched: [7:1.00]
1740 ; ZNVER1-LABEL: test_pblendw:
1742 ; ZNVER1-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [2:0.33]
1743 ; ZNVER1-NEXT: vpblendw {{.*#+}} ymm1 = mem[0],ymm1[1],mem[2],ymm1[3],mem[4],ymm1[5],mem[6],ymm1[7],mem[8],ymm1[9],mem[10],ymm1[11],mem[12],ymm1[13],mem[14],ymm1[15] sched: [9:0.50]
1744 ; ZNVER1-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1745 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register blend: elements 2-4 and 10-12 come from %a1, the rest from %a0.
1746 %1 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 20, i32 5, i32 6, i32 7, i32 8, i32 9, i32 26, i32 27, i32 28, i32 13, i32 14, i32 15>
1747 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
; Memory blend: even elements come from the loaded vector, odd elements from %a1.
1748 %3 = shufflevector <16 x i16> %a1, <16 x i16> %2, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>
1749 %4 = add <16 x i16> %1, %3
; Scheduling test for the 128-bit vpbroadcastb instruction.
; Splats element 0 of a register operand and of a vector loaded from %a1, then
; adds the two splats so both broadcasts stay live. The expected order of the
; two broadcasts differs between CPU models (see the check lines below).
1753 define <16 x i8> @test_pbroadcastb(<16 x i8> %a0, <16 x i8> *%a1) {
1754 ; GENERIC-LABEL: test_pbroadcastb:
1756 ; GENERIC-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [1:0.50]
1757 ; GENERIC-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [7:0.50]
1758 ; GENERIC-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1759 ; GENERIC-NEXT: retq # sched: [1:1.00]
1761 ; HASWELL-LABEL: test_pbroadcastb:
1763 ; HASWELL-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [3:1.00]
1764 ; HASWELL-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [9:1.00]
1765 ; HASWELL-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1766 ; HASWELL-NEXT: retq # sched: [7:1.00]
1768 ; BROADWELL-LABEL: test_pbroadcastb:
1769 ; BROADWELL: # %bb.0:
1770 ; BROADWELL-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [9:1.00]
1771 ; BROADWELL-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [3:1.00]
1772 ; BROADWELL-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1773 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1775 ; SKYLAKE-LABEL: test_pbroadcastb:
1777 ; SKYLAKE-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [3:1.00]
1778 ; SKYLAKE-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [7:1.00]
1779 ; SKYLAKE-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
1780 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1782 ; SKX-LABEL: test_pbroadcastb:
1784 ; SKX-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [3:1.00]
1785 ; SKX-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [7:1.00]
1786 ; SKX-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
1787 ; SKX-NEXT: retq # sched: [7:1.00]
1789 ; ZNVER1-LABEL: test_pbroadcastb:
1791 ; ZNVER1-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [8:1.00]
1792 ; ZNVER1-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [1:0.25]
1793 ; ZNVER1-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
1794 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; An all-zero shuffle mask replicates element 0 into every lane.
1795 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> zeroinitializer
1796 %2 = load <16 x i8>, <16 x i8> *%a1, align 16
1797 %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> zeroinitializer
1798 %4 = add <16 x i8> %1, %3
; Scheduling test for the 256-bit vpbroadcastb instruction.
; Splats element 0 of a register operand and of a vector loaded from %a1, then
; adds the two splats so both broadcasts stay live. The expected order of the
; two broadcasts differs between CPU models (see the check lines below).
1802 define <32 x i8> @test_pbroadcastb_ymm(<32 x i8> %a0, <32 x i8> *%a1) {
1803 ; GENERIC-LABEL: test_pbroadcastb_ymm:
1805 ; GENERIC-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [1:1.00]
1806 ; GENERIC-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [7:0.50]
1807 ; GENERIC-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1808 ; GENERIC-NEXT: retq # sched: [1:1.00]
1810 ; HASWELL-LABEL: test_pbroadcastb_ymm:
1812 ; HASWELL-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [3:1.00]
1813 ; HASWELL-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [9:1.00]
1814 ; HASWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1815 ; HASWELL-NEXT: retq # sched: [7:1.00]
1817 ; BROADWELL-LABEL: test_pbroadcastb_ymm:
1818 ; BROADWELL: # %bb.0:
1819 ; BROADWELL-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [9:1.00]
1820 ; BROADWELL-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [3:1.00]
1821 ; BROADWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1822 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1824 ; SKYLAKE-LABEL: test_pbroadcastb_ymm:
1826 ; SKYLAKE-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [3:1.00]
1827 ; SKYLAKE-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [8:1.00]
1828 ; SKYLAKE-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1829 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1831 ; SKX-LABEL: test_pbroadcastb_ymm:
1833 ; SKX-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [3:1.00]
1834 ; SKX-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [8:1.00]
1835 ; SKX-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1836 ; SKX-NEXT: retq # sched: [7:1.00]
1838 ; ZNVER1-LABEL: test_pbroadcastb_ymm:
1840 ; ZNVER1-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [8:2.00]
1841 ; ZNVER1-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [2:0.25]
1842 ; ZNVER1-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1843 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; An all-zero shuffle mask replicates element 0 into every lane.
1844 %1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> zeroinitializer
1845 %2 = load <32 x i8>, <32 x i8> *%a1, align 32
1846 %3 = shufflevector <32 x i8> %2, <32 x i8> undef, <32 x i32> zeroinitializer
1847 %4 = add <32 x i8> %1, %3
; Scheduling test for the 128-bit vpbroadcastd instruction.
; Splats element 0 of a register operand and of a vector loaded from %a1, then
; adds the two splats so both broadcasts stay live. The expected order of the
; two broadcasts differs between CPU models (see the check lines below).
1851 define <4 x i32> @test_pbroadcastd(<4 x i32> %a0, <4 x i32> *%a1) {
1852 ; GENERIC-LABEL: test_pbroadcastd:
1854 ; GENERIC-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:0.50]
1855 ; GENERIC-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [7:0.50]
1856 ; GENERIC-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1857 ; GENERIC-NEXT: retq # sched: [1:1.00]
1859 ; HASWELL-LABEL: test_pbroadcastd:
1861 ; HASWELL-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00]
1862 ; HASWELL-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [6:0.50]
1863 ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1864 ; HASWELL-NEXT: retq # sched: [7:1.00]
1866 ; BROADWELL-LABEL: test_pbroadcastd:
1867 ; BROADWELL: # %bb.0:
1868 ; BROADWELL-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00]
1869 ; BROADWELL-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [5:0.50]
1870 ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1871 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1873 ; SKYLAKE-LABEL: test_pbroadcastd:
1875 ; SKYLAKE-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00]
1876 ; SKYLAKE-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [6:0.50]
1877 ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
1878 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1880 ; SKX-LABEL: test_pbroadcastd:
1882 ; SKX-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00]
1883 ; SKX-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [6:0.50]
1884 ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
1885 ; SKX-NEXT: retq # sched: [7:1.00]
1887 ; ZNVER1-LABEL: test_pbroadcastd:
1889 ; ZNVER1-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [8:0.50]
1890 ; ZNVER1-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:0.25]
1891 ; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
1892 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; An all-zero shuffle mask replicates element 0 into every lane.
1893 %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> zeroinitializer
1894 %2 = load <4 x i32>, <4 x i32> *%a1, align 16
1895 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> zeroinitializer
1896 %4 = add <4 x i32> %1, %3
; Scheduling test for the 256-bit vpbroadcastd instruction.
; Splats element 0 of a register operand and of a vector loaded from %a1, then
; adds the two splats so both broadcasts stay live. The expected order of the
; two broadcasts differs between CPU models (see the check lines below).
1900 define <8 x i32> @test_pbroadcastd_ymm(<8 x i32> %a0, <8 x i32> *%a1) {
1901 ; GENERIC-LABEL: test_pbroadcastd_ymm:
1903 ; GENERIC-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [1:1.00]
1904 ; GENERIC-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [7:0.50]
1905 ; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1906 ; GENERIC-NEXT: retq # sched: [1:1.00]
1908 ; HASWELL-LABEL: test_pbroadcastd_ymm:
1910 ; HASWELL-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [3:1.00]
1911 ; HASWELL-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [7:0.50]
1912 ; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1913 ; HASWELL-NEXT: retq # sched: [7:1.00]
1915 ; BROADWELL-LABEL: test_pbroadcastd_ymm:
1916 ; BROADWELL: # %bb.0:
1917 ; BROADWELL-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [3:1.00]
1918 ; BROADWELL-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [6:0.50]
1919 ; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1920 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1922 ; SKYLAKE-LABEL: test_pbroadcastd_ymm:
1924 ; SKYLAKE-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [3:1.00]
1925 ; SKYLAKE-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [7:0.50]
1926 ; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1927 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1929 ; SKX-LABEL: test_pbroadcastd_ymm:
1931 ; SKX-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [3:1.00]
1932 ; SKX-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [7:0.50]
1933 ; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1934 ; SKX-NEXT: retq # sched: [7:1.00]
1936 ; ZNVER1-LABEL: test_pbroadcastd_ymm:
1938 ; ZNVER1-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [8:0.50]
1939 ; ZNVER1-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [2:0.25]
1940 ; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1941 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; An all-zero shuffle mask replicates element 0 into every lane.
1942 %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> zeroinitializer
1943 %2 = load <8 x i32>, <8 x i32> *%a1, align 32
1944 %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> zeroinitializer
1945 %4 = add <8 x i32> %1, %3
; Scheduling test for the 128-bit vpbroadcastq instruction.
; Splats element 0 of a register operand and of a vector loaded from %a1, then
; adds the two splats so both broadcasts stay live. The expected order of the
; two broadcasts differs between CPU models (see the check lines below).
1949 define <2 x i64> @test_pbroadcastq(<2 x i64> %a0, <2 x i64> *%a1) {
1950 ; GENERIC-LABEL: test_pbroadcastq:
1952 ; GENERIC-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:0.50]
1953 ; GENERIC-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [7:0.50]
1954 ; GENERIC-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1955 ; GENERIC-NEXT: retq # sched: [1:1.00]
1957 ; HASWELL-LABEL: test_pbroadcastq:
1959 ; HASWELL-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00]
1960 ; HASWELL-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [6:0.50]
1961 ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1962 ; HASWELL-NEXT: retq # sched: [7:1.00]
1964 ; BROADWELL-LABEL: test_pbroadcastq:
1965 ; BROADWELL: # %bb.0:
1966 ; BROADWELL-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00]
1967 ; BROADWELL-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [5:0.50]
1968 ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1969 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1971 ; SKYLAKE-LABEL: test_pbroadcastq:
1973 ; SKYLAKE-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00]
1974 ; SKYLAKE-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [6:0.50]
1975 ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
1976 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1978 ; SKX-LABEL: test_pbroadcastq:
1980 ; SKX-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00]
1981 ; SKX-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [6:0.50]
1982 ; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
1983 ; SKX-NEXT: retq # sched: [7:1.00]
1985 ; ZNVER1-LABEL: test_pbroadcastq:
1987 ; ZNVER1-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [8:0.50]
1988 ; ZNVER1-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:0.25]
1989 ; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
1990 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; An all-zero shuffle mask replicates element 0 into every lane.
1991 %1 = shufflevector <2 x i64> %a0, <2 x i64> undef, <2 x i32> zeroinitializer
1992 %2 = load <2 x i64>, <2 x i64> *%a1, align 16
1993 %3 = shufflevector <2 x i64> %2, <2 x i64> undef, <2 x i32> zeroinitializer
1994 %4 = add <2 x i64> %1, %3
; Scheduling test for the 256-bit vpbroadcastq instruction.
; Splats element 0 of a register operand and of a vector loaded from %a1, then
; adds the two splats so both broadcasts stay live. The expected order of the
; two broadcasts differs between CPU models (see the check lines below).
1998 define <4 x i64> @test_pbroadcastq_ymm(<4 x i64> %a0, <4 x i64> *%a1) {
1999 ; GENERIC-LABEL: test_pbroadcastq_ymm:
2001 ; GENERIC-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [1:1.00]
2002 ; GENERIC-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [7:0.50]
2003 ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2004 ; GENERIC-NEXT: retq # sched: [1:1.00]
2006 ; HASWELL-LABEL: test_pbroadcastq_ymm:
2008 ; HASWELL-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [3:1.00]
2009 ; HASWELL-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [7:0.50]
2010 ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2011 ; HASWELL-NEXT: retq # sched: [7:1.00]
2013 ; BROADWELL-LABEL: test_pbroadcastq_ymm:
2014 ; BROADWELL: # %bb.0:
2015 ; BROADWELL-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [3:1.00]
2016 ; BROADWELL-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [6:0.50]
2017 ; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2018 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2020 ; SKYLAKE-LABEL: test_pbroadcastq_ymm:
2022 ; SKYLAKE-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [3:1.00]
2023 ; SKYLAKE-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [7:0.50]
2024 ; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
2025 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2027 ; SKX-LABEL: test_pbroadcastq_ymm:
2029 ; SKX-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [3:1.00]
2030 ; SKX-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [7:0.50]
2031 ; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
2032 ; SKX-NEXT: retq # sched: [7:1.00]
2034 ; ZNVER1-LABEL: test_pbroadcastq_ymm:
2036 ; ZNVER1-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [8:0.50]
2037 ; ZNVER1-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [2:0.25]
2038 ; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2039 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; An all-zero shuffle mask replicates element 0 into every lane.
2040 %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> zeroinitializer
2041 %2 = load <4 x i64>, <4 x i64> *%a1, align 32
2042 %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> zeroinitializer
2043 %4 = add <4 x i64> %1, %3
; Scheduling test for the 128-bit vpbroadcastw instruction.
; Splats element 0 of a register operand and of a vector loaded from %a1, then
; adds the two splats so both broadcasts stay live. The expected order of the
; two broadcasts differs between CPU models (see the check lines below).
2047 define <8 x i16> @test_pbroadcastw(<8 x i16> %a0, <8 x i16> *%a1) {
2048 ; GENERIC-LABEL: test_pbroadcastw:
2050 ; GENERIC-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [1:0.50]
2051 ; GENERIC-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [7:0.50]
2052 ; GENERIC-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2053 ; GENERIC-NEXT: retq # sched: [1:1.00]
2055 ; HASWELL-LABEL: test_pbroadcastw:
2057 ; HASWELL-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [3:1.00]
2058 ; HASWELL-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [9:1.00]
2059 ; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2060 ; HASWELL-NEXT: retq # sched: [7:1.00]
2062 ; BROADWELL-LABEL: test_pbroadcastw:
2063 ; BROADWELL: # %bb.0:
2064 ; BROADWELL-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [9:1.00]
2065 ; BROADWELL-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [3:1.00]
2066 ; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2067 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2069 ; SKYLAKE-LABEL: test_pbroadcastw:
2071 ; SKYLAKE-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [3:1.00]
2072 ; SKYLAKE-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [7:1.00]
2073 ; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
2074 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2076 ; SKX-LABEL: test_pbroadcastw:
2078 ; SKX-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [3:1.00]
2079 ; SKX-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [7:1.00]
2080 ; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
2081 ; SKX-NEXT: retq # sched: [7:1.00]
2083 ; ZNVER1-LABEL: test_pbroadcastw:
2085 ; ZNVER1-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [8:1.00]
2086 ; ZNVER1-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [1:0.25]
2087 ; ZNVER1-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
2088 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; An all-zero shuffle mask replicates element 0 into every lane.
2089 %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer
2090 %2 = load <8 x i16>, <8 x i16> *%a1, align 16
2091 %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> zeroinitializer
2092 %4 = add <8 x i16> %1, %3
; Scheduling test for the 256-bit vpbroadcastw instruction.
; Splats element 0 of a register operand and of a vector loaded from %a1, then
; adds the two splats so both broadcasts stay live. The expected order of the
; two broadcasts differs between CPU models (see the check lines below).
2096 define <16 x i16> @test_pbroadcastw_ymm(<16 x i16> %a0, <16 x i16> *%a1) {
2097 ; GENERIC-LABEL: test_pbroadcastw_ymm:
2099 ; GENERIC-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [1:1.00]
2100 ; GENERIC-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [7:0.50]
2101 ; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2102 ; GENERIC-NEXT: retq # sched: [1:1.00]
2104 ; HASWELL-LABEL: test_pbroadcastw_ymm:
2106 ; HASWELL-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [3:1.00]
2107 ; HASWELL-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [9:1.00]
2108 ; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2109 ; HASWELL-NEXT: retq # sched: [7:1.00]
2111 ; BROADWELL-LABEL: test_pbroadcastw_ymm:
2112 ; BROADWELL: # %bb.0:
2113 ; BROADWELL-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [9:1.00]
2114 ; BROADWELL-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [3:1.00]
2115 ; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2116 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2118 ; SKYLAKE-LABEL: test_pbroadcastw_ymm:
2120 ; SKYLAKE-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [3:1.00]
2121 ; SKYLAKE-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [8:1.00]
2122 ; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
2123 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2125 ; SKX-LABEL: test_pbroadcastw_ymm:
2127 ; SKX-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [3:1.00]
2128 ; SKX-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [8:1.00]
2129 ; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
2130 ; SKX-NEXT: retq # sched: [7:1.00]
2132 ; ZNVER1-LABEL: test_pbroadcastw_ymm:
2134 ; ZNVER1-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [8:2.00]
2135 ; ZNVER1-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [2:0.25]
2136 ; ZNVER1-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2137 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; An all-zero shuffle mask replicates element 0 into every lane.
2138 %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> zeroinitializer
2139 %2 = load <16 x i16>, <16 x i16> *%a1, align 32
2140 %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> zeroinitializer
2141 %4 = add <16 x i16> %1, %3
; Scheduling test for the 256-bit vpcmpeqb instruction.
; Compares two register operands, then compares that sign-extended mask against
; a vector loaded from %a2, so both the register and the memory-folded forms
; appear in the expected output.
2145 define <32 x i8> @test_pcmpeqb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
2146 ; GENERIC-LABEL: test_pcmpeqb:
2148 ; GENERIC-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2149 ; GENERIC-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2150 ; GENERIC-NEXT: retq # sched: [1:1.00]
2152 ; HASWELL-LABEL: test_pcmpeqb:
2154 ; HASWELL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2155 ; HASWELL-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2156 ; HASWELL-NEXT: retq # sched: [7:1.00]
2158 ; BROADWELL-LABEL: test_pcmpeqb:
2159 ; BROADWELL: # %bb.0:
2160 ; BROADWELL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2161 ; BROADWELL-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
2162 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2164 ; SKYLAKE-LABEL: test_pcmpeqb:
2166 ; SKYLAKE-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2167 ; SKYLAKE-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2168 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2170 ; SKX-LABEL: test_pcmpeqb:
2172 ; SKX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2173 ; SKX-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2174 ; SKX-NEXT: retq # sched: [7:1.00]
2176 ; ZNVER1-LABEL: test_pcmpeqb:
2178 ; ZNVER1-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2179 ; ZNVER1-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2180 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register compare; sext widens each i1 result to a full-width element mask.
2181 %1 = icmp eq <32 x i8> %a0, %a1
2182 %2 = sext <32 x i1> %1 to <32 x i8>
2183 %3 = load <32 x i8>, <32 x i8> *%a2, align 32
; Second compare takes the previous mask and the loaded vector as operands.
2184 %4 = icmp eq <32 x i8> %2, %3
2185 %5 = sext <32 x i1> %4 to <32 x i8>
; Scheduling test for the 256-bit vpcmpeqd instruction.
; Compares two register operands, then compares that sign-extended mask against
; a vector loaded from %a2, so both the register and the memory-folded forms
; appear in the expected output.
2189 define <8 x i32> @test_pcmpeqd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
2190 ; GENERIC-LABEL: test_pcmpeqd:
2192 ; GENERIC-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2193 ; GENERIC-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2194 ; GENERIC-NEXT: retq # sched: [1:1.00]
2196 ; HASWELL-LABEL: test_pcmpeqd:
2198 ; HASWELL-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2199 ; HASWELL-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2200 ; HASWELL-NEXT: retq # sched: [7:1.00]
2202 ; BROADWELL-LABEL: test_pcmpeqd:
2203 ; BROADWELL: # %bb.0:
2204 ; BROADWELL-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2205 ; BROADWELL-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
2206 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2208 ; SKYLAKE-LABEL: test_pcmpeqd:
2210 ; SKYLAKE-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2211 ; SKYLAKE-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2212 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2214 ; SKX-LABEL: test_pcmpeqd:
2216 ; SKX-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2217 ; SKX-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2218 ; SKX-NEXT: retq # sched: [7:1.00]
2220 ; ZNVER1-LABEL: test_pcmpeqd:
2222 ; ZNVER1-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2223 ; ZNVER1-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2224 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register compare; sext widens each i1 result to a full-width element mask.
2225 %1 = icmp eq <8 x i32> %a0, %a1
2226 %2 = sext <8 x i1> %1 to <8 x i32>
2227 %3 = load <8 x i32>, <8 x i32> *%a2, align 32
; Second compare takes the previous mask and the loaded vector as operands.
2228 %4 = icmp eq <8 x i32> %2, %3
2229 %5 = sext <8 x i1> %4 to <8 x i32>
; Scheduling test for the 256-bit vpcmpeqq instruction.
; Compares two register operands, then compares that sign-extended mask against
; a vector loaded from %a2, so both the register and the memory-folded forms
; appear in the expected output.
2233 define <4 x i64> @test_pcmpeqq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
2234 ; GENERIC-LABEL: test_pcmpeqq:
2236 ; GENERIC-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2237 ; GENERIC-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2238 ; GENERIC-NEXT: retq # sched: [1:1.00]
2240 ; HASWELL-LABEL: test_pcmpeqq:
2242 ; HASWELL-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2243 ; HASWELL-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2244 ; HASWELL-NEXT: retq # sched: [7:1.00]
2246 ; BROADWELL-LABEL: test_pcmpeqq:
2247 ; BROADWELL: # %bb.0:
2248 ; BROADWELL-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2249 ; BROADWELL-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
2250 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2252 ; SKYLAKE-LABEL: test_pcmpeqq:
2254 ; SKYLAKE-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2255 ; SKYLAKE-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2256 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2258 ; SKX-LABEL: test_pcmpeqq:
2260 ; SKX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2261 ; SKX-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2262 ; SKX-NEXT: retq # sched: [7:1.00]
2264 ; ZNVER1-LABEL: test_pcmpeqq:
2266 ; ZNVER1-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2267 ; ZNVER1-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2268 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register compare; sext widens each i1 result to a full-width element mask.
2269 %1 = icmp eq <4 x i64> %a0, %a1
2270 %2 = sext <4 x i1> %1 to <4 x i64>
2271 %3 = load <4 x i64>, <4 x i64> *%a2, align 32
; Second compare takes the previous mask and the loaded vector as operands.
2272 %4 = icmp eq <4 x i64> %2, %3
2273 %5 = sext <4 x i1> %4 to <4 x i64>
; Scheduling test for the 256-bit vpcmpeqw instruction.
; Compares two register operands, then compares that sign-extended mask against
; a vector loaded from %a2, so both the register and the memory-folded forms
; appear in the expected output.
2277 define <16 x i16> @test_pcmpeqw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
2278 ; GENERIC-LABEL: test_pcmpeqw:
2280 ; GENERIC-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2281 ; GENERIC-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2282 ; GENERIC-NEXT: retq # sched: [1:1.00]
2284 ; HASWELL-LABEL: test_pcmpeqw:
2286 ; HASWELL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2287 ; HASWELL-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2288 ; HASWELL-NEXT: retq # sched: [7:1.00]
2290 ; BROADWELL-LABEL: test_pcmpeqw:
2291 ; BROADWELL: # %bb.0:
2292 ; BROADWELL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2293 ; BROADWELL-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
2294 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2296 ; SKYLAKE-LABEL: test_pcmpeqw:
2298 ; SKYLAKE-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2299 ; SKYLAKE-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2300 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2302 ; SKX-LABEL: test_pcmpeqw:
2304 ; SKX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2305 ; SKX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2306 ; SKX-NEXT: retq # sched: [7:1.00]
2308 ; ZNVER1-LABEL: test_pcmpeqw:
2310 ; ZNVER1-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2311 ; ZNVER1-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2312 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register compare; sext widens each i1 result to a full-width element mask.
2313 %1 = icmp eq <16 x i16> %a0, %a1
2314 %2 = sext <16 x i1> %1 to <16 x i16>
2315 %3 = load <16 x i16>, <16 x i16> *%a2, align 32
; Second compare takes the previous mask and the loaded vector as operands.
2316 %4 = icmp eq <16 x i16> %2, %3
2317 %5 = sext <16 x i1> %4 to <16 x i16>
; Scheduling test for 256-bit vpcmpgtb. Two sext(icmp sgt) steps on <32 x i8>:
; %a0 against %a1, then that result against a vector loaded from %a2. The
; expectations show a register-form compare followed by a memory-form compare.
2321 define <32 x i8> @test_pcmpgtb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
2322 ; GENERIC-LABEL: test_pcmpgtb:
2324 ; GENERIC-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2325 ; GENERIC-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2326 ; GENERIC-NEXT: retq # sched: [1:1.00]
2328 ; HASWELL-LABEL: test_pcmpgtb:
2330 ; HASWELL-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2331 ; HASWELL-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2332 ; HASWELL-NEXT: retq # sched: [7:1.00]
2334 ; BROADWELL-LABEL: test_pcmpgtb:
2335 ; BROADWELL: # %bb.0:
2336 ; BROADWELL-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2337 ; BROADWELL-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
2338 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2340 ; SKYLAKE-LABEL: test_pcmpgtb:
2342 ; SKYLAKE-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2343 ; SKYLAKE-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2344 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2346 ; SKX-LABEL: test_pcmpgtb:
2348 ; SKX-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2349 ; SKX-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2350 ; SKX-NEXT: retq # sched: [7:1.00]
2352 ; ZNVER1-LABEL: test_pcmpgtb:
2354 ; ZNVER1-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2355 ; ZNVER1-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2356 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2357 %1 = icmp sgt <32 x i8> %a0, %a1
2358 %2 = sext <32 x i1> %1 to <32 x i8>
2359 %3 = load <32 x i8>, <32 x i8> *%a2, align 32
2360 %4 = icmp sgt <32 x i8> %2, %3
2361 %5 = sext <32 x i1> %4 to <32 x i8>
; Scheduling test for 256-bit vpcmpgtd. Two sext(icmp sgt) steps on <8 x i32>:
; %a0 against %a1, then that result against a vector loaded from %a2. The
; expectations show a register-form compare followed by a memory-form compare.
2365 define <8 x i32> @test_pcmpgtd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
2366 ; GENERIC-LABEL: test_pcmpgtd:
2368 ; GENERIC-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2369 ; GENERIC-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2370 ; GENERIC-NEXT: retq # sched: [1:1.00]
2372 ; HASWELL-LABEL: test_pcmpgtd:
2374 ; HASWELL-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2375 ; HASWELL-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2376 ; HASWELL-NEXT: retq # sched: [7:1.00]
2378 ; BROADWELL-LABEL: test_pcmpgtd:
2379 ; BROADWELL: # %bb.0:
2380 ; BROADWELL-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2381 ; BROADWELL-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
2382 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2384 ; SKYLAKE-LABEL: test_pcmpgtd:
2386 ; SKYLAKE-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2387 ; SKYLAKE-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2388 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2390 ; SKX-LABEL: test_pcmpgtd:
2392 ; SKX-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2393 ; SKX-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2394 ; SKX-NEXT: retq # sched: [7:1.00]
2396 ; ZNVER1-LABEL: test_pcmpgtd:
2398 ; ZNVER1-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2399 ; ZNVER1-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2400 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2401 %1 = icmp sgt <8 x i32> %a0, %a1
2402 %2 = sext <8 x i1> %1 to <8 x i32>
2403 %3 = load <8 x i32>, <8 x i32> *%a2, align 32
2404 %4 = icmp sgt <8 x i32> %2, %3
2405 %5 = sext <8 x i1> %4 to <8 x i32>
; Scheduling test for 256-bit vpcmpgtq. Two sext(icmp sgt) steps on <4 x i64>:
; %a0 against %a1, then that result against a vector loaded from %a2. The
; expectations show a register-form compare followed by a memory-form compare;
; the recorded latencies for this instruction differ noticeably between the
; CPU models listed below.
2409 define <4 x i64> @test_pcmpgtq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
2410 ; GENERIC-LABEL: test_pcmpgtq:
2412 ; GENERIC-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2413 ; GENERIC-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2414 ; GENERIC-NEXT: retq # sched: [1:1.00]
2416 ; HASWELL-LABEL: test_pcmpgtq:
2418 ; HASWELL-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
2419 ; HASWELL-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
2420 ; HASWELL-NEXT: retq # sched: [7:1.00]
2422 ; BROADWELL-LABEL: test_pcmpgtq:
2423 ; BROADWELL: # %bb.0:
2424 ; BROADWELL-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
2425 ; BROADWELL-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
2426 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2428 ; SKYLAKE-LABEL: test_pcmpgtq:
2430 ; SKYLAKE-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2431 ; SKYLAKE-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
2432 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2434 ; SKX-LABEL: test_pcmpgtq:
2436 ; SKX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2437 ; SKX-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
2438 ; SKX-NEXT: retq # sched: [7:1.00]
2440 ; ZNVER1-LABEL: test_pcmpgtq:
2442 ; ZNVER1-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2443 ; ZNVER1-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
2444 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2445 %1 = icmp sgt <4 x i64> %a0, %a1
2446 %2 = sext <4 x i1> %1 to <4 x i64>
2447 %3 = load <4 x i64>, <4 x i64> *%a2, align 32
2448 %4 = icmp sgt <4 x i64> %2, %3
2449 %5 = sext <4 x i1> %4 to <4 x i64>
; Scheduling test for 256-bit vpcmpgtw. Two sext(icmp sgt) steps on <16 x i16>:
; %a0 against %a1, then that result against a vector loaded from %a2. The
; expectations show a register-form compare followed by a memory-form compare.
2453 define <16 x i16> @test_pcmpgtw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
2454 ; GENERIC-LABEL: test_pcmpgtw:
2456 ; GENERIC-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2457 ; GENERIC-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2458 ; GENERIC-NEXT: retq # sched: [1:1.00]
2460 ; HASWELL-LABEL: test_pcmpgtw:
2462 ; HASWELL-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2463 ; HASWELL-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2464 ; HASWELL-NEXT: retq # sched: [7:1.00]
2466 ; BROADWELL-LABEL: test_pcmpgtw:
2467 ; BROADWELL: # %bb.0:
2468 ; BROADWELL-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2469 ; BROADWELL-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
2470 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2472 ; SKYLAKE-LABEL: test_pcmpgtw:
2474 ; SKYLAKE-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2475 ; SKYLAKE-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2476 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2478 ; SKX-LABEL: test_pcmpgtw:
2480 ; SKX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2481 ; SKX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2482 ; SKX-NEXT: retq # sched: [7:1.00]
2484 ; ZNVER1-LABEL: test_pcmpgtw:
2486 ; ZNVER1-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2487 ; ZNVER1-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2488 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2489 %1 = icmp sgt <16 x i16> %a0, %a1
2490 %2 = sext <16 x i1> %1 to <16 x i16>
2491 %3 = load <16 x i16>, <16 x i16> *%a2, align 32
2492 %4 = icmp sgt <16 x i16> %2, %3
2493 %5 = sext <16 x i1> %4 to <16 x i16>
; Scheduling test for vperm2i128. Both shufflevectors use mask <2,3,4,5>
; (elements 2-3 of %a0 followed by elements 0-1 of the second operand); the
; second operand is %a1 in one shuffle and a vector loaded from %a2 in the
; other. The two shuffles are summed, and the expectations show a register-form
; and a memory-form vperm2i128 followed by vpaddq.
2497 define <4 x i64> @test_perm2i128(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
2498 ; GENERIC-LABEL: test_perm2i128:
2500 ; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
2501 ; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [8:1.00]
2502 ; GENERIC-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
2503 ; GENERIC-NEXT: retq # sched: [1:1.00]
2505 ; HASWELL-LABEL: test_perm2i128:
2507 ; HASWELL-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
2508 ; HASWELL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
2509 ; HASWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
2510 ; HASWELL-NEXT: retq # sched: [7:1.00]
2512 ; BROADWELL-LABEL: test_perm2i128:
2513 ; BROADWELL: # %bb.0:
2514 ; BROADWELL-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
2515 ; BROADWELL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [9:1.00]
2516 ; BROADWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
2517 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2519 ; SKYLAKE-LABEL: test_perm2i128:
2521 ; SKYLAKE-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
2522 ; SKYLAKE-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
2523 ; SKYLAKE-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
2524 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2526 ; SKX-LABEL: test_perm2i128:
2528 ; SKX-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
2529 ; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
2530 ; SKX-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
2531 ; SKX-NEXT: retq # sched: [7:1.00]
2533 ; ZNVER1-LABEL: test_perm2i128:
2535 ; ZNVER1-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [2:0.25]
2536 ; ZNVER1-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [9:0.50]
2537 ; ZNVER1-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
2538 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2539 %1 = shufflevector <4 x i64> %a0, <4 x i64> %a1, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
2540 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
2541 %3 = shufflevector <4 x i64> %a0, <4 x i64> %2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
2542 %4 = add <4 x i64> %1, %3
; Scheduling test for vpermd. Calls llvm.x86.avx2.permd twice with %a0 as the
; second argument: once with %a1 and once with a vector loaded from %a2 as the
; first argument. The two results are added, and the expectations show a
; register-form and a memory-form vpermd followed by vpaddd.
2546 define <8 x i32> @test_permd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
2547 ; GENERIC-LABEL: test_permd:
2549 ; GENERIC-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [1:1.00]
2550 ; GENERIC-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
2551 ; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
2552 ; GENERIC-NEXT: retq # sched: [1:1.00]
2554 ; HASWELL-LABEL: test_permd:
2556 ; HASWELL-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
2557 ; HASWELL-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
2558 ; HASWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
2559 ; HASWELL-NEXT: retq # sched: [7:1.00]
2561 ; BROADWELL-LABEL: test_permd:
2562 ; BROADWELL: # %bb.0:
2563 ; BROADWELL-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
2564 ; BROADWELL-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
2565 ; BROADWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
2566 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2568 ; SKYLAKE-LABEL: test_permd:
2570 ; SKYLAKE-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
2571 ; SKYLAKE-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
2572 ; SKYLAKE-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
2573 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2575 ; SKX-LABEL: test_permd:
2577 ; SKX-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
2578 ; SKX-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
2579 ; SKX-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
2580 ; SKX-NEXT: retq # sched: [7:1.00]
2582 ; ZNVER1-LABEL: test_permd:
2584 ; ZNVER1-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [2:0.25]
2585 ; ZNVER1-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [9:0.50]
2586 ; ZNVER1-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
2587 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2588 %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a1, <8 x i32> %a0)
2589 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
2590 %3 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %2, <8 x i32> %a0)
2591 %4 = add <8 x i32> %1, %3
2594 declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>) nounwind readonly
; Scheduling test for vpermpd with an immediate permutation. One shufflevector
; permutes %a0 with mask <3,2,2,3>, the other permutes a vector loaded from
; %a1 with mask <0,2,2,3>; the results are combined with fadd. The expectations
; show a register-form and a memory-form vpermpd followed by vaddpd. In the
; znver1 expectations the memory form is listed first.
2596 define <4 x double> @test_permpd(<4 x double> %a0, <4 x double> *%a1) {
2597 ; GENERIC-LABEL: test_permpd:
2599 ; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [1:1.00]
2600 ; GENERIC-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [8:1.00]
2601 ; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2602 ; GENERIC-NEXT: retq # sched: [1:1.00]
2604 ; HASWELL-LABEL: test_permpd:
2606 ; HASWELL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
2607 ; HASWELL-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00]
2608 ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2609 ; HASWELL-NEXT: retq # sched: [7:1.00]
2611 ; BROADWELL-LABEL: test_permpd:
2612 ; BROADWELL: # %bb.0:
2613 ; BROADWELL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
2614 ; BROADWELL-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [9:1.00]
2615 ; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2616 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2618 ; SKYLAKE-LABEL: test_permpd:
2620 ; SKYLAKE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
2621 ; SKYLAKE-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00]
2622 ; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
2623 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2625 ; SKX-LABEL: test_permpd:
2627 ; SKX-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
2628 ; SKX-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00]
2629 ; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
2630 ; SKX-NEXT: retq # sched: [7:1.00]
2632 ; ZNVER1-LABEL: test_permpd:
2634 ; ZNVER1-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [107:0.50]
2635 ; ZNVER1-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [100:0.25]
2636 ; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2637 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2638 %1 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 2, i32 3>
2639 %2 = load <4 x double>, <4 x double> *%a1, align 32
2640 %3 = shufflevector <4 x double> %2, <4 x double> undef, <4 x i32> <i32 0, i32 2, i32 2, i32 3>
2641 %4 = fadd <4 x double> %1, %3
; Scheduling test for vpermps. Calls llvm.x86.avx2.permps twice with %a0 as
; the second (<8 x i32>) argument: once with %a1 and once with a vector loaded
; from %a2 as the first argument. The results are combined with fadd, and the
; expectations show a register-form and a memory-form vpermps followed by
; vaddps.
2645 define <8 x float> @test_permps(<8 x i32> %a0, <8 x float> %a1, <8 x float> *%a2) {
2646 ; GENERIC-LABEL: test_permps:
2648 ; GENERIC-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [1:1.00]
2649 ; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
2650 ; GENERIC-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
2651 ; GENERIC-NEXT: retq # sched: [1:1.00]
2653 ; HASWELL-LABEL: test_permps:
2655 ; HASWELL-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
2656 ; HASWELL-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
2657 ; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
2658 ; HASWELL-NEXT: retq # sched: [7:1.00]
2660 ; BROADWELL-LABEL: test_permps:
2661 ; BROADWELL: # %bb.0:
2662 ; BROADWELL-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
2663 ; BROADWELL-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
2664 ; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
2665 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2667 ; SKYLAKE-LABEL: test_permps:
2669 ; SKYLAKE-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
2670 ; SKYLAKE-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
2671 ; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
2672 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2674 ; SKX-LABEL: test_permps:
2676 ; SKX-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
2677 ; SKX-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
2678 ; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
2679 ; SKX-NEXT: retq # sched: [7:1.00]
2681 ; ZNVER1-LABEL: test_permps:
2683 ; ZNVER1-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [100:0.25]
2684 ; ZNVER1-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [107:0.50]
2685 ; ZNVER1-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
2686 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2687 %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a1, <8 x i32> %a0)
2688 %2 = load <8 x float>, <8 x float> *%a2, align 32
2689 %3 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %2, <8 x i32> %a0)
2690 %4 = fadd <8 x float> %1, %3
2693 declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>) nounwind readonly
; Scheduling test for vpermq with an immediate permutation. One shufflevector
; permutes %a0 with mask <3,2,2,3>, the other permutes a vector loaded from
; %a1 with mask <0,2,2,3>; the results are added. The expectations show a
; register-form and a memory-form vpermq followed by vpaddq. In the znver1
; expectations the memory form is listed first.
2695 define <4 x i64> @test_permq(<4 x i64> %a0, <4 x i64> *%a1) {
2696 ; GENERIC-LABEL: test_permq:
2698 ; GENERIC-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [1:1.00]
2699 ; GENERIC-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [8:1.00]
2700 ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2701 ; GENERIC-NEXT: retq # sched: [1:1.00]
2703 ; HASWELL-LABEL: test_permq:
2705 ; HASWELL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
2706 ; HASWELL-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00]
2707 ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2708 ; HASWELL-NEXT: retq # sched: [7:1.00]
2710 ; BROADWELL-LABEL: test_permq:
2711 ; BROADWELL: # %bb.0:
2712 ; BROADWELL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
2713 ; BROADWELL-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [9:1.00]
2714 ; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2715 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2717 ; SKYLAKE-LABEL: test_permq:
2719 ; SKYLAKE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
2720 ; SKYLAKE-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00]
2721 ; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
2722 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2724 ; SKX-LABEL: test_permq:
2726 ; SKX-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
2727 ; SKX-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00]
2728 ; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
2729 ; SKX-NEXT: retq # sched: [7:1.00]
2731 ; ZNVER1-LABEL: test_permq:
2733 ; ZNVER1-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [9:0.50]
2734 ; ZNVER1-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [2:0.25]
2735 ; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2736 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2737 %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 2, i32 3>
2738 %2 = load <4 x i64>, <4 x i64> *%a1, align 32
2739 %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> <i32 0, i32 2, i32 2, i32 3>
2740 %4 = add <4 x i64> %1, %3
; Scheduling test for the 128-bit vpgatherdd. A single call to
; llvm.x86.avx2.gather.d.d with a final i8 argument of 2 is expected to emit
; one vpgatherdd using (%rdi,%xmm1,2) addressing.
2744 define <4 x i32> @test_pgatherdd(<4 x i32> %a0, i8* %a1, <4 x i32> %a2, <4 x i32> %a3) {
2745 ; GENERIC-LABEL: test_pgatherdd:
2747 ; GENERIC-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
2748 ; GENERIC-NEXT: retq # sched: [1:1.00]
2750 ; HASWELL-LABEL: test_pgatherdd:
2752 ; HASWELL-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [26:2.67]
2753 ; HASWELL-NEXT: retq # sched: [7:1.00]
2755 ; BROADWELL-LABEL: test_pgatherdd:
2756 ; BROADWELL: # %bb.0:
2757 ; BROADWELL-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
2758 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2760 ; SKYLAKE-LABEL: test_pgatherdd:
2762 ; SKYLAKE-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
2763 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2765 ; SKX-LABEL: test_pgatherdd:
2767 ; SKX-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
2768 ; SKX-NEXT: retq # sched: [7:1.00]
2770 ; ZNVER1-LABEL: test_pgatherdd:
2772 ; ZNVER1-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:0.25]
2773 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2774 %1 = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> %a0, i8* %a1, <4 x i32> %a2, <4 x i32> %a3, i8 2)
2777 declare <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32>, i8*, <4 x i32>, <4 x i32>, i8) nounwind readonly
; Scheduling test for the 256-bit vpgatherdd. A single call to
; llvm.x86.avx2.gather.d.d.256 with a final i8 argument of 2 is expected to
; emit one vpgatherdd using (%rdi,%ymm1,2) addressing.
2779 define <8 x i32> @test_pgatherdd_ymm(<8 x i32> %a0, i8* %a1, <8 x i32> %a2, <8 x i32> %a3) {
2780 ; GENERIC-LABEL: test_pgatherdd_ymm:
2782 ; GENERIC-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [5:0.50]
2783 ; GENERIC-NEXT: retq # sched: [1:1.00]
2785 ; HASWELL-LABEL: test_pgatherdd_ymm:
2787 ; HASWELL-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [27:6.50]
2788 ; HASWELL-NEXT: retq # sched: [7:1.00]
2790 ; BROADWELL-LABEL: test_pgatherdd_ymm:
2791 ; BROADWELL: # %bb.0:
2792 ; BROADWELL-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [5:0.50]
2793 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2795 ; SKYLAKE-LABEL: test_pgatherdd_ymm:
2797 ; SKYLAKE-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [25:1.00]
2798 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2800 ; SKX-LABEL: test_pgatherdd_ymm:
2802 ; SKX-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [25:1.00]
2803 ; SKX-NEXT: retq # sched: [7:1.00]
2805 ; ZNVER1-LABEL: test_pgatherdd_ymm:
2807 ; ZNVER1-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [100:0.25]
2808 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2809 %1 = call <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32> %a0, i8* %a1, <8 x i32> %a2, <8 x i32> %a3, i8 2)
2812 declare <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32>, i8*, <8 x i32>, <8 x i32>, i8) nounwind readonly
; Scheduling test for the 128-bit vpgatherdq. A single call to
; llvm.x86.avx2.gather.d.q (<4 x i32> third argument, <2 x i64> result) with a
; final i8 argument of 2 is expected to emit one vpgatherdq using
; (%rdi,%xmm1,2) addressing.
2814 define <2 x i64> @test_pgatherdq(<2 x i64> %a0, i8* %a1, <4 x i32> %a2, <2 x i64> %a3) {
2815 ; GENERIC-LABEL: test_pgatherdq:
2817 ; GENERIC-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
2818 ; GENERIC-NEXT: retq # sched: [1:1.00]
2820 ; HASWELL-LABEL: test_pgatherdq:
2822 ; HASWELL-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [26:2.67]
2823 ; HASWELL-NEXT: retq # sched: [7:1.00]
2825 ; BROADWELL-LABEL: test_pgatherdq:
2826 ; BROADWELL: # %bb.0:
2827 ; BROADWELL-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
2828 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2830 ; SKYLAKE-LABEL: test_pgatherdq:
2832 ; SKYLAKE-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
2833 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2835 ; SKX-LABEL: test_pgatherdq:
2837 ; SKX-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
2838 ; SKX-NEXT: retq # sched: [7:1.00]
2840 ; ZNVER1-LABEL: test_pgatherdq:
2842 ; ZNVER1-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:0.25]
2843 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2844 %1 = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> %a0, i8* %a1, <4 x i32> %a2, <2 x i64> %a3, i8 2)
2847 declare <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64>, i8*, <4 x i32>, <2 x i64>, i8) nounwind readonly
; Scheduling test for the 256-bit vpgatherdq. A single call to
; llvm.x86.avx2.gather.d.q.256 (<4 x i32> third argument, <4 x i64> result)
; with a final i8 argument of 2 is expected to emit one vpgatherdq whose
; address uses an xmm register, (%rdi,%xmm1,2), while the other operands are
; ymm registers.
2849 define <4 x i64> @test_pgatherdq_ymm(<4 x i64> %a0, i8* %a1, <4 x i32> %a2, <4 x i64> %a3) {
2850 ; GENERIC-LABEL: test_pgatherdq_ymm:
2852 ; GENERIC-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [5:0.50]
2853 ; GENERIC-NEXT: retq # sched: [1:1.00]
2855 ; HASWELL-LABEL: test_pgatherdq_ymm:
2857 ; HASWELL-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [27:4.00]
2858 ; HASWELL-NEXT: retq # sched: [7:1.00]
2860 ; BROADWELL-LABEL: test_pgatherdq_ymm:
2861 ; BROADWELL: # %bb.0:
2862 ; BROADWELL-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [5:0.50]
2863 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2865 ; SKYLAKE-LABEL: test_pgatherdq_ymm:
2867 ; SKYLAKE-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [25:1.00]
2868 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2870 ; SKX-LABEL: test_pgatherdq_ymm:
2872 ; SKX-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [25:1.00]
2873 ; SKX-NEXT: retq # sched: [7:1.00]
2875 ; ZNVER1-LABEL: test_pgatherdq_ymm:
2877 ; ZNVER1-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [100:0.25]
2878 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2879 %1 = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> %a0, i8* %a1, <4 x i32> %a2, <4 x i64> %a3, i8 2)
2882 declare <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64>, i8*, <4 x i32>, <4 x i64>, i8) nounwind readonly
; Scheduling test for the 128-bit vpgatherqd. A single call to
; llvm.x86.avx2.gather.q.d (<2 x i64> third argument, <4 x i32> result) with a
; final i8 argument of 2 is expected to emit one vpgatherqd using
; (%rdi,%xmm1,2) addressing.
2884 define <4 x i32> @test_pgatherqd(<4 x i32> %a0, i8* %a1, <2 x i64> %a2, <4 x i32> %a3) {
2885 ; GENERIC-LABEL: test_pgatherqd:
2887 ; GENERIC-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
2888 ; GENERIC-NEXT: retq # sched: [1:1.00]
2890 ; HASWELL-LABEL: test_pgatherqd:
2892 ; HASWELL-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [25:5.00]
2893 ; HASWELL-NEXT: retq # sched: [7:1.00]
2895 ; BROADWELL-LABEL: test_pgatherqd:
2896 ; BROADWELL: # %bb.0:
2897 ; BROADWELL-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
2898 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2900 ; SKYLAKE-LABEL: test_pgatherqd:
2902 ; SKYLAKE-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
2903 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2905 ; SKX-LABEL: test_pgatherqd:
2907 ; SKX-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
2908 ; SKX-NEXT: retq # sched: [7:1.00]
2910 ; ZNVER1-LABEL: test_pgatherqd:
2912 ; ZNVER1-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:0.25]
2913 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2914 %1 = call <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32> %a0, i8* %a1, <2 x i64> %a2, <4 x i32> %a3, i8 2)
2917 declare <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32>, i8*, <2 x i64>, <4 x i32>, i8) nounwind readonly
; Scheduling test for vpgatherqd with a 256-bit third argument. A single call
; to llvm.x86.avx2.gather.q.d.256 (<4 x i64> third argument, <4 x i32> result)
; with a final i8 argument of 2 is expected to emit one vpgatherqd using
; (%rdi,%ymm1,2) addressing with xmm mask/destination registers. Because a ymm
; register is used while the result is only 128 bits wide, every expectation
; also contains a vzeroupper before the return.
2919 define <4 x i32> @test_pgatherqd_ymm(<4 x i32> %a0, i8* %a1, <4 x i64> %a2, <4 x i32> %a3) {
2920 ; GENERIC-LABEL: test_pgatherqd_ymm:
2922 ; GENERIC-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [5:0.50]
2923 ; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
2924 ; GENERIC-NEXT: retq # sched: [1:1.00]
2926 ; HASWELL-LABEL: test_pgatherqd_ymm:
2928 ; HASWELL-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [28:5.00]
2929 ; HASWELL-NEXT: vzeroupper # sched: [0:1.00]
2930 ; HASWELL-NEXT: retq # sched: [7:1.00]
2932 ; BROADWELL-LABEL: test_pgatherqd_ymm:
2933 ; BROADWELL: # %bb.0:
2934 ; BROADWELL-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [5:0.50]
2935 ; BROADWELL-NEXT: vzeroupper # sched: [0:1.00]
2936 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2938 ; SKYLAKE-LABEL: test_pgatherqd_ymm:
2940 ; SKYLAKE-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [25:1.00]
2941 ; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67]
2942 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2944 ; SKX-LABEL: test_pgatherqd_ymm:
2946 ; SKX-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [25:1.00]
2947 ; SKX-NEXT: vzeroupper # sched: [0:0.67]
2948 ; SKX-NEXT: retq # sched: [7:1.00]
2950 ; ZNVER1-LABEL: test_pgatherqd_ymm:
2952 ; ZNVER1-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [100:0.25]
2953 ; ZNVER1-NEXT: vzeroupper # sched: [100:0.25]
2954 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2955 %1 = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> %a0, i8* %a1, <4 x i64> %a2, <4 x i32> %a3, i8 2)
2958 declare <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32>, i8*, <4 x i64>, <4 x i32>, i8) nounwind readonly
; Scheduling test for the 128-bit vpgatherqq. A single call to
; llvm.x86.avx2.gather.q.q with a final i8 argument of 2 is expected to emit
; one vpgatherqq using (%rdi,%xmm1,2) addressing.
2960 define <2 x i64> @test_pgatherqq(<2 x i64> %a0, i8 *%a1, <2 x i64> %a2, <2 x i64> %a3) {
2961 ; GENERIC-LABEL: test_pgatherqq:
2963 ; GENERIC-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
2964 ; GENERIC-NEXT: retq # sched: [1:1.00]
2966 ; HASWELL-LABEL: test_pgatherqq:
2968 ; HASWELL-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [23:3.33]
2969 ; HASWELL-NEXT: retq # sched: [7:1.00]
2971 ; BROADWELL-LABEL: test_pgatherqq:
2972 ; BROADWELL: # %bb.0:
2973 ; BROADWELL-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
2974 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2976 ; SKYLAKE-LABEL: test_pgatherqq:
2978 ; SKYLAKE-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
2979 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2981 ; SKX-LABEL: test_pgatherqq:
2983 ; SKX-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
2984 ; SKX-NEXT: retq # sched: [7:1.00]
2986 ; ZNVER1-LABEL: test_pgatherqq:
2988 ; ZNVER1-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:0.25]
2989 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2990 %1 = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> %a0, i8* %a1, <2 x i64> %a2, <2 x i64> %a3, i8 2)
2993 declare <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64>, i8*, <2 x i64>, <2 x i64>, i8) nounwind readonly
; Scheduling test for the 256-bit vpgatherqq. A single call to
; llvm.x86.avx2.gather.q.q.256 with a final i8 argument of 2 is expected to
; emit one vpgatherqq using (%rdi,%ymm1,2) addressing.
2995 define <4 x i64> @test_pgatherqq_ymm(<4 x i64> %a0, i8 *%a1, <4 x i64> %a2, <4 x i64> %a3) {
2996 ; GENERIC-LABEL: test_pgatherqq_ymm:
2998 ; GENERIC-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [5:0.50]
2999 ; GENERIC-NEXT: retq # sched: [1:1.00]
3001 ; HASWELL-LABEL: test_pgatherqq_ymm:
3003 ; HASWELL-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [24:5.00]
3004 ; HASWELL-NEXT: retq # sched: [7:1.00]
3006 ; BROADWELL-LABEL: test_pgatherqq_ymm:
3007 ; BROADWELL: # %bb.0:
3008 ; BROADWELL-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [5:0.50]
3009 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3011 ; SKYLAKE-LABEL: test_pgatherqq_ymm:
3013 ; SKYLAKE-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [25:1.00]
3014 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3016 ; SKX-LABEL: test_pgatherqq_ymm:
3018 ; SKX-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [25:1.00]
3019 ; SKX-NEXT: retq # sched: [7:1.00]
3021 ; ZNVER1-LABEL: test_pgatherqq_ymm:
3023 ; ZNVER1-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [100:0.25]
3024 ; ZNVER1-NEXT: retq # sched: [1:0.50]
3025 %1 = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> %a0, i8* %a1, <4 x i64> %a2, <4 x i64> %a3, i8 2)
3028 declare <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64>, i8*, <4 x i64>, <4 x i64>, i8) nounwind readonly
; Scheduling test for 256-bit vphaddd. Calls llvm.x86.avx2.phadd.d on
; (%a0, %a1), then again on that result and a vector loaded from %a2. The
; expectations show a register-form vphaddd followed by a memory-form vphaddd.
3030 define <8 x i32> @test_phaddd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
3031 ; GENERIC-LABEL: test_phaddd:
3033 ; GENERIC-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.50]
3034 ; GENERIC-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [10:1.50]
3035 ; GENERIC-NEXT: retq # sched: [1:1.00]
3037 ; HASWELL-LABEL: test_phaddd:
3039 ; HASWELL-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3040 ; HASWELL-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3041 ; HASWELL-NEXT: retq # sched: [7:1.00]
3043 ; BROADWELL-LABEL: test_phaddd:
3044 ; BROADWELL: # %bb.0:
3045 ; BROADWELL-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3046 ; BROADWELL-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
3047 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3049 ; SKYLAKE-LABEL: test_phaddd:
3051 ; SKYLAKE-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3052 ; SKYLAKE-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3053 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3055 ; SKX-LABEL: test_phaddd:
3057 ; SKX-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3058 ; SKX-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3059 ; SKX-NEXT: retq # sched: [7:1.00]
3061 ; ZNVER1-LABEL: test_phaddd:
3063 ; ZNVER1-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [100:0.25]
3064 ; ZNVER1-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [100:0.25]
3065 ; ZNVER1-NEXT: retq # sched: [1:0.50]
3066 %1 = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %a0, <8 x i32> %a1)
3067 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
3068 %3 = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %1, <8 x i32> %2)
3071 declare <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32>, <8 x i32>) nounwind readnone
; Scheduling test for 256-bit vphaddsw. Calls llvm.x86.avx2.phadd.sw on
; (%a0, %a1), then again on that result and a vector loaded from %a2. The
; expectations show a register-form vphaddsw followed by a memory-form
; vphaddsw.
3073 define <16 x i16> @test_phaddsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
3074 ; GENERIC-LABEL: test_phaddsw:
3076 ; GENERIC-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:1.50]
3077 ; GENERIC-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [10:1.50]
3078 ; GENERIC-NEXT: retq # sched: [1:1.00]
3080 ; HASWELL-LABEL: test_phaddsw:
3082 ; HASWELL-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3083 ; HASWELL-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3084 ; HASWELL-NEXT: retq # sched: [7:1.00]
3086 ; BROADWELL-LABEL: test_phaddsw:
3087 ; BROADWELL: # %bb.0:
3088 ; BROADWELL-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3089 ; BROADWELL-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
3090 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3092 ; SKYLAKE-LABEL: test_phaddsw:
3094 ; SKYLAKE-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3095 ; SKYLAKE-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3096 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3098 ; SKX-LABEL: test_phaddsw:
3100 ; SKX-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3101 ; SKX-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3102 ; SKX-NEXT: retq # sched: [7:1.00]
3104 ; ZNVER1-LABEL: test_phaddsw:
3106 ; ZNVER1-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [100:0.25]
3107 ; ZNVER1-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [100:0.25]
3108 ; ZNVER1-NEXT: retq # sched: [1:0.50]
3109 %1 = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %a0, <16 x i16> %a1)
3110 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
3111 %3 = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %1, <16 x i16> %2)
3114 declare <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16>, <16 x i16>) nounwind readnone
; Scheduling test for 256-bit vphaddw. Calls llvm.x86.avx2.phadd.w on
; (%a0, %a1), then again on that result and a vector loaded from %a2. The
; expectations show a register-form vphaddw followed by a memory-form vphaddw.
3116 define <16 x i16> @test_phaddw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
3117 ; GENERIC-LABEL: test_phaddw:
3119 ; GENERIC-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.50]
3120 ; GENERIC-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [10:1.50]
3121 ; GENERIC-NEXT: retq # sched: [1:1.00]
3123 ; HASWELL-LABEL: test_phaddw:
3125 ; HASWELL-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3126 ; HASWELL-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3127 ; HASWELL-NEXT: retq # sched: [7:1.00]
3129 ; BROADWELL-LABEL: test_phaddw:
3130 ; BROADWELL: # %bb.0:
3131 ; BROADWELL-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3132 ; BROADWELL-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
3133 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3135 ; SKYLAKE-LABEL: test_phaddw:
3137 ; SKYLAKE-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3138 ; SKYLAKE-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3139 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3141 ; SKX-LABEL: test_phaddw:
3143 ; SKX-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3144 ; SKX-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3145 ; SKX-NEXT: retq # sched: [7:1.00]
3147 ; ZNVER1-LABEL: test_phaddw:
3149 ; ZNVER1-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [100:0.25]
3150 ; ZNVER1-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [100:0.25]
3151 ; ZNVER1-NEXT: retq # sched: [1:0.50]
3152 %1 = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %a0, <16 x i16> %a1)
3153 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
3154 %3 = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %1, <16 x i16> %2)
3157 declare <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16>, <16 x i16>) nounwind readnone
; Scheduling test for the 256-bit vphsubd instruction, driven through the
; llvm.x86.avx2.phsub.d intrinsic. The expected lines are the per-CPU output
; of llc -print-schedule (see the RUN lines at the top of the file); regenerate
; them with utils/update_llc_test_checks.py instead of editing by hand.
; NOTE(review): the znver1 expectation is [100:0.25] for both forms, far from
; the other CPUs' values - looks like a scheduling-model default; confirm.
3159 define <8 x i32> @test_phsubd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
3160 ; GENERIC-LABEL: test_phsubd:
3162 ; GENERIC-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:1.50]
3163 ; GENERIC-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [10:1.50]
3164 ; GENERIC-NEXT: retq # sched: [1:1.00]
3166 ; HASWELL-LABEL: test_phsubd:
3168 ; HASWELL-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3169 ; HASWELL-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3170 ; HASWELL-NEXT: retq # sched: [7:1.00]
3172 ; BROADWELL-LABEL: test_phsubd:
3173 ; BROADWELL: # %bb.0:
3174 ; BROADWELL-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3175 ; BROADWELL-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
3176 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3178 ; SKYLAKE-LABEL: test_phsubd:
3180 ; SKYLAKE-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3181 ; SKYLAKE-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3182 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3184 ; SKX-LABEL: test_phsubd:
3186 ; SKX-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3187 ; SKX-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3188 ; SKX-NEXT: retq # sched: [7:1.00]
3190 ; ZNVER1-LABEL: test_phsubd:
3192 ; ZNVER1-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [100:0.25]
3193 ; ZNVER1-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [100:0.25]
3194 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; The first call uses two register operands; the second reuses that result and
; loads its other operand from %a2, which produces the (%rdi) memory form.
3195 %1 = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %a0, <8 x i32> %a1)
3196 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
3197 %3 = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %1, <8 x i32> %2)
; Declaration of the intrinsic called above.
3200 declare <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32>, <8 x i32>) nounwind readnone
; Scheduling test for the 256-bit vphsubsw instruction, driven through the
; llvm.x86.avx2.phsub.sw intrinsic. The expected lines are the per-CPU output
; of llc -print-schedule (see the RUN lines at the top of the file); regenerate
; them with utils/update_llc_test_checks.py instead of editing by hand.
; NOTE(review): the znver1 expectation is [100:0.25] for both forms, far from
; the other CPUs' values - looks like a scheduling-model default; confirm.
3202 define <16 x i16> @test_phsubsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
3203 ; GENERIC-LABEL: test_phsubsw:
3205 ; GENERIC-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:1.50]
3206 ; GENERIC-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [10:1.50]
3207 ; GENERIC-NEXT: retq # sched: [1:1.00]
3209 ; HASWELL-LABEL: test_phsubsw:
3211 ; HASWELL-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3212 ; HASWELL-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3213 ; HASWELL-NEXT: retq # sched: [7:1.00]
3215 ; BROADWELL-LABEL: test_phsubsw:
3216 ; BROADWELL: # %bb.0:
3217 ; BROADWELL-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3218 ; BROADWELL-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
3219 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3221 ; SKYLAKE-LABEL: test_phsubsw:
3223 ; SKYLAKE-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3224 ; SKYLAKE-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3225 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3227 ; SKX-LABEL: test_phsubsw:
3229 ; SKX-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3230 ; SKX-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3231 ; SKX-NEXT: retq # sched: [7:1.00]
3233 ; ZNVER1-LABEL: test_phsubsw:
3235 ; ZNVER1-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [100:0.25]
3236 ; ZNVER1-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [100:0.25]
3237 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; The first call uses two register operands; the second reuses that result and
; loads its other operand from %a2, which produces the (%rdi) memory form.
3238 %1 = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %a0, <16 x i16> %a1)
3239 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
3240 %3 = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %1, <16 x i16> %2)
; Declaration of the intrinsic called above.
3243 declare <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16>, <16 x i16>) nounwind readnone
; Scheduling test for the 256-bit vphsubw instruction, driven through the
; llvm.x86.avx2.phsub.w intrinsic. The expected lines are the per-CPU output
; of llc -print-schedule (see the RUN lines at the top of the file); regenerate
; them with utils/update_llc_test_checks.py instead of editing by hand.
; NOTE(review): the znver1 expectation is [100:0.25] for both forms, far from
; the other CPUs' values - looks like a scheduling-model default; confirm.
3245 define <16 x i16> @test_phsubw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
3246 ; GENERIC-LABEL: test_phsubw:
3248 ; GENERIC-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:1.50]
3249 ; GENERIC-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [10:1.50]
3250 ; GENERIC-NEXT: retq # sched: [1:1.00]
3252 ; HASWELL-LABEL: test_phsubw:
3254 ; HASWELL-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3255 ; HASWELL-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3256 ; HASWELL-NEXT: retq # sched: [7:1.00]
3258 ; BROADWELL-LABEL: test_phsubw:
3259 ; BROADWELL: # %bb.0:
3260 ; BROADWELL-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3261 ; BROADWELL-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
3262 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3264 ; SKYLAKE-LABEL: test_phsubw:
3266 ; SKYLAKE-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3267 ; SKYLAKE-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3268 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3270 ; SKX-LABEL: test_phsubw:
3272 ; SKX-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3273 ; SKX-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3274 ; SKX-NEXT: retq # sched: [7:1.00]
3276 ; ZNVER1-LABEL: test_phsubw:
3278 ; ZNVER1-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [100:0.25]
3279 ; ZNVER1-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [100:0.25]
3280 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; The first call uses two register operands; the second reuses that result and
; loads its other operand from %a2, which produces the (%rdi) memory form.
3281 %1 = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %a0, <16 x i16> %a1)
3282 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
3283 %3 = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %1, <16 x i16> %2)
; Declaration of the intrinsic called above.
3286 declare <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16>, <16 x i16>) nounwind readnone
; Scheduling test for the 256-bit vpmaddubsw instruction, driven through the
; llvm.x86.avx2.pmadd.ub.sw intrinsic. The expected lines are the per-CPU
; output of llc -print-schedule (see the RUN lines at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of editing by
; hand.
3288 define <16 x i16> @test_pmaddubsw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
3289 ; GENERIC-LABEL: test_pmaddubsw:
3291 ; GENERIC-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
3292 ; GENERIC-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
3293 ; GENERIC-NEXT: retq # sched: [1:1.00]
3295 ; HASWELL-LABEL: test_pmaddubsw:
3297 ; HASWELL-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
3298 ; HASWELL-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
3299 ; HASWELL-NEXT: retq # sched: [7:1.00]
3301 ; BROADWELL-LABEL: test_pmaddubsw:
3302 ; BROADWELL: # %bb.0:
3303 ; BROADWELL-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
3304 ; BROADWELL-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
3305 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3307 ; SKYLAKE-LABEL: test_pmaddubsw:
3309 ; SKYLAKE-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
3310 ; SKYLAKE-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
3311 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3313 ; SKX-LABEL: test_pmaddubsw:
3315 ; SKX-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
3316 ; SKX-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
3317 ; SKX-NEXT: retq # sched: [7:1.00]
3319 ; ZNVER1-LABEL: test_pmaddubsw:
3321 ; ZNVER1-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
3322 ; ZNVER1-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
3323 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; The first call uses two register operands. Its <16 x i16> result is bitcast
; back to <32 x i8> so it can feed the second call, whose other operand is
; loaded from %a2 to produce the (%rdi) memory form.
3324 %1 = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %a0, <32 x i8> %a1)
3325 %2 = bitcast <16 x i16> %1 to <32 x i8>
3326 %3 = load <32 x i8>, <32 x i8> *%a2, align 32
3327 %4 = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %2, <32 x i8> %3)
; Declaration of the intrinsic called above.
3330 declare <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>) nounwind readnone
; Scheduling test for the 256-bit vpmaddwd instruction, driven through the
; llvm.x86.avx2.pmadd.wd intrinsic. The expected lines are the per-CPU output
; of llc -print-schedule (see the RUN lines at the top of the file); regenerate
; them with utils/update_llc_test_checks.py instead of editing by hand.
3332 define <8 x i32> @test_pmaddwd(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
3333 ; GENERIC-LABEL: test_pmaddwd:
3335 ; GENERIC-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
3336 ; GENERIC-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
3337 ; GENERIC-NEXT: retq # sched: [1:1.00]
3339 ; HASWELL-LABEL: test_pmaddwd:
3341 ; HASWELL-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
3342 ; HASWELL-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
3343 ; HASWELL-NEXT: retq # sched: [7:1.00]
3345 ; BROADWELL-LABEL: test_pmaddwd:
3346 ; BROADWELL: # %bb.0:
3347 ; BROADWELL-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
3348 ; BROADWELL-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
3349 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3351 ; SKYLAKE-LABEL: test_pmaddwd:
3353 ; SKYLAKE-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
3354 ; SKYLAKE-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
3355 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3357 ; SKX-LABEL: test_pmaddwd:
3359 ; SKX-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
3360 ; SKX-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
3361 ; SKX-NEXT: retq # sched: [7:1.00]
3363 ; ZNVER1-LABEL: test_pmaddwd:
3365 ; ZNVER1-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
3366 ; ZNVER1-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
3367 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; The first call uses two register operands. Its <8 x i32> result is bitcast
; back to <16 x i16> so it can feed the second call, whose other operand is
; loaded from %a2 to produce the (%rdi) memory form.
3368 %1 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %a0, <16 x i16> %a1)
3369 %2 = bitcast <8 x i32> %1 to <16 x i16>
3370 %3 = load <16 x i16>, <16 x i16> *%a2, align 32
3371 %4 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %2, <16 x i16> %3)
; Declaration of the intrinsic called above.
3374 declare <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>) nounwind readnone
; Scheduling test for the 128-bit vpmaskmovd instruction in both its load and
; store forms, driven through the llvm.x86.avx2.maskload.d and
; llvm.x86.avx2.maskstore.d intrinsics. The expected lines are the per-CPU
; output of llc -print-schedule (see the RUN lines at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of editing by
; hand.
; NOTE(review): the znver1 expectation is [100:0.25] for both vpmaskmovd forms,
; far from the other CPUs' values - looks like a scheduling-model default;
; confirm.
3376 define <4 x i32> @test_pmaskmovd(i8* %a0, <4 x i32> %a1, <4 x i32> %a2) {
3377 ; GENERIC-LABEL: test_pmaskmovd:
3379 ; GENERIC-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [8:1.00]
3380 ; GENERIC-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
3381 ; GENERIC-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33]
3382 ; GENERIC-NEXT: retq # sched: [1:1.00]
3384 ; HASWELL-LABEL: test_pmaskmovd:
3386 ; HASWELL-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [8:2.00]
3387 ; HASWELL-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
3388 ; HASWELL-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33]
3389 ; HASWELL-NEXT: retq # sched: [7:1.00]
3391 ; BROADWELL-LABEL: test_pmaskmovd:
3392 ; BROADWELL: # %bb.0:
3393 ; BROADWELL-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [7:2.00]
3394 ; BROADWELL-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
3395 ; BROADWELL-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33]
3396 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3398 ; SKYLAKE-LABEL: test_pmaskmovd:
3400 ; SKYLAKE-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [7:0.50]
3401 ; SKYLAKE-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [2:1.00]
3402 ; SKYLAKE-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33]
3403 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3405 ; SKX-LABEL: test_pmaskmovd:
3407 ; SKX-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [7:0.50]
3408 ; SKX-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [2:1.00]
3409 ; SKX-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33]
3410 ; SKX-NEXT: retq # sched: [7:1.00]
3412 ; ZNVER1-LABEL: test_pmaskmovd:
3414 ; ZNVER1-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [100:0.25]
3415 ; ZNVER1-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [100:0.25]
3416 ; ZNVER1-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25]
3417 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Load and store go through the same pointer %a0, and both calls receive %a1.
; Presumably %a1 is the lane mask and %a2 the data being stored - confirm
; against the intrinsic definitions.
3418 %1 = call <4 x i32> @llvm.x86.avx2.maskload.d(i8* %a0, <4 x i32> %a1)
3419 call void @llvm.x86.avx2.maskstore.d(i8* %a0, <4 x i32> %a1, <4 x i32> %a2)
; Declarations of the two intrinsics called above.
3422 declare <4 x i32> @llvm.x86.avx2.maskload.d(i8*, <4 x i32>) nounwind readonly
3423 declare void @llvm.x86.avx2.maskstore.d(i8*, <4 x i32>, <4 x i32>) nounwind
; Scheduling test for the 256-bit vpmaskmovd instruction in both its load and
; store forms, driven through the llvm.x86.avx2.maskload.d.256 and
; llvm.x86.avx2.maskstore.d.256 intrinsics. The expected lines are the per-CPU
; output of llc -print-schedule (see the RUN lines at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of editing by
; hand.
; NOTE(review): the znver1 expectation is [100:0.25] for both vpmaskmovd forms,
; far from the other CPUs' values - looks like a scheduling-model default;
; confirm.
3425 define <8 x i32> @test_pmaskmovd_ymm(i8* %a0, <8 x i32> %a1, <8 x i32> %a2) {
3426 ; GENERIC-LABEL: test_pmaskmovd_ymm:
3428 ; GENERIC-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [9:1.00]
3429 ; GENERIC-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
3430 ; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
3431 ; GENERIC-NEXT: retq # sched: [1:1.00]
3433 ; HASWELL-LABEL: test_pmaskmovd_ymm:
3435 ; HASWELL-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [9:2.00]
3436 ; HASWELL-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
3437 ; HASWELL-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33]
3438 ; HASWELL-NEXT: retq # sched: [7:1.00]
3440 ; BROADWELL-LABEL: test_pmaskmovd_ymm:
3441 ; BROADWELL: # %bb.0:
3442 ; BROADWELL-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [8:2.00]
3443 ; BROADWELL-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
3444 ; BROADWELL-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33]
3445 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3447 ; SKYLAKE-LABEL: test_pmaskmovd_ymm:
3449 ; SKYLAKE-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [8:0.50]
3450 ; SKYLAKE-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [2:1.00]
3451 ; SKYLAKE-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33]
3452 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3454 ; SKX-LABEL: test_pmaskmovd_ymm:
3456 ; SKX-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [8:0.50]
3457 ; SKX-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [2:1.00]
3458 ; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33]
3459 ; SKX-NEXT: retq # sched: [7:1.00]
3461 ; ZNVER1-LABEL: test_pmaskmovd_ymm:
3463 ; ZNVER1-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [100:0.25]
3464 ; ZNVER1-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [100:0.25]
3465 ; ZNVER1-NEXT: vmovdqa %ymm2, %ymm0 # sched: [2:0.25]
3466 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Load and store go through the same pointer %a0, and both calls receive %a1.
; Presumably %a1 is the lane mask and %a2 the data being stored - confirm
; against the intrinsic definitions.
3467 %1 = call <8 x i32> @llvm.x86.avx2.maskload.d.256(i8* %a0, <8 x i32> %a1)
3468 call void @llvm.x86.avx2.maskstore.d.256(i8* %a0, <8 x i32> %a1, <8 x i32> %a2)
; Declarations of the two intrinsics called above.
3471 declare <8 x i32> @llvm.x86.avx2.maskload.d.256(i8*, <8 x i32>) nounwind readonly
3472 declare void @llvm.x86.avx2.maskstore.d.256(i8*, <8 x i32>, <8 x i32>) nounwind
; Scheduling test for the 128-bit vpmaskmovq instruction in both its load and
; store forms, driven through the llvm.x86.avx2.maskload.q and
; llvm.x86.avx2.maskstore.q intrinsics. The expected lines are the per-CPU
; output of llc -print-schedule (see the RUN lines at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of editing by
; hand.
; NOTE(review): for znver1 the load form is expected at [8:1.00] while the
; store form is expected at [100:0.25]; the asymmetry mirrors llc output, so
; confirm it in the scheduling model rather than changing it here.
3474 define <2 x i64> @test_pmaskmovq(i8* %a0, <2 x i64> %a1, <2 x i64> %a2) {
3475 ; GENERIC-LABEL: test_pmaskmovq:
3477 ; GENERIC-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [8:1.00]
3478 ; GENERIC-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
3479 ; GENERIC-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33]
3480 ; GENERIC-NEXT: retq # sched: [1:1.00]
3482 ; HASWELL-LABEL: test_pmaskmovq:
3484 ; HASWELL-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [8:2.00]
3485 ; HASWELL-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
3486 ; HASWELL-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33]
3487 ; HASWELL-NEXT: retq # sched: [7:1.00]
3489 ; BROADWELL-LABEL: test_pmaskmovq:
3490 ; BROADWELL: # %bb.0:
3491 ; BROADWELL-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [7:2.00]
3492 ; BROADWELL-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
3493 ; BROADWELL-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33]
3494 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3496 ; SKYLAKE-LABEL: test_pmaskmovq:
3498 ; SKYLAKE-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [7:0.50]
3499 ; SKYLAKE-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [2:1.00]
3500 ; SKYLAKE-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33]
3501 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3503 ; SKX-LABEL: test_pmaskmovq:
3505 ; SKX-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [7:0.50]
3506 ; SKX-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [2:1.00]
3507 ; SKX-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33]
3508 ; SKX-NEXT: retq # sched: [7:1.00]
3510 ; ZNVER1-LABEL: test_pmaskmovq:
3512 ; ZNVER1-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [8:1.00]
3513 ; ZNVER1-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [100:0.25]
3514 ; ZNVER1-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25]
3515 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Load and store go through the same pointer %a0, and both calls receive %a1.
; Presumably %a1 is the lane mask and %a2 the data being stored - confirm
; against the intrinsic definitions.
3516 %1 = call <2 x i64> @llvm.x86.avx2.maskload.q(i8* %a0, <2 x i64> %a1)
3517 call void @llvm.x86.avx2.maskstore.q(i8* %a0, <2 x i64> %a1, <2 x i64> %a2)
; Declarations of the two intrinsics called above.
3520 declare <2 x i64> @llvm.x86.avx2.maskload.q(i8*, <2 x i64>) nounwind readonly
3521 declare void @llvm.x86.avx2.maskstore.q(i8*, <2 x i64>, <2 x i64>) nounwind
; Scheduling test for the 256-bit vpmaskmovq instruction in both its load and
; store forms, driven through the llvm.x86.avx2.maskload.q.256 and
; llvm.x86.avx2.maskstore.q.256 intrinsics. The expected lines are the per-CPU
; output of llc -print-schedule (see the RUN lines at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of editing by
; hand.
; NOTE(review): for znver1 the load form is expected at [9:1.50] while the
; store form is expected at [100:0.25]; the asymmetry mirrors llc output, so
; confirm it in the scheduling model rather than changing it here.
3523 define <4 x i64> @test_pmaskmovq_ymm(i8* %a0, <4 x i64> %a1, <4 x i64> %a2) {
3524 ; GENERIC-LABEL: test_pmaskmovq_ymm:
3526 ; GENERIC-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [9:1.00]
3527 ; GENERIC-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
3528 ; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
3529 ; GENERIC-NEXT: retq # sched: [1:1.00]
3531 ; HASWELL-LABEL: test_pmaskmovq_ymm:
3533 ; HASWELL-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [9:2.00]
3534 ; HASWELL-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
3535 ; HASWELL-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33]
3536 ; HASWELL-NEXT: retq # sched: [7:1.00]
3538 ; BROADWELL-LABEL: test_pmaskmovq_ymm:
3539 ; BROADWELL: # %bb.0:
3540 ; BROADWELL-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [8:2.00]
3541 ; BROADWELL-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
3542 ; BROADWELL-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33]
3543 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3545 ; SKYLAKE-LABEL: test_pmaskmovq_ymm:
3547 ; SKYLAKE-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [8:0.50]
3548 ; SKYLAKE-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [2:1.00]
3549 ; SKYLAKE-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33]
3550 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3552 ; SKX-LABEL: test_pmaskmovq_ymm:
3554 ; SKX-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [8:0.50]
3555 ; SKX-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [2:1.00]
3556 ; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33]
3557 ; SKX-NEXT: retq # sched: [7:1.00]
3559 ; ZNVER1-LABEL: test_pmaskmovq_ymm:
3561 ; ZNVER1-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [9:1.50]
3562 ; ZNVER1-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [100:0.25]
3563 ; ZNVER1-NEXT: vmovdqa %ymm2, %ymm0 # sched: [2:0.25]
3564 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Load and store go through the same pointer %a0, and both calls receive %a1.
; Presumably %a1 is the lane mask and %a2 the data being stored - confirm
; against the intrinsic definitions.
3565 %1 = call <4 x i64> @llvm.x86.avx2.maskload.q.256(i8* %a0, <4 x i64> %a1)
3566 call void @llvm.x86.avx2.maskstore.q.256(i8* %a0, <4 x i64> %a1, <4 x i64> %a2)
; Declarations of the two intrinsics called above.
3569 declare <4 x i64> @llvm.x86.avx2.maskload.q.256(i8*, <4 x i64>) nounwind readonly
3570 declare void @llvm.x86.avx2.maskstore.q.256(i8*, <4 x i64>, <4 x i64>) nounwind
; Scheduling test for the 256-bit vpmaxsb instruction, driven through the
; llvm.x86.avx2.pmaxs.b intrinsic. The expected lines are the per-CPU output
; of llc -print-schedule (see the RUN lines at the top of the file); regenerate
; them with utils/update_llc_test_checks.py instead of editing by hand.
3572 define <32 x i8> @test_pmaxsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
3573 ; GENERIC-LABEL: test_pmaxsb:
3575 ; GENERIC-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3576 ; GENERIC-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3577 ; GENERIC-NEXT: retq # sched: [1:1.00]
3579 ; HASWELL-LABEL: test_pmaxsb:
3581 ; HASWELL-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3582 ; HASWELL-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3583 ; HASWELL-NEXT: retq # sched: [7:1.00]
3585 ; BROADWELL-LABEL: test_pmaxsb:
3586 ; BROADWELL: # %bb.0:
3587 ; BROADWELL-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3588 ; BROADWELL-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
3589 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3591 ; SKYLAKE-LABEL: test_pmaxsb:
3593 ; SKYLAKE-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3594 ; SKYLAKE-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3595 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3597 ; SKX-LABEL: test_pmaxsb:
3599 ; SKX-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3600 ; SKX-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3601 ; SKX-NEXT: retq # sched: [7:1.00]
3603 ; ZNVER1-LABEL: test_pmaxsb:
3605 ; ZNVER1-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3606 ; ZNVER1-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3607 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; The first call uses two register operands; the second reuses that result and
; loads its other operand from %a2, which produces the (%rdi) memory form.
3608 %1 = call <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8> %a0, <32 x i8> %a1)
3609 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
3610 %3 = call <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8> %1, <32 x i8> %2)
; Declaration of the intrinsic called above.
3613 declare <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8>, <32 x i8>) nounwind readnone
; Scheduling test for the 256-bit vpmaxsd instruction, driven through the
; llvm.x86.avx2.pmaxs.d intrinsic. The expected lines are the per-CPU output
; of llc -print-schedule (see the RUN lines at the top of the file); regenerate
; them with utils/update_llc_test_checks.py instead of editing by hand.
3615 define <8 x i32> @test_pmaxsd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
3616 ; GENERIC-LABEL: test_pmaxsd:
3618 ; GENERIC-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3619 ; GENERIC-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3620 ; GENERIC-NEXT: retq # sched: [1:1.00]
3622 ; HASWELL-LABEL: test_pmaxsd:
3624 ; HASWELL-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3625 ; HASWELL-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3626 ; HASWELL-NEXT: retq # sched: [7:1.00]
3628 ; BROADWELL-LABEL: test_pmaxsd:
3629 ; BROADWELL: # %bb.0:
3630 ; BROADWELL-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3631 ; BROADWELL-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
3632 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3634 ; SKYLAKE-LABEL: test_pmaxsd:
3636 ; SKYLAKE-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3637 ; SKYLAKE-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3638 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3640 ; SKX-LABEL: test_pmaxsd:
3642 ; SKX-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3643 ; SKX-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3644 ; SKX-NEXT: retq # sched: [7:1.00]
3646 ; ZNVER1-LABEL: test_pmaxsd:
3648 ; ZNVER1-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3649 ; ZNVER1-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3650 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; The first call uses two register operands; the second reuses that result and
; loads its other operand from %a2, which produces the (%rdi) memory form.
3651 %1 = call <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32> %a0, <8 x i32> %a1)
3652 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
3653 %3 = call <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32> %1, <8 x i32> %2)
; Declaration of the intrinsic called above.
3656 declare <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32>, <8 x i32>) nounwind readnone
; Scheduling test for the 256-bit vpmaxsw instruction, driven through the
; llvm.x86.avx2.pmaxs.w intrinsic. The expected lines are the per-CPU output
; of llc -print-schedule (see the RUN lines at the top of the file); regenerate
; them with utils/update_llc_test_checks.py instead of editing by hand.
3658 define <16 x i16> @test_pmaxsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
3659 ; GENERIC-LABEL: test_pmaxsw:
3661 ; GENERIC-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3662 ; GENERIC-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3663 ; GENERIC-NEXT: retq # sched: [1:1.00]
3665 ; HASWELL-LABEL: test_pmaxsw:
3667 ; HASWELL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3668 ; HASWELL-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3669 ; HASWELL-NEXT: retq # sched: [7:1.00]
3671 ; BROADWELL-LABEL: test_pmaxsw:
3672 ; BROADWELL: # %bb.0:
3673 ; BROADWELL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3674 ; BROADWELL-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
3675 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3677 ; SKYLAKE-LABEL: test_pmaxsw:
3679 ; SKYLAKE-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3680 ; SKYLAKE-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3681 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3683 ; SKX-LABEL: test_pmaxsw:
3685 ; SKX-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3686 ; SKX-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3687 ; SKX-NEXT: retq # sched: [7:1.00]
3689 ; ZNVER1-LABEL: test_pmaxsw:
3691 ; ZNVER1-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3692 ; ZNVER1-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3693 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; The first call uses two register operands; the second reuses that result and
; loads its other operand from %a2, which produces the (%rdi) memory form.
3694 %1 = call <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16> %a0, <16 x i16> %a1)
3695 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
3696 %3 = call <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16> %1, <16 x i16> %2)
; Declaration of the intrinsic called above.
3699 declare <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16>, <16 x i16>) nounwind readnone
; Scheduling test for the 256-bit vpmaxub instruction, driven through the
; llvm.x86.avx2.pmaxu.b intrinsic. The expected lines are the per-CPU output
; of llc -print-schedule (see the RUN lines at the top of the file); regenerate
; them with utils/update_llc_test_checks.py instead of editing by hand.
3701 define <32 x i8> @test_pmaxub(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
3702 ; GENERIC-LABEL: test_pmaxub:
3704 ; GENERIC-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3705 ; GENERIC-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3706 ; GENERIC-NEXT: retq # sched: [1:1.00]
3708 ; HASWELL-LABEL: test_pmaxub:
3710 ; HASWELL-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3711 ; HASWELL-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3712 ; HASWELL-NEXT: retq # sched: [7:1.00]
3714 ; BROADWELL-LABEL: test_pmaxub:
3715 ; BROADWELL: # %bb.0:
3716 ; BROADWELL-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3717 ; BROADWELL-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
3718 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3720 ; SKYLAKE-LABEL: test_pmaxub:
3722 ; SKYLAKE-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3723 ; SKYLAKE-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3724 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3726 ; SKX-LABEL: test_pmaxub:
3728 ; SKX-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3729 ; SKX-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3730 ; SKX-NEXT: retq # sched: [7:1.00]
3732 ; ZNVER1-LABEL: test_pmaxub:
3734 ; ZNVER1-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3735 ; ZNVER1-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3736 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; The first call uses two register operands; the second reuses that result and
; loads its other operand from %a2, which produces the (%rdi) memory form.
3737 %1 = call <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8> %a0, <32 x i8> %a1)
3738 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
3739 %3 = call <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8> %1, <32 x i8> %2)
; Declaration of the intrinsic called above.
3742 declare <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8>, <32 x i8>) nounwind readnone
; Scheduling test for the 256-bit vpmaxud instruction, driven through the
; llvm.x86.avx2.pmaxu.d intrinsic. The expected lines are the per-CPU output
; of llc -print-schedule (see the RUN lines at the top of the file); regenerate
; them with utils/update_llc_test_checks.py instead of editing by hand.
3744 define <8 x i32> @test_pmaxud(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
3745 ; GENERIC-LABEL: test_pmaxud:
3747 ; GENERIC-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3748 ; GENERIC-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3749 ; GENERIC-NEXT: retq # sched: [1:1.00]
3751 ; HASWELL-LABEL: test_pmaxud:
3753 ; HASWELL-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3754 ; HASWELL-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3755 ; HASWELL-NEXT: retq # sched: [7:1.00]
3757 ; BROADWELL-LABEL: test_pmaxud:
3758 ; BROADWELL: # %bb.0:
3759 ; BROADWELL-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3760 ; BROADWELL-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
3761 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3763 ; SKYLAKE-LABEL: test_pmaxud:
3765 ; SKYLAKE-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3766 ; SKYLAKE-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3767 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3769 ; SKX-LABEL: test_pmaxud:
3771 ; SKX-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3772 ; SKX-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3773 ; SKX-NEXT: retq # sched: [7:1.00]
3775 ; ZNVER1-LABEL: test_pmaxud:
3777 ; ZNVER1-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3778 ; ZNVER1-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3779 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; The first call uses two register operands; the second reuses that result and
; loads its other operand from %a2, which produces the (%rdi) memory form.
3780 %1 = call <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32> %a0, <8 x i32> %a1)
3781 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
3782 %3 = call <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32> %1, <8 x i32> %2)
; Declaration of the intrinsic called above.
3785 declare <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32>, <8 x i32>) nounwind readnone
; Scheduling test for the 256-bit vpmaxuw instruction, driven through the
; llvm.x86.avx2.pmaxu.w intrinsic. The expected lines are the per-CPU output
; of llc -print-schedule (see the RUN lines at the top of the file); regenerate
; them with utils/update_llc_test_checks.py instead of editing by hand.
3787 define <16 x i16> @test_pmaxuw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
3788 ; GENERIC-LABEL: test_pmaxuw:
3790 ; GENERIC-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3791 ; GENERIC-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3792 ; GENERIC-NEXT: retq # sched: [1:1.00]
3794 ; HASWELL-LABEL: test_pmaxuw:
3796 ; HASWELL-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3797 ; HASWELL-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3798 ; HASWELL-NEXT: retq # sched: [7:1.00]
3800 ; BROADWELL-LABEL: test_pmaxuw:
3801 ; BROADWELL: # %bb.0:
3802 ; BROADWELL-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3803 ; BROADWELL-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
3804 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3806 ; SKYLAKE-LABEL: test_pmaxuw:
3808 ; SKYLAKE-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3809 ; SKYLAKE-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3810 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3812 ; SKX-LABEL: test_pmaxuw:
3814 ; SKX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3815 ; SKX-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3816 ; SKX-NEXT: retq # sched: [7:1.00]
3818 ; ZNVER1-LABEL: test_pmaxuw:
3820 ; ZNVER1-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3821 ; ZNVER1-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3822 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; The first call uses two register operands; the second reuses that result and
; loads its other operand from %a2, which produces the (%rdi) memory form.
3823 %1 = call <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16> %a0, <16 x i16> %a1)
3824 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
3825 %3 = call <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16> %1, <16 x i16> %2)
; Declaration of the intrinsic called above.
3828 declare <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16>, <16 x i16>) nounwind readnone
; Scheduling test for the 256-bit vpminsb instruction, driven through the
; llvm.x86.avx2.pmins.b intrinsic. The expected lines are the per-CPU output
; of llc -print-schedule (see the RUN lines at the top of the file); regenerate
; them with utils/update_llc_test_checks.py instead of editing by hand.
3830 define <32 x i8> @test_pminsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
3831 ; GENERIC-LABEL: test_pminsb:
3833 ; GENERIC-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3834 ; GENERIC-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3835 ; GENERIC-NEXT: retq # sched: [1:1.00]
3837 ; HASWELL-LABEL: test_pminsb:
3839 ; HASWELL-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3840 ; HASWELL-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3841 ; HASWELL-NEXT: retq # sched: [7:1.00]
3843 ; BROADWELL-LABEL: test_pminsb:
3844 ; BROADWELL: # %bb.0:
3845 ; BROADWELL-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3846 ; BROADWELL-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
3847 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3849 ; SKYLAKE-LABEL: test_pminsb:
3851 ; SKYLAKE-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3852 ; SKYLAKE-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3853 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3855 ; SKX-LABEL: test_pminsb:
3857 ; SKX-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3858 ; SKX-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3859 ; SKX-NEXT: retq # sched: [7:1.00]
3861 ; ZNVER1-LABEL: test_pminsb:
3863 ; ZNVER1-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3864 ; ZNVER1-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3865 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; The first call uses two register operands; the second reuses that result and
; loads its other operand from %a2, which produces the (%rdi) memory form.
3866 %1 = call <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8> %a0, <32 x i8> %a1)
3867 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
3868 %3 = call <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8> %1, <32 x i8> %2)
; Declaration of the intrinsic called above.
3871 declare <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8>, <32 x i8>) nounwind readnone
; Scheduling test for the 256-bit vpminsd instruction, driven through the
; llvm.x86.avx2.pmins.d intrinsic. The expected lines are the per-CPU output
; of llc -print-schedule (see the RUN lines at the top of the file); regenerate
; them with utils/update_llc_test_checks.py instead of editing by hand.
3873 define <8 x i32> @test_pminsd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
3874 ; GENERIC-LABEL: test_pminsd:
3876 ; GENERIC-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3877 ; GENERIC-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3878 ; GENERIC-NEXT: retq # sched: [1:1.00]
3880 ; HASWELL-LABEL: test_pminsd:
3882 ; HASWELL-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3883 ; HASWELL-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3884 ; HASWELL-NEXT: retq # sched: [7:1.00]
3886 ; BROADWELL-LABEL: test_pminsd:
3887 ; BROADWELL: # %bb.0:
3888 ; BROADWELL-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3889 ; BROADWELL-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
3890 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3892 ; SKYLAKE-LABEL: test_pminsd:
3894 ; SKYLAKE-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3895 ; SKYLAKE-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3896 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3898 ; SKX-LABEL: test_pminsd:
3900 ; SKX-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3901 ; SKX-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3902 ; SKX-NEXT: retq # sched: [7:1.00]
3904 ; ZNVER1-LABEL: test_pminsd:
3906 ; ZNVER1-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3907 ; ZNVER1-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3908 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; The first call uses two register operands; the second reuses that result and
; loads its other operand from %a2, which produces the (%rdi) memory form.
3909 %1 = call <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32> %a0, <8 x i32> %a1)
3910 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
3911 %3 = call <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32> %1, <8 x i32> %2)
; Declaration of the intrinsic called above.
3914 declare <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32>, <8 x i32>) nounwind readnone
; Scheduling test for vpminsw, reached through @llvm.x86.avx2.pmins.w.
; The intrinsic is called twice: once on the two vector arguments and once on
; that result plus a vector loaded from %a2, so the assertions below cover both
; the register-register and the register-memory form for each checked CPU.
3916 define <16 x i16> @test_pminsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
3917 ; GENERIC-LABEL: test_pminsw:
3919 ; GENERIC-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3920 ; GENERIC-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3921 ; GENERIC-NEXT: retq # sched: [1:1.00]
3923 ; HASWELL-LABEL: test_pminsw:
3925 ; HASWELL-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3926 ; HASWELL-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3927 ; HASWELL-NEXT: retq # sched: [7:1.00]
3929 ; BROADWELL-LABEL: test_pminsw:
3930 ; BROADWELL: # %bb.0:
3931 ; BROADWELL-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3932 ; BROADWELL-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
3933 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3935 ; SKYLAKE-LABEL: test_pminsw:
3937 ; SKYLAKE-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3938 ; SKYLAKE-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3939 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3941 ; SKX-LABEL: test_pminsw:
3943 ; SKX-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3944 ; SKX-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3945 ; SKX-NEXT: retq # sched: [7:1.00]
3947 ; ZNVER1-LABEL: test_pminsw:
3949 ; ZNVER1-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3950 ; ZNVER1-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3951 ; ZNVER1-NEXT: retq # sched: [1:0.50]
3952 %1 = call <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16> %a0, <16 x i16> %a1)
3953 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
3954 %3 = call <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16> %1, <16 x i16> %2)
3957 declare <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16>, <16 x i16>) nounwind readnone
; Scheduling test for vpminub, reached through @llvm.x86.avx2.pminu.b.
; The intrinsic is called twice: once on the two vector arguments and once on
; that result plus a vector loaded from %a2, so the assertions below cover both
; the register-register and the register-memory form for each checked CPU.
3959 define <32 x i8> @test_pminub(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
3960 ; GENERIC-LABEL: test_pminub:
3962 ; GENERIC-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3963 ; GENERIC-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3964 ; GENERIC-NEXT: retq # sched: [1:1.00]
3966 ; HASWELL-LABEL: test_pminub:
3968 ; HASWELL-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3969 ; HASWELL-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3970 ; HASWELL-NEXT: retq # sched: [7:1.00]
3972 ; BROADWELL-LABEL: test_pminub:
3973 ; BROADWELL: # %bb.0:
3974 ; BROADWELL-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3975 ; BROADWELL-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
3976 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3978 ; SKYLAKE-LABEL: test_pminub:
3980 ; SKYLAKE-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3981 ; SKYLAKE-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3982 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3984 ; SKX-LABEL: test_pminub:
3986 ; SKX-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3987 ; SKX-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3988 ; SKX-NEXT: retq # sched: [7:1.00]
3990 ; ZNVER1-LABEL: test_pminub:
3992 ; ZNVER1-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3993 ; ZNVER1-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3994 ; ZNVER1-NEXT: retq # sched: [1:0.50]
3995 %1 = call <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8> %a0, <32 x i8> %a1)
3996 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
3997 %3 = call <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8> %1, <32 x i8> %2)
4000 declare <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8>, <32 x i8>) nounwind readnone
; Scheduling test for vpminud, reached through @llvm.x86.avx2.pminu.d.
; The intrinsic is called twice: once on the two vector arguments and once on
; that result plus a vector loaded from %a2, so the assertions below cover both
; the register-register and the register-memory form for each checked CPU.
4002 define <8 x i32> @test_pminud(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
4003 ; GENERIC-LABEL: test_pminud:
4005 ; GENERIC-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4006 ; GENERIC-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
4007 ; GENERIC-NEXT: retq # sched: [1:1.00]
4009 ; HASWELL-LABEL: test_pminud:
4011 ; HASWELL-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4012 ; HASWELL-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
4013 ; HASWELL-NEXT: retq # sched: [7:1.00]
4015 ; BROADWELL-LABEL: test_pminud:
4016 ; BROADWELL: # %bb.0:
4017 ; BROADWELL-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4018 ; BROADWELL-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
4019 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4021 ; SKYLAKE-LABEL: test_pminud:
4023 ; SKYLAKE-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4024 ; SKYLAKE-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
4025 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4027 ; SKX-LABEL: test_pminud:
4029 ; SKX-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4030 ; SKX-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
4031 ; SKX-NEXT: retq # sched: [7:1.00]
4033 ; ZNVER1-LABEL: test_pminud:
4035 ; ZNVER1-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4036 ; ZNVER1-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
4037 ; ZNVER1-NEXT: retq # sched: [1:0.50]
4038 %1 = call <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32> %a0, <8 x i32> %a1)
4039 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
4040 %3 = call <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32> %1, <8 x i32> %2)
4043 declare <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32>, <8 x i32>) nounwind readnone
; Scheduling test for vpminuw, reached through @llvm.x86.avx2.pminu.w.
; The intrinsic is called twice: once on the two vector arguments and once on
; that result plus a vector loaded from %a2, so the assertions below cover both
; the register-register and the register-memory form for each checked CPU.
4045 define <16 x i16> @test_pminuw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
4046 ; GENERIC-LABEL: test_pminuw:
4048 ; GENERIC-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4049 ; GENERIC-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
4050 ; GENERIC-NEXT: retq # sched: [1:1.00]
4052 ; HASWELL-LABEL: test_pminuw:
4054 ; HASWELL-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4055 ; HASWELL-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
4056 ; HASWELL-NEXT: retq # sched: [7:1.00]
4058 ; BROADWELL-LABEL: test_pminuw:
4059 ; BROADWELL: # %bb.0:
4060 ; BROADWELL-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4061 ; BROADWELL-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
4062 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4064 ; SKYLAKE-LABEL: test_pminuw:
4066 ; SKYLAKE-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4067 ; SKYLAKE-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
4068 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4070 ; SKX-LABEL: test_pminuw:
4072 ; SKX-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4073 ; SKX-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
4074 ; SKX-NEXT: retq # sched: [7:1.00]
4076 ; ZNVER1-LABEL: test_pminuw:
4078 ; ZNVER1-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4079 ; ZNVER1-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
4080 ; ZNVER1-NEXT: retq # sched: [1:0.50]
4081 %1 = call <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16> %a0, <16 x i16> %a1)
4082 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
4083 %3 = call <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16> %1, <16 x i16> %2)
4086 declare <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16>, <16 x i16>) nounwind readnone
; Scheduling test for vpmovmskb, reached through @llvm.x86.avx2.pmovmskb.
; Only a register form is exercised: the intrinsic takes the <32 x i8> argument
; and yields an i32. Every expected sequence below also contains a vzeroupper
; between the vpmovmskb and the return, with its own schedule annotation.
4088 define i32 @test_pmovmskb(<32 x i8> %a0) {
4089 ; GENERIC-LABEL: test_pmovmskb:
4091 ; GENERIC-NEXT: vpmovmskb %ymm0, %eax # sched: [2:1.00]
4092 ; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
4093 ; GENERIC-NEXT: retq # sched: [1:1.00]
4095 ; HASWELL-LABEL: test_pmovmskb:
4097 ; HASWELL-NEXT: vpmovmskb %ymm0, %eax # sched: [3:1.00]
4098 ; HASWELL-NEXT: vzeroupper # sched: [0:1.00]
4099 ; HASWELL-NEXT: retq # sched: [7:1.00]
4101 ; BROADWELL-LABEL: test_pmovmskb:
4102 ; BROADWELL: # %bb.0:
4103 ; BROADWELL-NEXT: vpmovmskb %ymm0, %eax # sched: [3:1.00]
4104 ; BROADWELL-NEXT: vzeroupper # sched: [0:1.00]
4105 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4107 ; SKYLAKE-LABEL: test_pmovmskb:
4109 ; SKYLAKE-NEXT: vpmovmskb %ymm0, %eax # sched: [2:1.00]
4110 ; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67]
4111 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4113 ; SKX-LABEL: test_pmovmskb:
4115 ; SKX-NEXT: vpmovmskb %ymm0, %eax # sched: [2:1.00]
4116 ; SKX-NEXT: vzeroupper # sched: [0:0.67]
4117 ; SKX-NEXT: retq # sched: [7:1.00]
4119 ; ZNVER1-LABEL: test_pmovmskb:
4121 ; ZNVER1-NEXT: vpmovmskb %ymm0, %eax # sched: [2:2.00]
4122 ; ZNVER1-NEXT: vzeroupper # sched: [100:0.25]
4123 ; ZNVER1-NEXT: retq # sched: [1:0.50]
4124 %1 = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> %a0)
4127 declare i32 @llvm.x86.avx2.pmovmskb(<32 x i8>) nounwind readnone
; Scheduling test for vpmovsxbd (sign-extend i8 elements to i32).
; The IR takes elements 0-7 of a <16 x i8> value with a shufflevector and
; sign-extends them to <8 x i32>, once for the register argument %a0 and once
; for a vector loaded from %a1; the two results are then added. The assertions
; therefore cover the register form, the memory form and the joining vpaddd.
; In the znver1 expectations the memory form is listed before the register form.
4129 define <8 x i32> @test_pmovsxbd(<16 x i8> %a0, <16 x i8> *%a1) {
4130 ; GENERIC-LABEL: test_pmovsxbd:
4132 ; GENERIC-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [1:1.00]
4133 ; GENERIC-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:1.00]
4134 ; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4135 ; GENERIC-NEXT: retq # sched: [1:1.00]
4137 ; HASWELL-LABEL: test_pmovsxbd:
4139 ; HASWELL-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [3:1.00]
4140 ; HASWELL-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:1.00]
4141 ; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4142 ; HASWELL-NEXT: retq # sched: [7:1.00]
4144 ; BROADWELL-LABEL: test_pmovsxbd:
4145 ; BROADWELL: # %bb.0:
4146 ; BROADWELL-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [3:1.00]
4147 ; BROADWELL-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:1.00]
4148 ; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4149 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4151 ; SKYLAKE-LABEL: test_pmovsxbd:
4153 ; SKYLAKE-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [3:1.00]
4154 ; SKYLAKE-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:1.00]
4155 ; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4156 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4158 ; SKX-LABEL: test_pmovsxbd:
4160 ; SKX-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [3:1.00]
4161 ; SKX-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:1.00]
4162 ; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4163 ; SKX-NEXT: retq # sched: [7:1.00]
4165 ; ZNVER1-LABEL: test_pmovsxbd:
4167 ; ZNVER1-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:0.50]
4168 ; ZNVER1-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [1:0.50]
4169 ; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4170 ; ZNVER1-NEXT: retq # sched: [1:0.50]
4171 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
4172 %2 = sext <8 x i8> %1 to <8 x i32>
4173 %3 = load <16 x i8>, <16 x i8> *%a1, align 16
4174 %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
4175 %5 = sext <8 x i8> %4 to <8 x i32>
4176 %6 = add <8 x i32> %2, %5
; Scheduling test for vpmovsxbq (sign-extend i8 elements to i64).
; The IR takes elements 0-3 of a <16 x i8> value with a shufflevector and
; sign-extends them to <4 x i64>, once for the register argument %a0 and once
; for a vector loaded from %a1; the two results are then added. The assertions
; therefore cover the register form, the memory form and the joining vpaddq.
; In the znver1 expectations the memory form is listed before the register form.
4180 define <4 x i64> @test_pmovsxbq(<16 x i8> %a0, <16 x i8> *%a1) {
4181 ; GENERIC-LABEL: test_pmovsxbq:
4183 ; GENERIC-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [1:1.00]
4184 ; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:1.00]
4185 ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4186 ; GENERIC-NEXT: retq # sched: [1:1.00]
4188 ; HASWELL-LABEL: test_pmovsxbq:
4190 ; HASWELL-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [3:1.00]
4191 ; HASWELL-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:1.00]
4192 ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4193 ; HASWELL-NEXT: retq # sched: [7:1.00]
4195 ; BROADWELL-LABEL: test_pmovsxbq:
4196 ; BROADWELL: # %bb.0:
4197 ; BROADWELL-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [3:1.00]
4198 ; BROADWELL-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:1.00]
4199 ; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4200 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4202 ; SKYLAKE-LABEL: test_pmovsxbq:
4204 ; SKYLAKE-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [3:1.00]
4205 ; SKYLAKE-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:1.00]
4206 ; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4207 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4209 ; SKX-LABEL: test_pmovsxbq:
4211 ; SKX-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [3:1.00]
4212 ; SKX-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:1.00]
4213 ; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4214 ; SKX-NEXT: retq # sched: [7:1.00]
4216 ; ZNVER1-LABEL: test_pmovsxbq:
4218 ; ZNVER1-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:0.50]
4219 ; ZNVER1-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [1:0.50]
4220 ; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4221 ; ZNVER1-NEXT: retq # sched: [1:0.50]
4222 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4223 %2 = sext <4 x i8> %1 to <4 x i64>
4224 %3 = load <16 x i8>, <16 x i8> *%a1, align 16
4225 %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4226 %5 = sext <4 x i8> %4 to <4 x i64>
4227 %6 = add <4 x i64> %2, %5
; Scheduling test for vpmovsxbw (sign-extend i8 elements to i16).
; The IR sign-extends a whole <16 x i8> value to <16 x i16>, once for the
; register argument %a0 and once for a vector loaded from %a1, then adds the
; two results. The assertions therefore cover the register form, the memory
; form and the joining vpaddw.
; In the znver1 expectations the memory form is listed before the register form.
4231 define <16 x i16> @test_pmovsxbw(<16 x i8> %a0, <16 x i8> *%a1) {
4232 ; GENERIC-LABEL: test_pmovsxbw:
4234 ; GENERIC-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [1:1.00]
4235 ; GENERIC-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [8:1.00]
4236 ; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4237 ; GENERIC-NEXT: retq # sched: [1:1.00]
4239 ; HASWELL-LABEL: test_pmovsxbw:
4241 ; HASWELL-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00]
4242 ; HASWELL-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [9:1.00]
4243 ; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4244 ; HASWELL-NEXT: retq # sched: [7:1.00]
4246 ; BROADWELL-LABEL: test_pmovsxbw:
4247 ; BROADWELL: # %bb.0:
4248 ; BROADWELL-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00]
4249 ; BROADWELL-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [8:1.00]
4250 ; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4251 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4253 ; SKYLAKE-LABEL: test_pmovsxbw:
4255 ; SKYLAKE-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00]
4256 ; SKYLAKE-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [9:1.00]
4257 ; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4258 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4260 ; SKX-LABEL: test_pmovsxbw:
4262 ; SKX-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00]
4263 ; SKX-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [9:1.00]
4264 ; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4265 ; SKX-NEXT: retq # sched: [7:1.00]
4267 ; ZNVER1-LABEL: test_pmovsxbw:
4269 ; ZNVER1-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [8:0.50]
4270 ; ZNVER1-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [1:0.50]
4271 ; ZNVER1-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4272 ; ZNVER1-NEXT: retq # sched: [1:0.50]
4273 %1 = sext <16 x i8> %a0 to <16 x i16>
4274 %2 = load <16 x i8>, <16 x i8> *%a1, align 16
4275 %3 = sext <16 x i8> %2 to <16 x i16>
4276 %4 = add <16 x i16> %1, %3
; Scheduling test for vpmovsxdq (sign-extend i32 elements to i64).
; The IR sign-extends a whole <4 x i32> value to <4 x i64>, once for the
; register argument %a0 and once for a vector loaded from %a1, then adds the
; two results. The assertions therefore cover the register form, the memory
; form and the joining vpaddq.
; In the znver1 expectations the memory form is listed before the register form.
4280 define <4 x i64> @test_pmovsxdq(<4 x i32> %a0, <4 x i32> *%a1) {
4281 ; GENERIC-LABEL: test_pmovsxdq:
4283 ; GENERIC-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [1:1.00]
4284 ; GENERIC-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [8:1.00]
4285 ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4286 ; GENERIC-NEXT: retq # sched: [1:1.00]
4288 ; HASWELL-LABEL: test_pmovsxdq:
4290 ; HASWELL-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00]
4291 ; HASWELL-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [9:1.00]
4292 ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4293 ; HASWELL-NEXT: retq # sched: [7:1.00]
4295 ; BROADWELL-LABEL: test_pmovsxdq:
4296 ; BROADWELL: # %bb.0:
4297 ; BROADWELL-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00]
4298 ; BROADWELL-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [8:1.00]
4299 ; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4300 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4302 ; SKYLAKE-LABEL: test_pmovsxdq:
4304 ; SKYLAKE-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00]
4305 ; SKYLAKE-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [9:1.00]
4306 ; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4307 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4309 ; SKX-LABEL: test_pmovsxdq:
4311 ; SKX-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00]
4312 ; SKX-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [9:1.00]
4313 ; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4314 ; SKX-NEXT: retq # sched: [7:1.00]
4316 ; ZNVER1-LABEL: test_pmovsxdq:
4318 ; ZNVER1-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [8:0.50]
4319 ; ZNVER1-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [1:0.50]
4320 ; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4321 ; ZNVER1-NEXT: retq # sched: [1:0.50]
4322 %1 = sext <4 x i32> %a0 to <4 x i64>
4323 %2 = load <4 x i32>, <4 x i32> *%a1, align 16
4324 %3 = sext <4 x i32> %2 to <4 x i64>
4325 %4 = add <4 x i64> %1, %3
; Scheduling test for vpmovsxwd (sign-extend i16 elements to i32).
; The IR sign-extends a whole <8 x i16> value to <8 x i32>, once for the
; register argument %a0 and once for a vector loaded from %a1, then adds the
; two results. The assertions therefore cover the register form, the memory
; form and the joining vpaddd.
; In the znver1 expectations the memory form is listed before the register form.
4329 define <8 x i32> @test_pmovsxwd(<8 x i16> %a0, <8 x i16> *%a1) {
4330 ; GENERIC-LABEL: test_pmovsxwd:
4332 ; GENERIC-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [1:1.00]
4333 ; GENERIC-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [8:1.00]
4334 ; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4335 ; GENERIC-NEXT: retq # sched: [1:1.00]
4337 ; HASWELL-LABEL: test_pmovsxwd:
4339 ; HASWELL-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00]
4340 ; HASWELL-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [9:1.00]
4341 ; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4342 ; HASWELL-NEXT: retq # sched: [7:1.00]
4344 ; BROADWELL-LABEL: test_pmovsxwd:
4345 ; BROADWELL: # %bb.0:
4346 ; BROADWELL-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00]
4347 ; BROADWELL-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [8:1.00]
4348 ; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4349 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4351 ; SKYLAKE-LABEL: test_pmovsxwd:
4353 ; SKYLAKE-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00]
4354 ; SKYLAKE-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [9:1.00]
4355 ; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4356 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4358 ; SKX-LABEL: test_pmovsxwd:
4360 ; SKX-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00]
4361 ; SKX-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [9:1.00]
4362 ; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4363 ; SKX-NEXT: retq # sched: [7:1.00]
4365 ; ZNVER1-LABEL: test_pmovsxwd:
4367 ; ZNVER1-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [8:0.50]
4368 ; ZNVER1-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [1:0.50]
4369 ; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4370 ; ZNVER1-NEXT: retq # sched: [1:0.50]
4371 %1 = sext <8 x i16> %a0 to <8 x i32>
4372 %2 = load <8 x i16>, <8 x i16> *%a1, align 16
4373 %3 = sext <8 x i16> %2 to <8 x i32>
4374 %4 = add <8 x i32> %1, %3
; Scheduling test for vpmovsxwq (sign-extend i16 elements to i64).
; The IR takes elements 0-3 of an <8 x i16> value with a shufflevector and
; sign-extends them to <4 x i64>, once for the register argument %a0 and once
; for a vector loaded from %a1; the two results are then added. The assertions
; therefore cover the register form, the memory form and the joining vpaddq.
; In the znver1 expectations the memory form is listed before the register form.
4378 define <4 x i64> @test_pmovsxwq(<8 x i16> %a0, <8 x i16> *%a1) {
4379 ; GENERIC-LABEL: test_pmovsxwq:
4381 ; GENERIC-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [1:1.00]
4382 ; GENERIC-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:1.00]
4383 ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4384 ; GENERIC-NEXT: retq # sched: [1:1.00]
4386 ; HASWELL-LABEL: test_pmovsxwq:
4388 ; HASWELL-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [3:1.00]
4389 ; HASWELL-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:1.00]
4390 ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4391 ; HASWELL-NEXT: retq # sched: [7:1.00]
4393 ; BROADWELL-LABEL: test_pmovsxwq:
4394 ; BROADWELL: # %bb.0:
4395 ; BROADWELL-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [3:1.00]
4396 ; BROADWELL-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:1.00]
4397 ; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4398 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4400 ; SKYLAKE-LABEL: test_pmovsxwq:
4402 ; SKYLAKE-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [3:1.00]
4403 ; SKYLAKE-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:1.00]
4404 ; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4405 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4407 ; SKX-LABEL: test_pmovsxwq:
4409 ; SKX-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [3:1.00]
4410 ; SKX-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:1.00]
4411 ; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4412 ; SKX-NEXT: retq # sched: [7:1.00]
4414 ; ZNVER1-LABEL: test_pmovsxwq:
4416 ; ZNVER1-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:0.50]
4417 ; ZNVER1-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [1:0.50]
4418 ; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4419 ; ZNVER1-NEXT: retq # sched: [1:0.50]
4420 %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4421 %2 = sext <4 x i16> %1 to <4 x i64>
4422 %3 = load <8 x i16>, <8 x i16> *%a1, align 16
4423 %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4424 %5 = sext <4 x i16> %4 to <4 x i64>
4425 %6 = add <4 x i64> %2, %5
; Scheduling test for vpmovzxbd (zero-extend i8 elements to i32).
; The IR takes elements 0-7 of a <16 x i8> value with a shufflevector and
; zero-extends them to <8 x i32>, once for the register argument %a0 and once
; for a vector loaded from %a1; the two results are then added. The assertions
; match the printed shuffle decoding (source element followed by "zero" lanes)
; for the register form and the memory form, plus the joining vpaddd.
; In the znver1 expectations the memory form is listed before the register form.
4429 define <8 x i32> @test_pmovzxbd(<16 x i8> %a0, <16 x i8> *%a1) {
4430 ; GENERIC-LABEL: test_pmovzxbd:
4432 ; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:1.00]
4433 ; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [8:1.00]
4434 ; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4435 ; GENERIC-NEXT: retq # sched: [1:1.00]
4437 ; HASWELL-LABEL: test_pmovzxbd:
4439 ; HASWELL-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00]
4440 ; HASWELL-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [10:1.00]
4441 ; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4442 ; HASWELL-NEXT: retq # sched: [7:1.00]
4444 ; BROADWELL-LABEL: test_pmovzxbd:
4445 ; BROADWELL: # %bb.0:
4446 ; BROADWELL-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00]
4447 ; BROADWELL-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [9:1.00]
4448 ; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4449 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4451 ; SKYLAKE-LABEL: test_pmovzxbd:
4453 ; SKYLAKE-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00]
4454 ; SKYLAKE-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [10:1.00]
4455 ; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4456 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4458 ; SKX-LABEL: test_pmovzxbd:
4460 ; SKX-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00]
4461 ; SKX-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [10:1.00]
4462 ; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4463 ; SKX-NEXT: retq # sched: [7:1.00]
4465 ; ZNVER1-LABEL: test_pmovzxbd:
4467 ; ZNVER1-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [8:0.50]
4468 ; ZNVER1-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:0.50]
4469 ; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4470 ; ZNVER1-NEXT: retq # sched: [1:0.50]
4471 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
4472 %2 = zext <8 x i8> %1 to <8 x i32>
4473 %3 = load <16 x i8>, <16 x i8> *%a1, align 16
4474 %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
4475 %5 = zext <8 x i8> %4 to <8 x i32>
4476 %6 = add <8 x i32> %2, %5
; Scheduling test for vpmovzxbq (zero-extend i8 elements to i64).
; The IR takes elements 0-3 of a <16 x i8> value with a shufflevector and
; zero-extends them to <4 x i64>, once for the register argument %a0 and once
; for a vector loaded from %a1; the two results are then added. The assertions
; match the printed shuffle decoding (source element followed by "zero" lanes)
; for the register form and the memory form, plus the joining vpaddq.
; In the znver1 expectations the memory form is listed before the register form.
4480 define <4 x i64> @test_pmovzxbq(<16 x i8> %a0, <16 x i8> *%a1) {
4481 ; GENERIC-LABEL: test_pmovzxbq:
4483 ; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
4484 ; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [8:1.00]
4485 ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4486 ; GENERIC-NEXT: retq # sched: [1:1.00]
4488 ; HASWELL-LABEL: test_pmovzxbq:
4490 ; HASWELL-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00]
4491 ; HASWELL-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [10:1.00]
4492 ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4493 ; HASWELL-NEXT: retq # sched: [7:1.00]
4495 ; BROADWELL-LABEL: test_pmovzxbq:
4496 ; BROADWELL: # %bb.0:
4497 ; BROADWELL-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00]
4498 ; BROADWELL-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [9:1.00]
4499 ; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4500 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4502 ; SKYLAKE-LABEL: test_pmovzxbq:
4504 ; SKYLAKE-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00]
4505 ; SKYLAKE-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [10:1.00]
4506 ; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4507 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4509 ; SKX-LABEL: test_pmovzxbq:
4511 ; SKX-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00]
4512 ; SKX-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [10:1.00]
4513 ; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4514 ; SKX-NEXT: retq # sched: [7:1.00]
4516 ; ZNVER1-LABEL: test_pmovzxbq:
4518 ; ZNVER1-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [8:0.50]
4519 ; ZNVER1-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50]
4520 ; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4521 ; ZNVER1-NEXT: retq # sched: [1:0.50]
4522 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4523 %2 = zext <4 x i8> %1 to <4 x i64>
4524 %3 = load <16 x i8>, <16 x i8> *%a1, align 16
4525 %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4526 %5 = zext <4 x i8> %4 to <4 x i64>
4527 %6 = add <4 x i64> %2, %5
; Scheduling test for vpmovzxbw (zero-extend i8 elements to i16).
; The IR zero-extends a whole <16 x i8> value to <16 x i16>, once for the
; register argument %a0 and once for a vector loaded from %a1, then adds the
; two results. The assertions match the printed shuffle decoding (each source
; element followed by one "zero" lane) for the register form and the memory
; form, plus the joining vpaddw.
; In the znver1 expectations the memory form is listed before the register form.
4531 define <16 x i16> @test_pmovzxbw(<16 x i8> %a0, <16 x i8> *%a1) {
4532 ; GENERIC-LABEL: test_pmovzxbw:
4534 ; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00]
4535 ; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [8:1.00]
4536 ; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4537 ; GENERIC-NEXT: retq # sched: [1:1.00]
4539 ; HASWELL-LABEL: test_pmovzxbw:
4541 ; HASWELL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00]
4542 ; HASWELL-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [10:1.00]
4543 ; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4544 ; HASWELL-NEXT: retq # sched: [7:1.00]
4546 ; BROADWELL-LABEL: test_pmovzxbw:
4547 ; BROADWELL: # %bb.0:
4548 ; BROADWELL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00]
4549 ; BROADWELL-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [9:1.00]
4550 ; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4551 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4553 ; SKYLAKE-LABEL: test_pmovzxbw:
4555 ; SKYLAKE-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00]
4556 ; SKYLAKE-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [10:1.00]
4557 ; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4558 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4560 ; SKX-LABEL: test_pmovzxbw:
4562 ; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00]
4563 ; SKX-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [10:1.00]
4564 ; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4565 ; SKX-NEXT: retq # sched: [7:1.00]
4567 ; ZNVER1-LABEL: test_pmovzxbw:
4569 ; ZNVER1-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [8:0.50]
4570 ; ZNVER1-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:0.50]
4571 ; ZNVER1-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4572 ; ZNVER1-NEXT: retq # sched: [1:0.50]
4573 %1 = zext <16 x i8> %a0 to <16 x i16>
4574 %2 = load <16 x i8>, <16 x i8> *%a1, align 16
4575 %3 = zext <16 x i8> %2 to <16 x i16>
4576 %4 = add <16 x i16> %1, %3
; Schedule test for vpmovzxdq (ymm destination). Zero-extends both the register
; argument %a0 and a <4 x i32> vector loaded from %a1 to <4 x i64>, then adds
; the two results. The per-CPU check lines below pin the expected instruction
; sequence together with the "sched" annotation printed for each instruction.
4580 define <4 x i64> @test_pmovzxdq(<4 x i32> %a0, <4 x i32> *%a1) {
4581 ; GENERIC-LABEL: test_pmovzxdq:
4583 ; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
4584 ; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:1.00]
4585 ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4586 ; GENERIC-NEXT: retq # sched: [1:1.00]
4588 ; HASWELL-LABEL: test_pmovzxdq:
4590 ; HASWELL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00]
4591 ; HASWELL-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [10:1.00]
4592 ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4593 ; HASWELL-NEXT: retq # sched: [7:1.00]
4595 ; BROADWELL-LABEL: test_pmovzxdq:
4596 ; BROADWELL: # %bb.0:
4597 ; BROADWELL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00]
4598 ; BROADWELL-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [9:1.00]
4599 ; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4600 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4602 ; SKYLAKE-LABEL: test_pmovzxdq:
4604 ; SKYLAKE-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00]
4605 ; SKYLAKE-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [10:1.00]
4606 ; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4607 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4609 ; SKX-LABEL: test_pmovzxdq:
4611 ; SKX-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00]
4612 ; SKX-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [10:1.00]
4613 ; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4614 ; SKX-NEXT: retq # sched: [7:1.00]
4616 ; ZNVER1-LABEL: test_pmovzxdq:
4618 ; ZNVER1-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:0.50]
4619 ; ZNVER1-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50]
4620 ; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4621 ; ZNVER1-NEXT: retq # sched: [1:0.50]
4622 %1 = zext <4 x i32> %a0 to <4 x i64>
4623 %2 = load <4 x i32>, <4 x i32> *%a1, align 16
4624 %3 = zext <4 x i32> %2 to <4 x i64>
4625 %4 = add <4 x i64> %1, %3
; Schedule test for vpmovzxwd (ymm destination). Zero-extends both the register
; argument %a0 and an <8 x i16> vector loaded from %a1 to <8 x i32>, then adds
; the two results. The per-CPU check lines below pin the expected instruction
; sequence together with the "sched" annotation printed for each instruction.
4629 define <8 x i32> @test_pmovzxwd(<8 x i16> %a0, <8 x i16> *%a1) {
4630 ; GENERIC-LABEL: test_pmovzxwd:
4632 ; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
4633 ; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:1.00]
4634 ; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4635 ; GENERIC-NEXT: retq # sched: [1:1.00]
4637 ; HASWELL-LABEL: test_pmovzxwd:
4639 ; HASWELL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
4640 ; HASWELL-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [9:1.00]
4641 ; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4642 ; HASWELL-NEXT: retq # sched: [7:1.00]
4644 ; BROADWELL-LABEL: test_pmovzxwd:
4645 ; BROADWELL: # %bb.0:
4646 ; BROADWELL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
4647 ; BROADWELL-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:1.00]
4648 ; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4649 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4651 ; SKYLAKE-LABEL: test_pmovzxwd:
4653 ; SKYLAKE-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
4654 ; SKYLAKE-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [9:1.00]
4655 ; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4656 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4658 ; SKX-LABEL: test_pmovzxwd:
4660 ; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
4661 ; SKX-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [9:1.00]
4662 ; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4663 ; SKX-NEXT: retq # sched: [7:1.00]
4665 ; ZNVER1-LABEL: test_pmovzxwd:
4667 ; ZNVER1-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:0.50]
4668 ; ZNVER1-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50]
4669 ; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4670 ; ZNVER1-NEXT: retq # sched: [1:0.50]
4671 %1 = zext <8 x i16> %a0 to <8 x i32>
4672 %2 = load <8 x i16>, <8 x i16> *%a1, align 16
4673 %3 = zext <8 x i16> %2 to <8 x i32>
4674 %4 = add <8 x i32> %1, %3
; Schedule test for vpmovzxwq (ymm destination). For both the register argument
; %a0 and an <8 x i16> vector loaded from %a1, the low four i16 elements are
; selected with a shufflevector and zero-extended to <4 x i64>; the two results
; are then added. The per-CPU check lines below pin the expected instruction
; sequence together with the "sched" annotation printed for each instruction.
4678 define <4 x i64> @test_pmovzxwq(<8 x i16> %a0, <8 x i16> *%a1) {
4679 ; GENERIC-LABEL: test_pmovzxwq:
4681 ; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
4682 ; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:1.00]
4683 ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4684 ; GENERIC-NEXT: retq # sched: [1:1.00]
4686 ; HASWELL-LABEL: test_pmovzxwq:
4688 ; HASWELL-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00]
4689 ; HASWELL-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [10:1.00]
4690 ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4691 ; HASWELL-NEXT: retq # sched: [7:1.00]
4693 ; BROADWELL-LABEL: test_pmovzxwq:
4694 ; BROADWELL: # %bb.0:
4695 ; BROADWELL-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00]
4696 ; BROADWELL-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [9:1.00]
4697 ; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4698 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4700 ; SKYLAKE-LABEL: test_pmovzxwq:
4702 ; SKYLAKE-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00]
4703 ; SKYLAKE-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [10:1.00]
4704 ; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4705 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4707 ; SKX-LABEL: test_pmovzxwq:
4709 ; SKX-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00]
4710 ; SKX-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [10:1.00]
4711 ; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4712 ; SKX-NEXT: retq # sched: [7:1.00]
4714 ; ZNVER1-LABEL: test_pmovzxwq:
4716 ; ZNVER1-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:0.50]
4717 ; ZNVER1-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50]
4718 ; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4719 ; ZNVER1-NEXT: retq # sched: [1:0.50]
4720 %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4721 %2 = zext <4 x i16> %1 to <4 x i64>
4722 %3 = load <8 x i16>, <8 x i16> *%a1, align 16
4723 %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4724 %5 = zext <4 x i16> %4 to <4 x i64>
4725 %6 = add <4 x i64> %2, %5
; Schedule test for vpmuldq (ymm). Calls @llvm.x86.avx2.pmul.dq once on the
; register operands (%a0, %a1) and once on %a2 with a vector loaded from %a3,
; then ORs the two <4 x i64> results. The per-CPU check lines below pin the
; expected instruction sequence and the "sched" annotation of each instruction.
; The intrinsic used here is declared on the last line of this block.
4729 define <4 x i64> @test_pmuldq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2, <8 x i32> *%a3) {
4730 ; GENERIC-LABEL: test_pmuldq:
4732 ; GENERIC-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4733 ; GENERIC-NEXT: vpmuldq (%rdi), %ymm2, %ymm1 # sched: [12:1.00]
4734 ; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4735 ; GENERIC-NEXT: retq # sched: [1:1.00]
4737 ; HASWELL-LABEL: test_pmuldq:
4739 ; HASWELL-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4740 ; HASWELL-NEXT: vpmuldq (%rdi), %ymm2, %ymm1 # sched: [12:1.00]
4741 ; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4742 ; HASWELL-NEXT: retq # sched: [7:1.00]
4744 ; BROADWELL-LABEL: test_pmuldq:
4745 ; BROADWELL: # %bb.0:
4746 ; BROADWELL-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4747 ; BROADWELL-NEXT: vpmuldq (%rdi), %ymm2, %ymm1 # sched: [11:1.00]
4748 ; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4749 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4751 ; SKYLAKE-LABEL: test_pmuldq:
4753 ; SKYLAKE-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
4754 ; SKYLAKE-NEXT: vpmuldq (%rdi), %ymm2, %ymm1 # sched: [11:0.50]
4755 ; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4756 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4758 ; SKX-LABEL: test_pmuldq:
4760 ; SKX-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
4761 ; SKX-NEXT: vpmuldq (%rdi), %ymm2, %ymm1 # sched: [11:0.50]
4762 ; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4763 ; SKX-NEXT: retq # sched: [7:1.00]
4765 ; ZNVER1-LABEL: test_pmuldq:
4767 ; ZNVER1-NEXT: vpmuldq (%rdi), %ymm2, %ymm2 # sched: [11:1.00]
4768 ; ZNVER1-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
4769 ; ZNVER1-NEXT: vpor %ymm2, %ymm0, %ymm0 # sched: [1:0.25]
4770 ; ZNVER1-NEXT: retq # sched: [1:0.50]
4771 %1 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %a0, <8 x i32> %a1)
4772 %2 = load <8 x i32>, <8 x i32> *%a3, align 32
4773 %3 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %a2, <8 x i32> %2)
4774 %4 = or <4 x i64> %1, %3
4777 declare <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32>, <8 x i32>) nounwind readnone
; Schedule test for vpmulhrsw (ymm). Calls @llvm.x86.avx2.pmul.hr.sw on the
; register operands (%a0, %a1), then feeds that result into a second call whose
; other operand is a vector loaded from %a2, so both the register and the
; memory-operand forms appear. The per-CPU check lines below pin the expected
; instruction sequence and the "sched" annotation of each instruction.
; The intrinsic used here is declared on the last line of this block.
4779 define <16 x i16> @test_pmulhrsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
4780 ; GENERIC-LABEL: test_pmulhrsw:
4782 ; GENERIC-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4783 ; GENERIC-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
4784 ; GENERIC-NEXT: retq # sched: [1:1.00]
4786 ; HASWELL-LABEL: test_pmulhrsw:
4788 ; HASWELL-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4789 ; HASWELL-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
4790 ; HASWELL-NEXT: retq # sched: [7:1.00]
4792 ; BROADWELL-LABEL: test_pmulhrsw:
4793 ; BROADWELL: # %bb.0:
4794 ; BROADWELL-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4795 ; BROADWELL-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
4796 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4798 ; SKYLAKE-LABEL: test_pmulhrsw:
4800 ; SKYLAKE-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
4801 ; SKYLAKE-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4802 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4804 ; SKX-LABEL: test_pmulhrsw:
4806 ; SKX-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
4807 ; SKX-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4808 ; SKX-NEXT: retq # sched: [7:1.00]
4810 ; ZNVER1-LABEL: test_pmulhrsw:
4812 ; ZNVER1-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
4813 ; ZNVER1-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
4814 ; ZNVER1-NEXT: retq # sched: [1:0.50]
4815 %1 = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %a0, <16 x i16> %a1)
4816 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
4817 %3 = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %1, <16 x i16> %2)
4820 declare <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16>, <16 x i16>) nounwind readnone
; Schedule test for vpmulhuw (ymm). Calls @llvm.x86.avx2.pmulhu.w on the
; register operands (%a0, %a1), then feeds that result into a second call whose
; other operand is a vector loaded from %a2, so both the register and the
; memory-operand forms appear. The per-CPU check lines below pin the expected
; instruction sequence and the "sched" annotation of each instruction.
; The intrinsic used here is declared on the last line of this block.
4822 define <16 x i16> @test_pmulhuw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
4823 ; GENERIC-LABEL: test_pmulhuw:
4825 ; GENERIC-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4826 ; GENERIC-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
4827 ; GENERIC-NEXT: retq # sched: [1:1.00]
4829 ; HASWELL-LABEL: test_pmulhuw:
4831 ; HASWELL-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4832 ; HASWELL-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
4833 ; HASWELL-NEXT: retq # sched: [7:1.00]
4835 ; BROADWELL-LABEL: test_pmulhuw:
4836 ; BROADWELL: # %bb.0:
4837 ; BROADWELL-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4838 ; BROADWELL-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
4839 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4841 ; SKYLAKE-LABEL: test_pmulhuw:
4843 ; SKYLAKE-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
4844 ; SKYLAKE-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4845 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4847 ; SKX-LABEL: test_pmulhuw:
4849 ; SKX-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
4850 ; SKX-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4851 ; SKX-NEXT: retq # sched: [7:1.00]
4853 ; ZNVER1-LABEL: test_pmulhuw:
4855 ; ZNVER1-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
4856 ; ZNVER1-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
4857 ; ZNVER1-NEXT: retq # sched: [1:0.50]
4858 %1 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %a0, <16 x i16> %a1)
4859 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
4860 %3 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %1, <16 x i16> %2)
4863 declare <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16>, <16 x i16>) nounwind readnone
; Schedule test for vpmulhw (ymm). Calls @llvm.x86.avx2.pmulh.w on the register
; operands (%a0, %a1), then feeds that result into a second call whose other
; operand is a vector loaded from %a2, so both the register and the
; memory-operand forms appear. The per-CPU check lines below pin the expected
; instruction sequence and the "sched" annotation of each instruction.
; The intrinsic used here is declared on the last line of this block.
4865 define <16 x i16> @test_pmulhw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
4866 ; GENERIC-LABEL: test_pmulhw:
4868 ; GENERIC-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4869 ; GENERIC-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
4870 ; GENERIC-NEXT: retq # sched: [1:1.00]
4872 ; HASWELL-LABEL: test_pmulhw:
4874 ; HASWELL-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4875 ; HASWELL-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
4876 ; HASWELL-NEXT: retq # sched: [7:1.00]
4878 ; BROADWELL-LABEL: test_pmulhw:
4879 ; BROADWELL: # %bb.0:
4880 ; BROADWELL-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4881 ; BROADWELL-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
4882 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4884 ; SKYLAKE-LABEL: test_pmulhw:
4886 ; SKYLAKE-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
4887 ; SKYLAKE-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4888 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4890 ; SKX-LABEL: test_pmulhw:
4892 ; SKX-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
4893 ; SKX-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4894 ; SKX-NEXT: retq # sched: [7:1.00]
4896 ; ZNVER1-LABEL: test_pmulhw:
4898 ; ZNVER1-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
4899 ; ZNVER1-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
4900 ; ZNVER1-NEXT: retq # sched: [1:0.50]
4901 %1 = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %a0, <16 x i16> %a1)
4902 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
4903 %3 = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %1, <16 x i16> %2)
4906 declare <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16>, <16 x i16>) nounwind readnone
; Schedule test for vpmulld (ymm). Multiplies the register operands %a0 and
; %a1 with a plain IR mul, then multiplies that product by an <8 x i32> vector
; loaded from %a2, so both the register and the memory-operand forms appear.
; The per-CPU check lines below pin the expected instruction sequence and the
; "sched" annotation of each instruction.
4908 define <8 x i32> @test_pmulld(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
4909 ; GENERIC-LABEL: test_pmulld:
4911 ; GENERIC-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4912 ; GENERIC-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
4913 ; GENERIC-NEXT: retq # sched: [1:1.00]
4915 ; HASWELL-LABEL: test_pmulld:
4917 ; HASWELL-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:2.00]
4918 ; HASWELL-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [17:2.00]
4919 ; HASWELL-NEXT: retq # sched: [7:1.00]
4921 ; BROADWELL-LABEL: test_pmulld:
4922 ; BROADWELL: # %bb.0:
4923 ; BROADWELL-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:2.00]
4924 ; BROADWELL-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [16:2.00]
4925 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4927 ; SKYLAKE-LABEL: test_pmulld:
4929 ; SKYLAKE-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:1.00]
4930 ; SKYLAKE-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [17:1.00]
4931 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4933 ; SKX-LABEL: test_pmulld:
4935 ; SKX-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:1.00]
4936 ; SKX-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [17:1.00]
4937 ; SKX-NEXT: retq # sched: [7:1.00]
4939 ; ZNVER1-LABEL: test_pmulld:
4941 ; ZNVER1-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
4942 ; ZNVER1-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
4943 ; ZNVER1-NEXT: retq # sched: [1:0.50]
4944 %1 = mul <8 x i32> %a0, %a1
4945 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
4946 %3 = mul <8 x i32> %1, %2
; Schedule test for vpmullw (ymm). Multiplies the register operands %a0 and
; %a1 with a plain IR mul, then multiplies that product by a <16 x i16> vector
; loaded from %a2, so both the register and the memory-operand forms appear.
; The per-CPU check lines below pin the expected instruction sequence and the
; "sched" annotation of each instruction.
4950 define <16 x i16> @test_pmullw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
4951 ; GENERIC-LABEL: test_pmullw:
4953 ; GENERIC-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4954 ; GENERIC-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
4955 ; GENERIC-NEXT: retq # sched: [1:1.00]
4957 ; HASWELL-LABEL: test_pmullw:
4959 ; HASWELL-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4960 ; HASWELL-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
4961 ; HASWELL-NEXT: retq # sched: [7:1.00]
4963 ; BROADWELL-LABEL: test_pmullw:
4964 ; BROADWELL: # %bb.0:
4965 ; BROADWELL-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4966 ; BROADWELL-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
4967 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4969 ; SKYLAKE-LABEL: test_pmullw:
4971 ; SKYLAKE-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
4972 ; SKYLAKE-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4973 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4975 ; SKX-LABEL: test_pmullw:
4977 ; SKX-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
4978 ; SKX-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4979 ; SKX-NEXT: retq # sched: [7:1.00]
4981 ; ZNVER1-LABEL: test_pmullw:
4983 ; ZNVER1-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
4984 ; ZNVER1-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
4985 ; ZNVER1-NEXT: retq # sched: [1:0.50]
4986 %1 = mul <16 x i16> %a0, %a1
4987 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
4988 %3 = mul <16 x i16> %1, %2
; Schedule test for vpmuludq (ymm). Calls @llvm.x86.avx2.pmulu.dq on the
; register operands (%a0, %a1), bitcasts the <4 x i64> result back to
; <8 x i32>, and feeds it into a second call whose other operand is a vector
; loaded from %a2, so both the register and the memory-operand forms appear.
; The per-CPU check lines below pin the expected instruction sequence and the
; "sched" annotation of each instruction.
; The intrinsic used here is declared on the last line of this block.
4992 define <4 x i64> @test_pmuludq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
4993 ; GENERIC-LABEL: test_pmuludq:
4995 ; GENERIC-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4996 ; GENERIC-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
4997 ; GENERIC-NEXT: retq # sched: [1:1.00]
4999 ; HASWELL-LABEL: test_pmuludq:
5001 ; HASWELL-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
5002 ; HASWELL-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
5003 ; HASWELL-NEXT: retq # sched: [7:1.00]
5005 ; BROADWELL-LABEL: test_pmuludq:
5006 ; BROADWELL: # %bb.0:
5007 ; BROADWELL-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
5008 ; BROADWELL-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
5009 ; BROADWELL-NEXT: retq # sched: [7:1.00]
5011 ; SKYLAKE-LABEL: test_pmuludq:
5013 ; SKYLAKE-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
5014 ; SKYLAKE-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
5015 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
5017 ; SKX-LABEL: test_pmuludq:
5019 ; SKX-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
5020 ; SKX-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
5021 ; SKX-NEXT: retq # sched: [7:1.00]
5023 ; ZNVER1-LABEL: test_pmuludq:
5025 ; ZNVER1-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
5026 ; ZNVER1-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
5027 ; ZNVER1-NEXT: retq # sched: [1:0.50]
5028 %1 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> %a0, <8 x i32> %a1)
5029 %2 = bitcast <4 x i64> %1 to <8 x i32>
5030 %3 = load <8 x i32>, <8 x i32> *%a2, align 32
5031 %4 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> %2, <8 x i32> %3)
5034 declare <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32>, <8 x i32>) nounwind readnone
; Schedule test for vpor (ymm). ORs the register operands %a0 and %a1, ORs
; that result with a <4 x i64> vector loaded from %a2, and finally adds %a1 to
; the result. The per-CPU check lines below pin the expected instruction
; sequence and the "sched" annotation of each instruction.
; NOTE(review): the trailing add presumably exists to keep the logic ops in the
; integer domain (vpor rather than a floating-point OR) - confirm.
5036 define <4 x i64> @test_por(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
5037 ; GENERIC-LABEL: test_por:
5039 ; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5040 ; GENERIC-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5041 ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5042 ; GENERIC-NEXT: retq # sched: [1:1.00]
5044 ; HASWELL-LABEL: test_por:
5046 ; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5047 ; HASWELL-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5048 ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5049 ; HASWELL-NEXT: retq # sched: [7:1.00]
5051 ; BROADWELL-LABEL: test_por:
5052 ; BROADWELL: # %bb.0:
5053 ; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5054 ; BROADWELL-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
5055 ; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5056 ; BROADWELL-NEXT: retq # sched: [7:1.00]
5058 ; SKYLAKE-LABEL: test_por:
5060 ; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5061 ; SKYLAKE-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5062 ; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5063 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
5065 ; SKX-LABEL: test_por:
5067 ; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5068 ; SKX-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5069 ; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5070 ; SKX-NEXT: retq # sched: [7:1.00]
5072 ; ZNVER1-LABEL: test_por:
5074 ; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
5075 ; ZNVER1-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5076 ; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
5077 ; ZNVER1-NEXT: retq # sched: [1:0.50]
5078 %1 = or <4 x i64> %a0, %a1
5079 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
5080 %3 = or <4 x i64> %1, %2
5081 %4 = add <4 x i64> %3, %a1
; Schedule test for vpsadbw (ymm). Calls @llvm.x86.avx2.psad.bw on the register
; operands (%a0, %a1), bitcasts the <4 x i64> result back to <32 x i8>, and
; feeds it into a second call whose other operand is a vector loaded from %a2,
; so both the register and the memory-operand forms appear. The per-CPU check
; lines below pin the expected instruction sequence and the "sched" annotation
; of each instruction.
; The intrinsic used here is declared on the last line of this block.
5085 define <4 x i64> @test_psadbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
5086 ; GENERIC-LABEL: test_psadbw:
5088 ; GENERIC-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
5089 ; GENERIC-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
5090 ; GENERIC-NEXT: retq # sched: [1:1.00]
5092 ; HASWELL-LABEL: test_psadbw:
5094 ; HASWELL-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
5095 ; HASWELL-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
5096 ; HASWELL-NEXT: retq # sched: [7:1.00]
5098 ; BROADWELL-LABEL: test_psadbw:
5099 ; BROADWELL: # %bb.0:
5100 ; BROADWELL-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
5101 ; BROADWELL-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
5102 ; BROADWELL-NEXT: retq # sched: [7:1.00]
5104 ; SKYLAKE-LABEL: test_psadbw:
5106 ; SKYLAKE-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
5107 ; SKYLAKE-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
5108 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
5110 ; SKX-LABEL: test_psadbw:
5112 ; SKX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
5113 ; SKX-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
5114 ; SKX-NEXT: retq # sched: [7:1.00]
5116 ; ZNVER1-LABEL: test_psadbw:
5118 ; ZNVER1-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
5119 ; ZNVER1-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
5120 ; ZNVER1-NEXT: retq # sched: [1:0.50]
5121 %1 = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> %a0, <32 x i8> %a1)
5122 %2 = bitcast <4 x i64> %1 to <32 x i8>
5123 %3 = load <32 x i8>, <32 x i8> *%a2, align 32
5124 %4 = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> %2, <32 x i8> %3)
5127 declare <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8>, <32 x i8>) nounwind readnone
; Schedule test for vpshufb (ymm). Calls @llvm.x86.avx2.pshuf.b on the register
; operands (%a0, %a1), then feeds that result into a second call whose other
; operand is a vector loaded from %a2, so both the register and the
; memory-operand forms appear. The per-CPU check lines below pin the expected
; instruction sequence and the "sched" annotation of each instruction.
; The intrinsic used here is declared on the last line of this block.
5129 define <32 x i8> @test_pshufb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
5130 ; GENERIC-LABEL: test_pshufb:
5132 ; GENERIC-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5133 ; GENERIC-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5134 ; GENERIC-NEXT: retq # sched: [1:1.00]
5136 ; HASWELL-LABEL: test_pshufb:
5138 ; HASWELL-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
5139 ; HASWELL-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
5140 ; HASWELL-NEXT: retq # sched: [7:1.00]
5142 ; BROADWELL-LABEL: test_pshufb:
5143 ; BROADWELL: # %bb.0:
5144 ; BROADWELL-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
5145 ; BROADWELL-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
5146 ; BROADWELL-NEXT: retq # sched: [7:1.00]
5148 ; SKYLAKE-LABEL: test_pshufb:
5150 ; SKYLAKE-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
5151 ; SKYLAKE-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
5152 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
5154 ; SKX-LABEL: test_pshufb:
5156 ; SKX-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
5157 ; SKX-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
5158 ; SKX-NEXT: retq # sched: [7:1.00]
5160 ; ZNVER1-LABEL: test_pshufb:
5162 ; ZNVER1-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
5163 ; ZNVER1-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5164 ; ZNVER1-NEXT: retq # sched: [1:0.50]
5165 %1 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> %a1)
5166 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
5167 %3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %1, <32 x i8> %2)
5170 declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>) nounwind readnone
; Schedule test for vpshufd (ymm). Shuffles the register argument %a0 with mask
; <3,2,1,0,7,6,5,4> and an <8 x i32> vector loaded from %a1 with mask
; <1,0,3,2,5,4,7,6>, then adds the two shuffled vectors. The per-CPU check
; lines below pin the expected instruction sequence and the "sched" annotation
; of each instruction.
5172 define <8 x i32> @test_pshufd(<8 x i32> %a0, <8 x i32> *%a1) {
5173 ; GENERIC-LABEL: test_pshufd:
5175 ; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
5176 ; GENERIC-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [8:1.00]
5177 ; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5178 ; GENERIC-NEXT: retq # sched: [1:1.00]
5180 ; HASWELL-LABEL: test_pshufd:
5182 ; HASWELL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
5183 ; HASWELL-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [8:1.00]
5184 ; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5185 ; HASWELL-NEXT: retq # sched: [7:1.00]
5187 ; BROADWELL-LABEL: test_pshufd:
5188 ; BROADWELL: # %bb.0:
5189 ; BROADWELL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
5190 ; BROADWELL-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [7:1.00]
5191 ; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5192 ; BROADWELL-NEXT: retq # sched: [7:1.00]
5194 ; SKYLAKE-LABEL: test_pshufd:
5196 ; SKYLAKE-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
5197 ; SKYLAKE-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [8:1.00]
5198 ; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5199 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
5201 ; SKX-LABEL: test_pshufd:
5203 ; SKX-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
5204 ; SKX-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [8:1.00]
5205 ; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5206 ; SKX-NEXT: retq # sched: [7:1.00]
5208 ; ZNVER1-LABEL: test_pshufd:
5210 ; ZNVER1-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [8:0.50]
5211 ; ZNVER1-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:0.25]
5212 ; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
5213 ; ZNVER1-NEXT: retq # sched: [1:0.50]
5214 %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
5215 %2 = load <8 x i32>, <8 x i32> *%a1, align 32
5216 %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
5217 %4 = add <8 x i32> %1, %3
; Schedule test for vpshufhw (ymm). Both shuffles leave elements 0-3 and 8-11
; in place and permute only elements 4-7 and 12-15: the register argument %a0
; uses the order 7,6,5,4 / 15,14,13,12, and a <16 x i16> vector loaded from %a1
; uses 5,4,7,6 / 13,12,15,14. The two shuffled vectors are then ORed. The
; per-CPU check lines below pin the expected instruction sequence and the
; "sched" annotation of each instruction.
5221 define <16 x i16> @test_pshufhw(<16 x i16> %a0, <16 x i16> *%a1) {
5222 ; GENERIC-LABEL: test_pshufhw:
5224 ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00]
5225 ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [8:1.00]
5226 ; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5227 ; GENERIC-NEXT: retq # sched: [1:1.00]
5229 ; HASWELL-LABEL: test_pshufhw:
5231 ; HASWELL-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00]
5232 ; HASWELL-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [8:1.00]
5233 ; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5234 ; HASWELL-NEXT: retq # sched: [7:1.00]
5236 ; BROADWELL-LABEL: test_pshufhw:
5237 ; BROADWELL: # %bb.0:
5238 ; BROADWELL-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00]
5239 ; BROADWELL-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [7:1.00]
5240 ; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5241 ; BROADWELL-NEXT: retq # sched: [7:1.00]
5243 ; SKYLAKE-LABEL: test_pshufhw:
5245 ; SKYLAKE-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00]
5246 ; SKYLAKE-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [8:1.00]
5247 ; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5248 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
5250 ; SKX-LABEL: test_pshufhw:
5252 ; SKX-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00]
5253 ; SKX-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [8:1.00]
5254 ; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5255 ; SKX-NEXT: retq # sched: [7:1.00]
5257 ; ZNVER1-LABEL: test_pshufhw:
5259 ; ZNVER1-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [8:0.50]
5260 ; ZNVER1-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:0.25]
5261 ; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
5262 ; ZNVER1-NEXT: retq # sched: [1:0.50]
5263 %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 5, i32 4, i32 8, i32 9, i32 10, i32 11, i32 15, i32 14, i32 13, i32 12>
5264 %2 = load <16 x i16>, <16 x i16> *%a1, align 32
5265 %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 7, i32 6, i32 8, i32 9, i32 10, i32 11, i32 13, i32 12, i32 15, i32 14>
5266 %4 = or <16 x i16> %1, %3
; Schedule test for vpshuflw on ymm registers.
; The IR shuffles the register argument %a0, shuffles a vector loaded through
; %a1, and combines both results with `or`. The assertions pin the emitted
; instruction sequence and the `sched: [..]` annotation printed by
; -print-schedule for every CPU model in the RUN lines. Note that the znver1
; output places the memory-operand shuffle before the register shuffle.
5270 define <16 x i16> @test_pshuflw(<16 x i16> %a0, <16 x i16> *%a1) {
5271 ; GENERIC-LABEL: test_pshuflw:
5273 ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00]
5274 ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [8:1.00]
5275 ; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5276 ; GENERIC-NEXT: retq # sched: [1:1.00]
5278 ; HASWELL-LABEL: test_pshuflw:
5280 ; HASWELL-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00]
5281 ; HASWELL-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [8:1.00]
5282 ; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5283 ; HASWELL-NEXT: retq # sched: [7:1.00]
5285 ; BROADWELL-LABEL: test_pshuflw:
5286 ; BROADWELL: # %bb.0:
5287 ; BROADWELL-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00]
5288 ; BROADWELL-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [7:1.00]
5289 ; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5290 ; BROADWELL-NEXT: retq # sched: [7:1.00]
5292 ; SKYLAKE-LABEL: test_pshuflw:
5294 ; SKYLAKE-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00]
5295 ; SKYLAKE-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [8:1.00]
5296 ; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5297 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
5299 ; SKX-LABEL: test_pshuflw:
5301 ; SKX-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00]
5302 ; SKX-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [8:1.00]
5303 ; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5304 ; SKX-NEXT: retq # sched: [7:1.00]
5306 ; ZNVER1-LABEL: test_pshuflw:
5308 ; ZNVER1-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [8:0.50]
5309 ; ZNVER1-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:0.25]
5310 ; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
5311 ; ZNVER1-NEXT: retq # sched: [1:0.50]
5312 %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 9, i32 8, i32 12, i32 13, i32 14, i32 15>
5313 %2 = load <16 x i16>, <16 x i16> *%a1, align 32
5314 %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 9, i32 8, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15>
5315 %4 = or <16 x i16> %1, %3
; Schedule test for vpsignb on ymm registers.
; The IR chains two calls to llvm.x86.avx2.psign.b: the first takes both
; operands from the arguments %a0/%a1, the second takes its second operand
; from a load through %a2. The assertions expect the register form followed
; by the (%rdi) memory-operand form, each with the `sched: [..]` annotation
; printed for the CPU model under test.
5319 define <32 x i8> @test_psignb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
5320 ; GENERIC-LABEL: test_psignb:
5322 ; GENERIC-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5323 ; GENERIC-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5324 ; GENERIC-NEXT: retq # sched: [1:1.00]
5326 ; HASWELL-LABEL: test_psignb:
5328 ; HASWELL-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5329 ; HASWELL-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5330 ; HASWELL-NEXT: retq # sched: [7:1.00]
5332 ; BROADWELL-LABEL: test_psignb:
5333 ; BROADWELL: # %bb.0:
5334 ; BROADWELL-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5335 ; BROADWELL-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
5336 ; BROADWELL-NEXT: retq # sched: [7:1.00]
5338 ; SKYLAKE-LABEL: test_psignb:
5340 ; SKYLAKE-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5341 ; SKYLAKE-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5342 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
5344 ; SKX-LABEL: test_psignb:
5346 ; SKX-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5347 ; SKX-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5348 ; SKX-NEXT: retq # sched: [7:1.00]
5350 ; ZNVER1-LABEL: test_psignb:
5352 ; ZNVER1-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
5353 ; ZNVER1-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5354 ; ZNVER1-NEXT: retq # sched: [1:0.50]
5355 %1 = call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %a0, <32 x i8> %a1)
5356 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
5357 %3 = call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %1, <32 x i8> %2)
5360 declare <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8>, <32 x i8>) nounwind readnone
; Schedule test for vpsignd on ymm registers.
; The IR chains two calls to llvm.x86.avx2.psign.d: the first takes both
; operands from the arguments %a0/%a1, the second takes its second operand
; from a load through %a2. The assertions expect the register form followed
; by the (%rdi) memory-operand form, each with the `sched: [..]` annotation
; printed for the CPU model under test.
5362 define <8 x i32> @test_psignd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
5363 ; GENERIC-LABEL: test_psignd:
5365 ; GENERIC-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5366 ; GENERIC-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5367 ; GENERIC-NEXT: retq # sched: [1:1.00]
5369 ; HASWELL-LABEL: test_psignd:
5371 ; HASWELL-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5372 ; HASWELL-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5373 ; HASWELL-NEXT: retq # sched: [7:1.00]
5375 ; BROADWELL-LABEL: test_psignd:
5376 ; BROADWELL: # %bb.0:
5377 ; BROADWELL-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5378 ; BROADWELL-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
5379 ; BROADWELL-NEXT: retq # sched: [7:1.00]
5381 ; SKYLAKE-LABEL: test_psignd:
5383 ; SKYLAKE-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5384 ; SKYLAKE-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5385 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
5387 ; SKX-LABEL: test_psignd:
5389 ; SKX-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5390 ; SKX-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5391 ; SKX-NEXT: retq # sched: [7:1.00]
5393 ; ZNVER1-LABEL: test_psignd:
5395 ; ZNVER1-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
5396 ; ZNVER1-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5397 ; ZNVER1-NEXT: retq # sched: [1:0.50]
5398 %1 = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %a0, <8 x i32> %a1)
5399 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
5400 %3 = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %1, <8 x i32> %2)
5403 declare <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32>, <8 x i32>) nounwind readnone
; Schedule test for vpsignw on ymm registers.
; The IR chains two calls to llvm.x86.avx2.psign.w: the first takes both
; operands from the arguments %a0/%a1, the second takes its second operand
; from a load through %a2. The assertions expect the register form followed
; by the (%rdi) memory-operand form, each with the `sched: [..]` annotation
; printed for the CPU model under test.
5405 define <16 x i16> @test_psignw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
5406 ; GENERIC-LABEL: test_psignw:
5408 ; GENERIC-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5409 ; GENERIC-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5410 ; GENERIC-NEXT: retq # sched: [1:1.00]
5412 ; HASWELL-LABEL: test_psignw:
5414 ; HASWELL-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5415 ; HASWELL-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5416 ; HASWELL-NEXT: retq # sched: [7:1.00]
5418 ; BROADWELL-LABEL: test_psignw:
5419 ; BROADWELL: # %bb.0:
5420 ; BROADWELL-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5421 ; BROADWELL-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
5422 ; BROADWELL-NEXT: retq # sched: [7:1.00]
5424 ; SKYLAKE-LABEL: test_psignw:
5426 ; SKYLAKE-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5427 ; SKYLAKE-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5428 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
5430 ; SKX-LABEL: test_psignw:
5432 ; SKX-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5433 ; SKX-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5434 ; SKX-NEXT: retq # sched: [7:1.00]
5436 ; ZNVER1-LABEL: test_psignw:
5438 ; ZNVER1-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
5439 ; ZNVER1-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5440 ; ZNVER1-NEXT: retq # sched: [1:0.50]
5441 %1 = call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> %a0, <16 x i16> %a1)
5442 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
5443 %3 = call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> %1, <16 x i16> %2)
5446 declare <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16>, <16 x i16>) nounwind readnone
; Schedule test for vpslld with a ymm destination, covering three operand
; forms in one function:
;   1. shift count in an xmm register (llvm.x86.avx2.psll.d with %a1),
;   2. shift count loaded through %a2 (expected as the (%rdi) form),
;   3. constant shift from the plain `shl` by a splat of 2 (expected as `$2`).
; Each expected instruction carries the `sched: [..]` annotation printed for
; the CPU model under test.
5448 define <8 x i32> @test_pslld(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
5449 ; GENERIC-LABEL: test_pslld:
5451 ; GENERIC-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
5452 ; GENERIC-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
5453 ; GENERIC-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:1.00]
5454 ; GENERIC-NEXT: retq # sched: [1:1.00]
5456 ; HASWELL-LABEL: test_pslld:
5458 ; HASWELL-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
5459 ; HASWELL-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
5460 ; HASWELL-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:1.00]
5461 ; HASWELL-NEXT: retq # sched: [7:1.00]
5463 ; BROADWELL-LABEL: test_pslld:
5464 ; BROADWELL: # %bb.0:
5465 ; BROADWELL-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
5466 ; BROADWELL-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
5467 ; BROADWELL-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:1.00]
5468 ; BROADWELL-NEXT: retq # sched: [7:1.00]
5470 ; SKYLAKE-LABEL: test_pslld:
5472 ; SKYLAKE-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
5473 ; SKYLAKE-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5474 ; SKYLAKE-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:0.50]
5475 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
5477 ; SKX-LABEL: test_pslld:
5479 ; SKX-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
5480 ; SKX-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5481 ; SKX-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:0.50]
5482 ; SKX-NEXT: retq # sched: [7:1.00]
5484 ; ZNVER1-LABEL: test_pslld:
5486 ; ZNVER1-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
5487 ; ZNVER1-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
5488 ; ZNVER1-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:0.25]
5489 ; ZNVER1-NEXT: retq # sched: [1:0.50]
5490 %1 = call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %a0, <4 x i32> %a1)
5491 %2 = load <4 x i32>, <4 x i32> *%a2, align 16
5492 %3 = call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %1, <4 x i32> %2)
5493 %4 = shl <8 x i32> %3, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
5496 declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) nounwind readnone
; Schedule test for vpslldq on a ymm register.
; The IR is a single shufflevector of zeroinitializer with %a0 whose mask
; places three zero bytes ahead of bytes 0-12 of each 16-byte half of %a0.
; The assertions expect this to be emitted as one vpslldq with the
; `sched: [..]` annotation for the CPU model under test; only the register
; form is exercised here.
5498 define <32 x i8> @test_pslldq(<32 x i8> %a0) {
5499 ; GENERIC-LABEL: test_pslldq:
5501 ; GENERIC-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00]
5502 ; GENERIC-NEXT: retq # sched: [1:1.00]
5504 ; HASWELL-LABEL: test_pslldq:
5506 ; HASWELL-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00]
5507 ; HASWELL-NEXT: retq # sched: [7:1.00]
5509 ; BROADWELL-LABEL: test_pslldq:
5510 ; BROADWELL: # %bb.0:
5511 ; BROADWELL-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00]
5512 ; BROADWELL-NEXT: retq # sched: [7:1.00]
5514 ; SKYLAKE-LABEL: test_pslldq:
5516 ; SKYLAKE-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00]
5517 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
5519 ; SKX-LABEL: test_pslldq:
5521 ; SKX-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00]
5522 ; SKX-NEXT: retq # sched: [7:1.00]
5524 ; ZNVER1-LABEL: test_pslldq:
5526 ; ZNVER1-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [2:1.00]
5527 ; ZNVER1-NEXT: retq # sched: [1:0.50]
5528 %1 = shufflevector <32 x i8> zeroinitializer, <32 x i8> %a0, <32 x i32> <i32 13, i32 14, i32 15, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 29, i32 30, i32 31, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60>
; Schedule test for vpsllq with a ymm destination, covering three operand
; forms in one function:
;   1. shift count in an xmm register (llvm.x86.avx2.psll.q with %a1),
;   2. shift count loaded through %a2 (expected as the (%rdi) form),
;   3. constant shift from the plain `shl` by a splat of 2 (expected as `$2`).
; Each expected instruction carries the `sched: [..]` annotation printed for
; the CPU model under test.
5532 define <4 x i64> @test_psllq(<4 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
5533 ; GENERIC-LABEL: test_psllq:
5535 ; GENERIC-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
5536 ; GENERIC-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
5537 ; GENERIC-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:1.00]
5538 ; GENERIC-NEXT: retq # sched: [1:1.00]
5540 ; HASWELL-LABEL: test_psllq:
5542 ; HASWELL-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
5543 ; HASWELL-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
5544 ; HASWELL-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:1.00]
5545 ; HASWELL-NEXT: retq # sched: [7:1.00]
5547 ; BROADWELL-LABEL: test_psllq:
5548 ; BROADWELL: # %bb.0:
5549 ; BROADWELL-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
5550 ; BROADWELL-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
5551 ; BROADWELL-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:1.00]
5552 ; BROADWELL-NEXT: retq # sched: [7:1.00]
5554 ; SKYLAKE-LABEL: test_psllq:
5556 ; SKYLAKE-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
5557 ; SKYLAKE-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5558 ; SKYLAKE-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:0.50]
5559 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
5561 ; SKX-LABEL: test_psllq:
5563 ; SKX-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
5564 ; SKX-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5565 ; SKX-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:0.50]
5566 ; SKX-NEXT: retq # sched: [7:1.00]
5568 ; ZNVER1-LABEL: test_psllq:
5570 ; ZNVER1-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
5571 ; ZNVER1-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
5572 ; ZNVER1-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:0.25]
5573 ; ZNVER1-NEXT: retq # sched: [1:0.50]
5574 %1 = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %a0, <2 x i64> %a1)
5575 %2 = load <2 x i64>, <2 x i64> *%a2, align 16
5576 %3 = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %1, <2 x i64> %2)
5577 %4 = shl <4 x i64> %3, <i64 2, i64 2, i64 2, i64 2>
5580 declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) nounwind readnone
; Schedule test for vpsllvd on xmm registers.
; The IR chains two calls to llvm.x86.avx2.psllv.d: the first takes both
; operands from the arguments %a0/%a1, the second takes its second operand
; from a load through %a2. The assertions expect the register form followed
; by the (%rdi) memory-operand form, each with the `sched: [..]` annotation
; printed for the CPU model under test.
5582 define <4 x i32> @test_psllvd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
5583 ; GENERIC-LABEL: test_psllvd:
5585 ; GENERIC-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
5586 ; GENERIC-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
5587 ; GENERIC-NEXT: retq # sched: [1:1.00]
5589 ; HASWELL-LABEL: test_psllvd:
5591 ; HASWELL-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
5592 ; HASWELL-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
5593 ; HASWELL-NEXT: retq # sched: [7:1.00]
5595 ; BROADWELL-LABEL: test_psllvd:
5596 ; BROADWELL: # %bb.0:
5597 ; BROADWELL-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
5598 ; BROADWELL-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
5599 ; BROADWELL-NEXT: retq # sched: [7:1.00]
5601 ; SKYLAKE-LABEL: test_psllvd:
5603 ; SKYLAKE-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
5604 ; SKYLAKE-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
5605 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
5607 ; SKX-LABEL: test_psllvd:
5609 ; SKX-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
5610 ; SKX-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
5611 ; SKX-NEXT: retq # sched: [7:1.00]
5613 ; ZNVER1-LABEL: test_psllvd:
5615 ; ZNVER1-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
5616 ; ZNVER1-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
5617 ; ZNVER1-NEXT: retq # sched: [1:0.50]
5618 %1 = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %a0, <4 x i32> %a1)
5619 %2 = load <4 x i32>, <4 x i32> *%a2, align 16
5620 %3 = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %1, <4 x i32> %2)
5623 declare <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32>, <4 x i32>) nounwind readnone
; Schedule test for vpsllvd on ymm registers.
; The IR chains two calls to llvm.x86.avx2.psllv.d.256: the first takes both
; operands from the arguments %a0/%a1, the second takes its second operand
; from a load through %a2. The assertions expect the register form followed
; by the (%rdi) memory-operand form, each with the `sched: [..]` annotation
; printed for the CPU model under test.
5625 define <8 x i32> @test_psllvd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
5626 ; GENERIC-LABEL: test_psllvd_ymm:
5628 ; GENERIC-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
5629 ; GENERIC-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
5630 ; GENERIC-NEXT: retq # sched: [1:1.00]
5632 ; HASWELL-LABEL: test_psllvd_ymm:
5634 ; HASWELL-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
5635 ; HASWELL-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
5636 ; HASWELL-NEXT: retq # sched: [7:1.00]
5638 ; BROADWELL-LABEL: test_psllvd_ymm:
5639 ; BROADWELL: # %bb.0:
5640 ; BROADWELL-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
5641 ; BROADWELL-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
5642 ; BROADWELL-NEXT: retq # sched: [7:1.00]
5644 ; SKYLAKE-LABEL: test_psllvd_ymm:
5646 ; SKYLAKE-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5647 ; SKYLAKE-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5648 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
5650 ; SKX-LABEL: test_psllvd_ymm:
5652 ; SKX-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5653 ; SKX-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5654 ; SKX-NEXT: retq # sched: [7:1.00]
5656 ; ZNVER1-LABEL: test_psllvd_ymm:
5658 ; ZNVER1-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5659 ; ZNVER1-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5660 ; ZNVER1-NEXT: retq # sched: [1:0.50]
5661 %1 = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %a0, <8 x i32> %a1)
5662 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
5663 %3 = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %1, <8 x i32> %2)
5666 declare <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32>, <8 x i32>) nounwind readnone
; Schedule test for vpsllvq on xmm registers.
; The IR chains two calls to llvm.x86.avx2.psllv.q: the first takes both
; operands from the arguments %a0/%a1, the second takes its second operand
; from a load through %a2. The assertions expect the register form followed
; by the (%rdi) memory-operand form, each with the `sched: [..]` annotation
; printed for the CPU model under test.
5668 define <2 x i64> @test_psllvq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
5669 ; GENERIC-LABEL: test_psllvq:
5671 ; GENERIC-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
5672 ; GENERIC-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
5673 ; GENERIC-NEXT: retq # sched: [1:1.00]
5675 ; HASWELL-LABEL: test_psllvq:
5677 ; HASWELL-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
5678 ; HASWELL-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
5679 ; HASWELL-NEXT: retq # sched: [7:1.00]
5681 ; BROADWELL-LABEL: test_psllvq:
5682 ; BROADWELL: # %bb.0:
5683 ; BROADWELL-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
5684 ; BROADWELL-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
5685 ; BROADWELL-NEXT: retq # sched: [7:1.00]
5687 ; SKYLAKE-LABEL: test_psllvq:
5689 ; SKYLAKE-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
5690 ; SKYLAKE-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
5691 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
5693 ; SKX-LABEL: test_psllvq:
5695 ; SKX-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
5696 ; SKX-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
5697 ; SKX-NEXT: retq # sched: [7:1.00]
5699 ; ZNVER1-LABEL: test_psllvq:
5701 ; ZNVER1-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
5702 ; ZNVER1-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
5703 ; ZNVER1-NEXT: retq # sched: [1:0.50]
5704 %1 = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %a0, <2 x i64> %a1)
5705 %2 = load <2 x i64>, <2 x i64> *%a2, align 16
5706 %3 = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %1, <2 x i64> %2)
5709 declare <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64>, <2 x i64>) nounwind readnone
; Schedule test for vpsllvq on ymm registers.
; The IR chains two calls to llvm.x86.avx2.psllv.q.256: the first takes both
; operands from the arguments %a0/%a1, the second takes its second operand
; from a load through %a2. The assertions expect the register form followed
; by the (%rdi) memory-operand form, each with the `sched: [..]` annotation
; printed for the CPU model under test.
5711 define <4 x i64> @test_psllvq_ymm(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
5712 ; GENERIC-LABEL: test_psllvq_ymm:
5714 ; GENERIC-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
5715 ; GENERIC-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
5716 ; GENERIC-NEXT: retq # sched: [1:1.00]
5718 ; HASWELL-LABEL: test_psllvq_ymm:
5720 ; HASWELL-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
5721 ; HASWELL-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
5722 ; HASWELL-NEXT: retq # sched: [7:1.00]
5724 ; BROADWELL-LABEL: test_psllvq_ymm:
5725 ; BROADWELL: # %bb.0:
5726 ; BROADWELL-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
5727 ; BROADWELL-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
5728 ; BROADWELL-NEXT: retq # sched: [7:1.00]
5730 ; SKYLAKE-LABEL: test_psllvq_ymm:
5732 ; SKYLAKE-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5733 ; SKYLAKE-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5734 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
5736 ; SKX-LABEL: test_psllvq_ymm:
5738 ; SKX-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5739 ; SKX-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5740 ; SKX-NEXT: retq # sched: [7:1.00]
5742 ; ZNVER1-LABEL: test_psllvq_ymm:
5744 ; ZNVER1-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5745 ; ZNVER1-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5746 ; ZNVER1-NEXT: retq # sched: [1:0.50]
5747 %1 = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %a0, <4 x i64> %a1)
5748 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
5749 %3 = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %1, <4 x i64> %2)
5752 declare <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64>, <4 x i64>) nounwind readnone
; Schedule test for vpsllw with a ymm destination, covering three operand
; forms in one function:
;   1. shift count in an xmm register (llvm.x86.avx2.psll.w with %a1),
;   2. shift count loaded through %a2 (expected as the (%rdi) form),
;   3. constant shift from the plain `shl` by a splat of 2 (expected as `$2`).
; Each expected instruction carries the `sched: [..]` annotation printed for
; the CPU model under test.
5754 define <16 x i16> @test_psllw(<16 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
5755 ; GENERIC-LABEL: test_psllw:
5757 ; GENERIC-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
5758 ; GENERIC-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
5759 ; GENERIC-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:1.00]
5760 ; GENERIC-NEXT: retq # sched: [1:1.00]
5762 ; HASWELL-LABEL: test_psllw:
5764 ; HASWELL-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
5765 ; HASWELL-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
5766 ; HASWELL-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:1.00]
5767 ; HASWELL-NEXT: retq # sched: [7:1.00]
5769 ; BROADWELL-LABEL: test_psllw:
5770 ; BROADWELL: # %bb.0:
5771 ; BROADWELL-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
5772 ; BROADWELL-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
5773 ; BROADWELL-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:1.00]
5774 ; BROADWELL-NEXT: retq # sched: [7:1.00]
5776 ; SKYLAKE-LABEL: test_psllw:
5778 ; SKYLAKE-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
5779 ; SKYLAKE-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5780 ; SKYLAKE-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:0.50]
5781 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
5783 ; SKX-LABEL: test_psllw:
5785 ; SKX-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
5786 ; SKX-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5787 ; SKX-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:0.50]
5788 ; SKX-NEXT: retq # sched: [7:1.00]
5790 ; ZNVER1-LABEL: test_psllw:
5792 ; ZNVER1-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
5793 ; ZNVER1-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
5794 ; ZNVER1-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:0.25]
5795 ; ZNVER1-NEXT: retq # sched: [1:0.50]
5796 %1 = call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %a0, <8 x i16> %a1)
5797 %2 = load <8 x i16>, <8 x i16> *%a2, align 16
5798 %3 = call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %1, <8 x i16> %2)
5799 %4 = shl <16 x i16> %3, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
5802 declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) nounwind readnone
; Schedule test for vpsrad with a ymm destination, covering three operand
; forms in one function:
;   1. shift count in an xmm register (llvm.x86.avx2.psra.d with %a1),
;   2. shift count loaded through %a2 (expected as the (%rdi) form),
;   3. constant shift from the plain `ashr` by a splat of 2 (expected as `$2`).
; Each expected instruction carries the `sched: [..]` annotation printed for
; the CPU model under test.
5804 define <8 x i32> @test_psrad(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
5805 ; GENERIC-LABEL: test_psrad:
5807 ; GENERIC-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
5808 ; GENERIC-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
5809 ; GENERIC-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:1.00]
5810 ; GENERIC-NEXT: retq # sched: [1:1.00]
5812 ; HASWELL-LABEL: test_psrad:
5814 ; HASWELL-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
5815 ; HASWELL-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
5816 ; HASWELL-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:1.00]
5817 ; HASWELL-NEXT: retq # sched: [7:1.00]
5819 ; BROADWELL-LABEL: test_psrad:
5820 ; BROADWELL: # %bb.0:
5821 ; BROADWELL-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
5822 ; BROADWELL-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
5823 ; BROADWELL-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:1.00]
5824 ; BROADWELL-NEXT: retq # sched: [7:1.00]
5826 ; SKYLAKE-LABEL: test_psrad:
5828 ; SKYLAKE-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
5829 ; SKYLAKE-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5830 ; SKYLAKE-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:0.50]
5831 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
5833 ; SKX-LABEL: test_psrad:
5835 ; SKX-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
5836 ; SKX-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5837 ; SKX-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:0.50]
5838 ; SKX-NEXT: retq # sched: [7:1.00]
5840 ; ZNVER1-LABEL: test_psrad:
5842 ; ZNVER1-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
5843 ; ZNVER1-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
5844 ; ZNVER1-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:0.25]
5845 ; ZNVER1-NEXT: retq # sched: [1:0.50]
5846 %1 = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %a0, <4 x i32> %a1)
5847 %2 = load <4 x i32>, <4 x i32> *%a2, align 16
5848 %3 = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %1, <4 x i32> %2)
5849 %4 = ashr <8 x i32> %3, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
5852 declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) nounwind readnone
; Schedule test for vpsravd on xmm registers.
; The IR chains two calls to llvm.x86.avx2.psrav.d: the first takes both
; operands from the arguments %a0/%a1, the second takes its second operand
; from a load through %a2. The assertions expect the register form followed
; by the (%rdi) memory-operand form, each with the `sched: [..]` annotation
; printed for the CPU model under test.
5854 define <4 x i32> @test_psravd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
5855 ; GENERIC-LABEL: test_psravd:
5857 ; GENERIC-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
5858 ; GENERIC-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
5859 ; GENERIC-NEXT: retq # sched: [1:1.00]
5861 ; HASWELL-LABEL: test_psravd:
5863 ; HASWELL-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
5864 ; HASWELL-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
5865 ; HASWELL-NEXT: retq # sched: [7:1.00]
5867 ; BROADWELL-LABEL: test_psravd:
5868 ; BROADWELL: # %bb.0:
5869 ; BROADWELL-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
5870 ; BROADWELL-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
5871 ; BROADWELL-NEXT: retq # sched: [7:1.00]
5873 ; SKYLAKE-LABEL: test_psravd:
5875 ; SKYLAKE-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
5876 ; SKYLAKE-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
5877 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
5879 ; SKX-LABEL: test_psravd:
5881 ; SKX-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
5882 ; SKX-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
5883 ; SKX-NEXT: retq # sched: [7:1.00]
5885 ; ZNVER1-LABEL: test_psravd:
5887 ; ZNVER1-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
5888 ; ZNVER1-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
5889 ; ZNVER1-NEXT: retq # sched: [1:0.50]
5890 %1 = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %a0, <4 x i32> %a1)
5891 %2 = load <4 x i32>, <4 x i32> *%a2, align 16
5892 %3 = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %1, <4 x i32> %2)
5895 declare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) nounwind readnone
; Schedule test for vpsravd on ymm registers.
; The IR chains two calls to llvm.x86.avx2.psrav.d.256: the first takes both
; operands from the arguments %a0/%a1, the second takes its second operand
; from a load through %a2. The assertions expect the register form followed
; by the (%rdi) memory-operand form, each with the `sched: [..]` annotation
; printed for the CPU model under test.
5897 define <8 x i32> @test_psravd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
5898 ; GENERIC-LABEL: test_psravd_ymm:
5900 ; GENERIC-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
5901 ; GENERIC-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
5902 ; GENERIC-NEXT: retq # sched: [1:1.00]
5904 ; HASWELL-LABEL: test_psravd_ymm:
5906 ; HASWELL-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
5907 ; HASWELL-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
5908 ; HASWELL-NEXT: retq # sched: [7:1.00]
5910 ; BROADWELL-LABEL: test_psravd_ymm:
5911 ; BROADWELL: # %bb.0:
5912 ; BROADWELL-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
5913 ; BROADWELL-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
5914 ; BROADWELL-NEXT: retq # sched: [7:1.00]
5916 ; SKYLAKE-LABEL: test_psravd_ymm:
5918 ; SKYLAKE-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5919 ; SKYLAKE-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5920 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
5922 ; SKX-LABEL: test_psravd_ymm:
5924 ; SKX-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5925 ; SKX-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5926 ; SKX-NEXT: retq # sched: [7:1.00]
5928 ; ZNVER1-LABEL: test_psravd_ymm:
5930 ; ZNVER1-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5931 ; ZNVER1-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5932 ; ZNVER1-NEXT: retq # sched: [1:0.50]
5933 %1 = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %a0, <8 x i32> %a1)
5934 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
5935 %3 = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %1, <8 x i32> %2)
5938 declare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) nounwind readnone
; Schedule test for vpsraw with a ymm destination, covering three operand
; forms in one function:
;   1. shift count in an xmm register (llvm.x86.avx2.psra.w with %a1),
;   2. shift count loaded through %a2 (expected as the (%rdi) form),
;   3. constant shift from the plain `ashr` by a splat of 2 (expected as `$2`).
; Each expected instruction carries the `sched: [..]` annotation printed for
; the CPU model under test.
5940 define <16 x i16> @test_psraw(<16 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
5941 ; GENERIC-LABEL: test_psraw:
5943 ; GENERIC-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
5944 ; GENERIC-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
5945 ; GENERIC-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:1.00]
5946 ; GENERIC-NEXT: retq # sched: [1:1.00]
5948 ; HASWELL-LABEL: test_psraw:
5950 ; HASWELL-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
5951 ; HASWELL-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
5952 ; HASWELL-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:1.00]
5953 ; HASWELL-NEXT: retq # sched: [7:1.00]
5955 ; BROADWELL-LABEL: test_psraw:
5956 ; BROADWELL: # %bb.0:
5957 ; BROADWELL-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
5958 ; BROADWELL-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
5959 ; BROADWELL-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:1.00]
5960 ; BROADWELL-NEXT: retq # sched: [7:1.00]
5962 ; SKYLAKE-LABEL: test_psraw:
5964 ; SKYLAKE-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
5965 ; SKYLAKE-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5966 ; SKYLAKE-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:0.50]
5967 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
5969 ; SKX-LABEL: test_psraw:
5971 ; SKX-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
5972 ; SKX-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5973 ; SKX-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:0.50]
5974 ; SKX-NEXT: retq # sched: [7:1.00]
5976 ; ZNVER1-LABEL: test_psraw:
5978 ; ZNVER1-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
5979 ; ZNVER1-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
5980 ; ZNVER1-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:0.25]
5981 ; ZNVER1-NEXT: retq # sched: [1:0.50]
5982 %1 = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %a0, <8 x i16> %a1)
5983 %2 = load <8 x i16>, <8 x i16> *%a2, align 16
5984 %3 = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %1, <8 x i16> %2)
5985 %4 = ashr <16 x i16> %3, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
5988 declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) nounwind readnone
; Schedule test for vpsrld with a ymm destination, covering three operand
; forms in one function:
;   1. shift count in an xmm register (llvm.x86.avx2.psrl.d with %a1),
;   2. shift count loaded through %a2 (expected as the (%rdi) form),
;   3. constant shift from the plain `lshr` by a splat of 2 (expected as `$2`).
; Each expected instruction carries the `sched: [..]` annotation printed for
; the CPU model under test.
5990 define <8 x i32> @test_psrld(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
5991 ; GENERIC-LABEL: test_psrld:
5993 ; GENERIC-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
5994 ; GENERIC-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
5995 ; GENERIC-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:1.00]
5996 ; GENERIC-NEXT: retq # sched: [1:1.00]
5998 ; HASWELL-LABEL: test_psrld:
6000 ; HASWELL-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
6001 ; HASWELL-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
6002 ; HASWELL-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:1.00]
6003 ; HASWELL-NEXT: retq # sched: [7:1.00]
6005 ; BROADWELL-LABEL: test_psrld:
6006 ; BROADWELL: # %bb.0:
6007 ; BROADWELL-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
6008 ; BROADWELL-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
6009 ; BROADWELL-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:1.00]
6010 ; BROADWELL-NEXT: retq # sched: [7:1.00]
6012 ; SKYLAKE-LABEL: test_psrld:
6014 ; SKYLAKE-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
6015 ; SKYLAKE-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6016 ; SKYLAKE-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:0.50]
6017 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
6019 ; SKX-LABEL: test_psrld:
6021 ; SKX-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
6022 ; SKX-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6023 ; SKX-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:0.50]
6024 ; SKX-NEXT: retq # sched: [7:1.00]
6026 ; ZNVER1-LABEL: test_psrld:
6028 ; ZNVER1-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
6029 ; ZNVER1-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
6030 ; ZNVER1-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:0.25]
6031 ; ZNVER1-NEXT: retq # sched: [1:0.50]
6032 %1 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %a0, <4 x i32> %a1)
6033 %2 = load <4 x i32>, <4 x i32> *%a2, align 16
6034 %3 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %1, <4 x i32> %2)
6035 %4 = lshr <8 x i32> %3, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
6038 declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) nounwind readnone
; test_psrldq: a shufflevector of %a0 against zeroinitializer whose mask, in
; each 16-byte half, keeps source bytes 3..15 (19..31 for the upper half) and
; appends three bytes taken from the zero vector. Every CPU is expected to
; select a single vpsrldq for it.
6040 define <32 x i8> @test_psrldq(<32 x i8> %a0) {
6041 ; GENERIC-LABEL: test_psrldq:
6043 ; GENERIC-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00]
6044 ; GENERIC-NEXT: retq # sched: [1:1.00]
6046 ; HASWELL-LABEL: test_psrldq:
6048 ; HASWELL-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00]
6049 ; HASWELL-NEXT: retq # sched: [7:1.00]
6051 ; BROADWELL-LABEL: test_psrldq:
6052 ; BROADWELL: # %bb.0:
6053 ; BROADWELL-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00]
6054 ; BROADWELL-NEXT: retq # sched: [7:1.00]
6056 ; SKYLAKE-LABEL: test_psrldq:
6058 ; SKYLAKE-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00]
6059 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
6061 ; SKX-LABEL: test_psrldq:
6063 ; SKX-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00]
6064 ; SKX-NEXT: retq # sched: [7:1.00]
6066 ; ZNVER1-LABEL: test_psrldq:
6068 ; ZNVER1-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [2:1.00]
6069 ; ZNVER1-NEXT: retq # sched: [1:0.50]
6070 %1 = shufflevector <32 x i8> %a0, <32 x i8> zeroinitializer, <32 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 33, i32 34, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48, i32 49, i32 50>
; test_psrlq: shifts <4 x i64> %a0 right three times in a row --
; @llvm.x86.avx2.psrl.q with the count in %a1, the same intrinsic with the
; count loaded from %a2, and an lshr by a splat constant 2. The assertions for
; every CPU expect one vpsrlq per form: xmm count, (%rdi) count, immediate $2.
6074 define <4 x i64> @test_psrlq(<4 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
6075 ; GENERIC-LABEL: test_psrlq:
6077 ; GENERIC-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
6078 ; GENERIC-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
6079 ; GENERIC-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:1.00]
6080 ; GENERIC-NEXT: retq # sched: [1:1.00]
6082 ; HASWELL-LABEL: test_psrlq:
6084 ; HASWELL-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
6085 ; HASWELL-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
6086 ; HASWELL-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:1.00]
6087 ; HASWELL-NEXT: retq # sched: [7:1.00]
6089 ; BROADWELL-LABEL: test_psrlq:
6090 ; BROADWELL: # %bb.0:
6091 ; BROADWELL-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
6092 ; BROADWELL-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
6093 ; BROADWELL-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:1.00]
6094 ; BROADWELL-NEXT: retq # sched: [7:1.00]
6096 ; SKYLAKE-LABEL: test_psrlq:
6098 ; SKYLAKE-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
6099 ; SKYLAKE-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6100 ; SKYLAKE-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:0.50]
6101 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
6103 ; SKX-LABEL: test_psrlq:
6105 ; SKX-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
6106 ; SKX-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6107 ; SKX-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:0.50]
6108 ; SKX-NEXT: retq # sched: [7:1.00]
6110 ; ZNVER1-LABEL: test_psrlq:
6112 ; ZNVER1-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
6113 ; ZNVER1-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
6114 ; ZNVER1-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:0.25]
6115 ; ZNVER1-NEXT: retq # sched: [1:0.50]
6116 %1 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %a0, <2 x i64> %a1)
6117 %2 = load <2 x i64>, <2 x i64> *%a2, align 16
6118 %3 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %1, <2 x i64> %2)
6119 %4 = lshr <4 x i64> %3, <i64 2, i64 2, i64 2, i64 2>
6122 declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) nounwind readnone
; test_psrlvd: calls @llvm.x86.avx2.psrlv.d twice on <4 x i32> values -- first
; with both operands in registers (%a0, %a1), then with the second operand
; loaded from %a2. The assertions expect the xmm register form followed by the
; (%rdi) memory form of vpsrlvd on every CPU.
6124 define <4 x i32> @test_psrlvd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
6125 ; GENERIC-LABEL: test_psrlvd:
6127 ; GENERIC-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
6128 ; GENERIC-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
6129 ; GENERIC-NEXT: retq # sched: [1:1.00]
6131 ; HASWELL-LABEL: test_psrlvd:
6133 ; HASWELL-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
6134 ; HASWELL-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
6135 ; HASWELL-NEXT: retq # sched: [7:1.00]
6137 ; BROADWELL-LABEL: test_psrlvd:
6138 ; BROADWELL: # %bb.0:
6139 ; BROADWELL-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
6140 ; BROADWELL-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
6141 ; BROADWELL-NEXT: retq # sched: [7:1.00]
6143 ; SKYLAKE-LABEL: test_psrlvd:
6145 ; SKYLAKE-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
6146 ; SKYLAKE-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
6147 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
6149 ; SKX-LABEL: test_psrlvd:
6151 ; SKX-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
6152 ; SKX-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
6153 ; SKX-NEXT: retq # sched: [7:1.00]
6155 ; ZNVER1-LABEL: test_psrlvd:
6157 ; ZNVER1-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
6158 ; ZNVER1-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
6159 ; ZNVER1-NEXT: retq # sched: [1:0.50]
6160 %1 = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %a0, <4 x i32> %a1)
6161 %2 = load <4 x i32>, <4 x i32> *%a2, align 16
6162 %3 = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %1, <4 x i32> %2)
6165 declare <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32>, <4 x i32>) nounwind readnone
; test_psrlvd_ymm: calls @llvm.x86.avx2.psrlv.d.256 twice on <8 x i32> values
; -- first with both operands in registers (%a0, %a1), then with the second
; operand loaded (align 32) from %a2. The assertions expect the ymm register
; form followed by the (%rdi) memory form of vpsrlvd on every CPU.
6167 define <8 x i32> @test_psrlvd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
6168 ; GENERIC-LABEL: test_psrlvd_ymm:
6170 ; GENERIC-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
6171 ; GENERIC-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
6172 ; GENERIC-NEXT: retq # sched: [1:1.00]
6174 ; HASWELL-LABEL: test_psrlvd_ymm:
6176 ; HASWELL-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
6177 ; HASWELL-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
6178 ; HASWELL-NEXT: retq # sched: [7:1.00]
6180 ; BROADWELL-LABEL: test_psrlvd_ymm:
6181 ; BROADWELL: # %bb.0:
6182 ; BROADWELL-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
6183 ; BROADWELL-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
6184 ; BROADWELL-NEXT: retq # sched: [7:1.00]
6186 ; SKYLAKE-LABEL: test_psrlvd_ymm:
6188 ; SKYLAKE-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6189 ; SKYLAKE-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6190 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
6192 ; SKX-LABEL: test_psrlvd_ymm:
6194 ; SKX-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6195 ; SKX-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6196 ; SKX-NEXT: retq # sched: [7:1.00]
6198 ; ZNVER1-LABEL: test_psrlvd_ymm:
6200 ; ZNVER1-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6201 ; ZNVER1-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6202 ; ZNVER1-NEXT: retq # sched: [1:0.50]
6203 %1 = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %a0, <8 x i32> %a1)
6204 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
6205 %3 = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %1, <8 x i32> %2)
6208 declare <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32>, <8 x i32>) nounwind readnone
; test_psrlvq: calls @llvm.x86.avx2.psrlv.q twice on <2 x i64> values -- first
; with both operands in registers (%a0, %a1), then with the second operand
; loaded from %a2. The assertions expect the xmm register form followed by the
; (%rdi) memory form of vpsrlvq on every CPU.
6210 define <2 x i64> @test_psrlvq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
6211 ; GENERIC-LABEL: test_psrlvq:
6213 ; GENERIC-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
6214 ; GENERIC-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
6215 ; GENERIC-NEXT: retq # sched: [1:1.00]
6217 ; HASWELL-LABEL: test_psrlvq:
6219 ; HASWELL-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
6220 ; HASWELL-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
6221 ; HASWELL-NEXT: retq # sched: [7:1.00]
6223 ; BROADWELL-LABEL: test_psrlvq:
6224 ; BROADWELL: # %bb.0:
6225 ; BROADWELL-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
6226 ; BROADWELL-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
6227 ; BROADWELL-NEXT: retq # sched: [7:1.00]
6229 ; SKYLAKE-LABEL: test_psrlvq:
6231 ; SKYLAKE-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
6232 ; SKYLAKE-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
6233 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
6235 ; SKX-LABEL: test_psrlvq:
6237 ; SKX-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
6238 ; SKX-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
6239 ; SKX-NEXT: retq # sched: [7:1.00]
6241 ; ZNVER1-LABEL: test_psrlvq:
6243 ; ZNVER1-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
6244 ; ZNVER1-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
6245 ; ZNVER1-NEXT: retq # sched: [1:0.50]
6246 %1 = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %a0, <2 x i64> %a1)
6247 %2 = load <2 x i64>, <2 x i64> *%a2, align 16
6248 %3 = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %1, <2 x i64> %2)
6251 declare <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64>, <2 x i64>) nounwind readnone
; test_psrlvq_ymm: calls @llvm.x86.avx2.psrlv.q.256 twice on <4 x i64> values
; -- first with both operands in registers (%a0, %a1), then with the second
; operand loaded (align 32) from %a2. The assertions expect the ymm register
; form followed by the (%rdi) memory form of vpsrlvq on every CPU.
6253 define <4 x i64> @test_psrlvq_ymm(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
6254 ; GENERIC-LABEL: test_psrlvq_ymm:
6256 ; GENERIC-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
6257 ; GENERIC-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
6258 ; GENERIC-NEXT: retq # sched: [1:1.00]
6260 ; HASWELL-LABEL: test_psrlvq_ymm:
6262 ; HASWELL-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
6263 ; HASWELL-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
6264 ; HASWELL-NEXT: retq # sched: [7:1.00]
6266 ; BROADWELL-LABEL: test_psrlvq_ymm:
6267 ; BROADWELL: # %bb.0:
6268 ; BROADWELL-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
6269 ; BROADWELL-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
6270 ; BROADWELL-NEXT: retq # sched: [7:1.00]
6272 ; SKYLAKE-LABEL: test_psrlvq_ymm:
6274 ; SKYLAKE-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6275 ; SKYLAKE-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6276 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
6278 ; SKX-LABEL: test_psrlvq_ymm:
6280 ; SKX-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6281 ; SKX-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6282 ; SKX-NEXT: retq # sched: [7:1.00]
6284 ; ZNVER1-LABEL: test_psrlvq_ymm:
6286 ; ZNVER1-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6287 ; ZNVER1-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6288 ; ZNVER1-NEXT: retq # sched: [1:0.50]
6289 %1 = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %a0, <4 x i64> %a1)
6290 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
6291 %3 = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %1, <4 x i64> %2)
6294 declare <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64>, <4 x i64>) nounwind readnone
; test_psrlw: shifts <16 x i16> %a0 right three times in a row --
; @llvm.x86.avx2.psrl.w with the count in %a1, the same intrinsic with the
; count loaded from %a2, and an lshr by a splat constant 2. The assertions for
; every CPU expect one vpsrlw per form: xmm count, (%rdi) count, immediate $2.
6296 define <16 x i16> @test_psrlw(<16 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
6297 ; GENERIC-LABEL: test_psrlw:
6299 ; GENERIC-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
6300 ; GENERIC-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
6301 ; GENERIC-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:1.00]
6302 ; GENERIC-NEXT: retq # sched: [1:1.00]
6304 ; HASWELL-LABEL: test_psrlw:
6306 ; HASWELL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
6307 ; HASWELL-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
6308 ; HASWELL-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:1.00]
6309 ; HASWELL-NEXT: retq # sched: [7:1.00]
6311 ; BROADWELL-LABEL: test_psrlw:
6312 ; BROADWELL: # %bb.0:
6313 ; BROADWELL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
6314 ; BROADWELL-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
6315 ; BROADWELL-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:1.00]
6316 ; BROADWELL-NEXT: retq # sched: [7:1.00]
6318 ; SKYLAKE-LABEL: test_psrlw:
6320 ; SKYLAKE-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
6321 ; SKYLAKE-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6322 ; SKYLAKE-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:0.50]
6323 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
6325 ; SKX-LABEL: test_psrlw:
6327 ; SKX-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
6328 ; SKX-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6329 ; SKX-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:0.50]
6330 ; SKX-NEXT: retq # sched: [7:1.00]
6332 ; ZNVER1-LABEL: test_psrlw:
6334 ; ZNVER1-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
6335 ; ZNVER1-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
6336 ; ZNVER1-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:0.25]
6337 ; ZNVER1-NEXT: retq # sched: [1:0.50]
6338 %1 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %a0, <8 x i16> %a1)
6339 %2 = load <8 x i16>, <8 x i16> *%a2, align 16
6340 %3 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %1, <8 x i16> %2)
6341 %4 = lshr <16 x i16> %3, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
6344 declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) nounwind readnone
; test_psubb: two chained <32 x i8> subtractions -- %a0 minus %a1, then that
; result minus a vector loaded (align 32) from %a2. The assertions expect the
; ymm register form followed by the (%rdi) memory form of vpsubb on every CPU.
6346 define <32 x i8> @test_psubb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
6347 ; GENERIC-LABEL: test_psubb:
6349 ; GENERIC-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6350 ; GENERIC-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6351 ; GENERIC-NEXT: retq # sched: [1:1.00]
6353 ; HASWELL-LABEL: test_psubb:
6355 ; HASWELL-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6356 ; HASWELL-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6357 ; HASWELL-NEXT: retq # sched: [7:1.00]
6359 ; BROADWELL-LABEL: test_psubb:
6360 ; BROADWELL: # %bb.0:
6361 ; BROADWELL-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6362 ; BROADWELL-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
6363 ; BROADWELL-NEXT: retq # sched: [7:1.00]
6365 ; SKYLAKE-LABEL: test_psubb:
6367 ; SKYLAKE-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6368 ; SKYLAKE-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6369 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
6371 ; SKX-LABEL: test_psubb:
6373 ; SKX-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6374 ; SKX-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6375 ; SKX-NEXT: retq # sched: [7:1.00]
6377 ; ZNVER1-LABEL: test_psubb:
6379 ; ZNVER1-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
6380 ; ZNVER1-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6381 ; ZNVER1-NEXT: retq # sched: [1:0.50]
6382 %1 = sub <32 x i8> %a0, %a1
6383 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
6384 %3 = sub <32 x i8> %1, %2
; test_psubd: two chained <8 x i32> subtractions -- %a0 minus %a1, then that
; result minus a vector loaded (align 32) from %a2. The assertions expect the
; ymm register form followed by the (%rdi) memory form of vpsubd on every CPU.
6388 define <8 x i32> @test_psubd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
6389 ; GENERIC-LABEL: test_psubd:
6391 ; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6392 ; GENERIC-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6393 ; GENERIC-NEXT: retq # sched: [1:1.00]
6395 ; HASWELL-LABEL: test_psubd:
6397 ; HASWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6398 ; HASWELL-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6399 ; HASWELL-NEXT: retq # sched: [7:1.00]
6401 ; BROADWELL-LABEL: test_psubd:
6402 ; BROADWELL: # %bb.0:
6403 ; BROADWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6404 ; BROADWELL-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
6405 ; BROADWELL-NEXT: retq # sched: [7:1.00]
6407 ; SKYLAKE-LABEL: test_psubd:
6409 ; SKYLAKE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6410 ; SKYLAKE-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6411 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
6413 ; SKX-LABEL: test_psubd:
6415 ; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6416 ; SKX-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6417 ; SKX-NEXT: retq # sched: [7:1.00]
6419 ; ZNVER1-LABEL: test_psubd:
6421 ; ZNVER1-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
6422 ; ZNVER1-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6423 ; ZNVER1-NEXT: retq # sched: [1:0.50]
6424 %1 = sub <8 x i32> %a0, %a1
6425 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
6426 %3 = sub <8 x i32> %1, %2
; test_psubq: two chained <4 x i64> subtractions -- %a0 minus %a1, then that
; result minus a vector loaded (align 32) from %a2. The assertions expect the
; ymm register form followed by the (%rdi) memory form of vpsubq on every CPU.
6430 define <4 x i64> @test_psubq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
6431 ; GENERIC-LABEL: test_psubq:
6433 ; GENERIC-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6434 ; GENERIC-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6435 ; GENERIC-NEXT: retq # sched: [1:1.00]
6437 ; HASWELL-LABEL: test_psubq:
6439 ; HASWELL-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6440 ; HASWELL-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6441 ; HASWELL-NEXT: retq # sched: [7:1.00]
6443 ; BROADWELL-LABEL: test_psubq:
6444 ; BROADWELL: # %bb.0:
6445 ; BROADWELL-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6446 ; BROADWELL-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
6447 ; BROADWELL-NEXT: retq # sched: [7:1.00]
6449 ; SKYLAKE-LABEL: test_psubq:
6451 ; SKYLAKE-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6452 ; SKYLAKE-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6453 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
6455 ; SKX-LABEL: test_psubq:
6457 ; SKX-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6458 ; SKX-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6459 ; SKX-NEXT: retq # sched: [7:1.00]
6461 ; ZNVER1-LABEL: test_psubq:
6463 ; ZNVER1-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
6464 ; ZNVER1-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6465 ; ZNVER1-NEXT: retq # sched: [1:0.50]
6466 %1 = sub <4 x i64> %a0, %a1
6467 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
6468 %3 = sub <4 x i64> %1, %2
; test_psubsb: calls @llvm.ssub.sat.v32i8 twice -- on %a0 and %a1, then on that
; result and a vector loaded (align 32) from %a2. The assertions expect the ymm
; register form followed by the (%rdi) memory form of vpsubsb on every CPU.
6472 define <32 x i8> @test_psubsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
6473 ; GENERIC-LABEL: test_psubsb:
6475 ; GENERIC-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6476 ; GENERIC-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6477 ; GENERIC-NEXT: retq # sched: [1:1.00]
6479 ; HASWELL-LABEL: test_psubsb:
6481 ; HASWELL-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6482 ; HASWELL-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6483 ; HASWELL-NEXT: retq # sched: [7:1.00]
6485 ; BROADWELL-LABEL: test_psubsb:
6486 ; BROADWELL: # %bb.0:
6487 ; BROADWELL-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6488 ; BROADWELL-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
6489 ; BROADWELL-NEXT: retq # sched: [7:1.00]
6491 ; SKYLAKE-LABEL: test_psubsb:
6493 ; SKYLAKE-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6494 ; SKYLAKE-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6495 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
6497 ; SKX-LABEL: test_psubsb:
6499 ; SKX-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6500 ; SKX-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6501 ; SKX-NEXT: retq # sched: [7:1.00]
6503 ; ZNVER1-LABEL: test_psubsb:
6505 ; ZNVER1-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
6506 ; ZNVER1-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6507 ; ZNVER1-NEXT: retq # sched: [1:0.50]
6508 %1 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %a0, <32 x i8> %a1)
6509 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
6510 %3 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %1, <32 x i8> %2)
6513 declare <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8>, <32 x i8>) nounwind readnone
; test_psubsw: calls @llvm.ssub.sat.v16i16 twice -- on %a0 and %a1, then on
; that result and a vector loaded (align 32) from %a2. The assertions expect
; the ymm register form followed by the (%rdi) memory form of vpsubsw on every
; CPU.
6515 define <16 x i16> @test_psubsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
6516 ; GENERIC-LABEL: test_psubsw:
6518 ; GENERIC-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6519 ; GENERIC-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6520 ; GENERIC-NEXT: retq # sched: [1:1.00]
6522 ; HASWELL-LABEL: test_psubsw:
6524 ; HASWELL-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6525 ; HASWELL-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6526 ; HASWELL-NEXT: retq # sched: [7:1.00]
6528 ; BROADWELL-LABEL: test_psubsw:
6529 ; BROADWELL: # %bb.0:
6530 ; BROADWELL-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6531 ; BROADWELL-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
6532 ; BROADWELL-NEXT: retq # sched: [7:1.00]
6534 ; SKYLAKE-LABEL: test_psubsw:
6536 ; SKYLAKE-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6537 ; SKYLAKE-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6538 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
6540 ; SKX-LABEL: test_psubsw:
6542 ; SKX-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6543 ; SKX-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6544 ; SKX-NEXT: retq # sched: [7:1.00]
6546 ; ZNVER1-LABEL: test_psubsw:
6548 ; ZNVER1-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
6549 ; ZNVER1-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6550 ; ZNVER1-NEXT: retq # sched: [1:0.50]
6551 %1 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %a0, <16 x i16> %a1)
6552 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
6553 %3 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %1, <16 x i16> %2)
6556 declare <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16>, <16 x i16>) nounwind readnone
; test_psubusb: calls @llvm.usub.sat.v32i8 twice -- on %a0 and %a1, then on
; that result and a vector loaded (align 32) from %a2. The assertions expect
; the ymm register form followed by the (%rdi) memory form of vpsubusb on every
; CPU.
6558 define <32 x i8> @test_psubusb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
6559 ; GENERIC-LABEL: test_psubusb:
6561 ; GENERIC-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6562 ; GENERIC-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6563 ; GENERIC-NEXT: retq # sched: [1:1.00]
6565 ; HASWELL-LABEL: test_psubusb:
6567 ; HASWELL-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6568 ; HASWELL-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6569 ; HASWELL-NEXT: retq # sched: [7:1.00]
6571 ; BROADWELL-LABEL: test_psubusb:
6572 ; BROADWELL: # %bb.0:
6573 ; BROADWELL-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6574 ; BROADWELL-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
6575 ; BROADWELL-NEXT: retq # sched: [7:1.00]
6577 ; SKYLAKE-LABEL: test_psubusb:
6579 ; SKYLAKE-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6580 ; SKYLAKE-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6581 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
6583 ; SKX-LABEL: test_psubusb:
6585 ; SKX-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6586 ; SKX-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6587 ; SKX-NEXT: retq # sched: [7:1.00]
6589 ; ZNVER1-LABEL: test_psubusb:
6591 ; ZNVER1-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
6592 ; ZNVER1-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6593 ; ZNVER1-NEXT: retq # sched: [1:0.50]
6594 %1 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> %a0, <32 x i8> %a1)
6595 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
6596 %3 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> %1, <32 x i8> %2)
6599 declare <32 x i8> @llvm.usub.sat.v32i8(<32 x i8>, <32 x i8>) nounwind readnone
; test_psubusw: calls @llvm.usub.sat.v16i16 twice -- on %a0 and %a1, then on
; that result and a vector loaded (align 32) from %a2. The assertions expect
; the ymm register form followed by the (%rdi) memory form of vpsubusw on every
; CPU.
6601 define <16 x i16> @test_psubusw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
6602 ; GENERIC-LABEL: test_psubusw:
6604 ; GENERIC-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6605 ; GENERIC-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6606 ; GENERIC-NEXT: retq # sched: [1:1.00]
6608 ; HASWELL-LABEL: test_psubusw:
6610 ; HASWELL-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6611 ; HASWELL-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6612 ; HASWELL-NEXT: retq # sched: [7:1.00]
6614 ; BROADWELL-LABEL: test_psubusw:
6615 ; BROADWELL: # %bb.0:
6616 ; BROADWELL-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6617 ; BROADWELL-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
6618 ; BROADWELL-NEXT: retq # sched: [7:1.00]
6620 ; SKYLAKE-LABEL: test_psubusw:
6622 ; SKYLAKE-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6623 ; SKYLAKE-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6624 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
6626 ; SKX-LABEL: test_psubusw:
6628 ; SKX-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6629 ; SKX-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6630 ; SKX-NEXT: retq # sched: [7:1.00]
6632 ; ZNVER1-LABEL: test_psubusw:
6634 ; ZNVER1-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
6635 ; ZNVER1-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6636 ; ZNVER1-NEXT: retq # sched: [1:0.50]
6637 %1 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> %a0, <16 x i16> %a1)
6638 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
6639 %3 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> %1, <16 x i16> %2)
6642 declare <16 x i16> @llvm.usub.sat.v16i16(<16 x i16>, <16 x i16>) nounwind readnone
; test_psubw: two chained <16 x i16> subtractions -- %a0 minus %a1, then that
; result minus a vector loaded (align 32) from %a2. The assertions expect the
; ymm register form followed by the (%rdi) memory form of vpsubw on every CPU.
6644 define <16 x i16> @test_psubw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
6645 ; GENERIC-LABEL: test_psubw:
6647 ; GENERIC-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6648 ; GENERIC-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6649 ; GENERIC-NEXT: retq # sched: [1:1.00]
6651 ; HASWELL-LABEL: test_psubw:
6653 ; HASWELL-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6654 ; HASWELL-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6655 ; HASWELL-NEXT: retq # sched: [7:1.00]
6657 ; BROADWELL-LABEL: test_psubw:
6658 ; BROADWELL: # %bb.0:
6659 ; BROADWELL-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6660 ; BROADWELL-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
6661 ; BROADWELL-NEXT: retq # sched: [7:1.00]
6663 ; SKYLAKE-LABEL: test_psubw:
6665 ; SKYLAKE-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6666 ; SKYLAKE-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6667 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
6669 ; SKX-LABEL: test_psubw:
6671 ; SKX-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6672 ; SKX-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6673 ; SKX-NEXT: retq # sched: [7:1.00]
6675 ; ZNVER1-LABEL: test_psubw:
6677 ; ZNVER1-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
6678 ; ZNVER1-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6679 ; ZNVER1-NEXT: retq # sched: [1:0.50]
6680 %1 = sub <16 x i16> %a0, %a1
6681 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
6682 %3 = sub <16 x i16> %1, %2
; test_punpckhbw: two chained shufflevectors with the same mask, which
; alternates element i of the first operand with element i of the second
; (index 32+i) for i = 8..15 and then i = 24..31. The first shuffle combines
; %a0 with %a1; the second combines that result with a vector loaded
; (align 32) from %a2. The assertions expect the ymm register form followed by
; the memory form of vpunpckhbw on every CPU.
6686 define <32 x i8> @test_punpckhbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
6687 ; GENERIC-LABEL: test_punpckhbw:
6689 ; GENERIC-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00]
6690 ; GENERIC-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [8:1.00]
6691 ; GENERIC-NEXT: retq # sched: [1:1.00]
6693 ; HASWELL-LABEL: test_punpckhbw:
6695 ; HASWELL-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00]
6696 ; HASWELL-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [8:1.00]
6697 ; HASWELL-NEXT: retq # sched: [7:1.00]
6699 ; BROADWELL-LABEL: test_punpckhbw:
6700 ; BROADWELL: # %bb.0:
6701 ; BROADWELL-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00]
6702 ; BROADWELL-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [7:1.00]
6703 ; BROADWELL-NEXT: retq # sched: [7:1.00]
6705 ; SKYLAKE-LABEL: test_punpckhbw:
6707 ; SKYLAKE-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00]
6708 ; SKYLAKE-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [8:1.00]
6709 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
6711 ; SKX-LABEL: test_punpckhbw:
6713 ; SKX-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00]
6714 ; SKX-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [8:1.00]
6715 ; SKX-NEXT: retq # sched: [7:1.00]
6717 ; ZNVER1-LABEL: test_punpckhbw:
6719 ; ZNVER1-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:0.25]
6720 ; ZNVER1-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [8:0.50]
6721 ; ZNVER1-NEXT: retq # sched: [1:0.50]
6722 %1 = shufflevector <32 x i8> %a0, <32 x i8> %a1, <32 x i32> <i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
6723 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
6724 %3 = shufflevector <32 x i8> %1, <32 x i8> %2, <32 x i32> <i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
; test_punpckhdq: scheduling-annotation test for 256-bit vpunpckhdq.
; Two shuffles with the same high-dword interleave mask are chained: the first
; takes both inputs from registers (%a0, %a1), the second takes its second
; input from a 32-byte-aligned load of %a2, so the checks cover both the
; register form and the memory-operand form of the instruction.
; The check lines below are autogenerated (see the NOTE at the top of the
; file); regenerate them with the update script instead of editing by hand.
; Each "sched: [a:b]" suffix is printed by llc -print-schedule for the CPU
; selected in the matching RUN line (presumably latency : reciprocal
; throughput -- confirm against the llc documentation).
6728 define <8 x i32> @test_punpckhdq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
6729 ; GENERIC-LABEL: test_punpckhdq:
6731 ; GENERIC-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
6732 ; GENERIC-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
6733 ; GENERIC-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
6734 ; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6735 ; GENERIC-NEXT: retq # sched: [1:1.00]
6737 ; HASWELL-LABEL: test_punpckhdq:
6739 ; HASWELL-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
6740 ; HASWELL-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
6741 ; HASWELL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
6742 ; HASWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6743 ; HASWELL-NEXT: retq # sched: [7:1.00]
6745 ; BROADWELL-LABEL: test_punpckhdq:
6746 ; BROADWELL: # %bb.0:
6747 ; BROADWELL-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
6748 ; BROADWELL-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [7:1.00]
6749 ; BROADWELL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
6750 ; BROADWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6751 ; BROADWELL-NEXT: retq # sched: [7:1.00]
6753 ; SKYLAKE-LABEL: test_punpckhdq:
6755 ; SKYLAKE-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
6756 ; SKYLAKE-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
6757 ; SKYLAKE-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
6758 ; SKYLAKE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6759 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
6761 ; SKX-LABEL: test_punpckhdq:
6763 ; SKX-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
6764 ; SKX-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
6765 ; SKX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
6766 ; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6767 ; SKX-NEXT: retq # sched: [7:1.00]
6769 ; ZNVER1-LABEL: test_punpckhdq:
6771 ; ZNVER1-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:0.25]
6772 ; ZNVER1-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:0.50]
6773 ; ZNVER1-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.25]
6774 ; ZNVER1-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
6775 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test. The trailing add of a splat of 1 is what the checks show as
; vpcmpeqd (all-ones) followed by vpsubd. NOTE(review): the add is presumably
; there to keep the shuffles in the integer domain so vpunpckhdq is selected
; rather than a floating-point unpack -- confirm before removing it.
6776 %1 = shufflevector <8 x i32> %a0, <8 x i32> %a1, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
6777 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
6778 %3 = shufflevector <8 x i32> %1, <8 x i32> %2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
6779 %4 = add <8 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
; test_punpckhqdq: scheduling-annotation test for 256-bit vpunpckhqdq.
; Unlike the byte/word unpack tests in this file, the two shuffles are not
; chained: both take %a0 as their first input (one with register %a1, one
; with the value loaded from %a2) and the two results are summed, which is
; why the checks end with a vpaddq of ymm0 and ymm1.
; The check lines below are autogenerated (see the NOTE at the top of the
; file); regenerate them with the update script instead of editing by hand.
6783 define <4 x i64> @test_punpckhqdq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
6784 ; GENERIC-LABEL: test_punpckhqdq:
6786 ; GENERIC-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
6787 ; GENERIC-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
6788 ; GENERIC-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
6789 ; GENERIC-NEXT: retq # sched: [1:1.00]
6791 ; HASWELL-LABEL: test_punpckhqdq:
6793 ; HASWELL-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
6794 ; HASWELL-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
6795 ; HASWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
6796 ; HASWELL-NEXT: retq # sched: [7:1.00]
6798 ; BROADWELL-LABEL: test_punpckhqdq:
6799 ; BROADWELL: # %bb.0:
6800 ; BROADWELL-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
6801 ; BROADWELL-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [7:1.00]
6802 ; BROADWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
6803 ; BROADWELL-NEXT: retq # sched: [7:1.00]
6805 ; SKYLAKE-LABEL: test_punpckhqdq:
6807 ; SKYLAKE-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
6808 ; SKYLAKE-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
6809 ; SKYLAKE-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
6810 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
6812 ; SKX-LABEL: test_punpckhqdq:
6814 ; SKX-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
6815 ; SKX-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
6816 ; SKX-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
6817 ; SKX-NEXT: retq # sched: [7:1.00]
6819 ; ZNVER1-LABEL: test_punpckhqdq:
6821 ; ZNVER1-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:0.25]
6822 ; ZNVER1-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:0.50]
6823 ; ZNVER1-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
6824 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: %1 is the register-register unpack, %3 the unpack against the
; loaded vector; %4 combines them so both results are live.
6825 %1 = shufflevector <4 x i64> %a0, <4 x i64> %a1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
6826 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
6827 %3 = shufflevector <4 x i64> %a0, <4 x i64> %2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
6828 %4 = add <4 x i64> %1, %3
; test_punpckhwd: scheduling-annotation test for 256-bit vpunpckhwd.
; Two shuffles with the same high-word interleave mask are chained: the first
; uses registers %a0/%a1, the second feeds the first result together with a
; 32-byte-aligned load of %a2, covering the register and memory-operand forms.
; The check lines below are autogenerated (see the NOTE at the top of the
; file); regenerate them with the update script instead of editing by hand.
6832 define <16 x i16> @test_punpckhwd(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
6833 ; GENERIC-LABEL: test_punpckhwd:
6835 ; GENERIC-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00]
6836 ; GENERIC-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [8:1.00]
6837 ; GENERIC-NEXT: retq # sched: [1:1.00]
6839 ; HASWELL-LABEL: test_punpckhwd:
6841 ; HASWELL-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00]
6842 ; HASWELL-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [8:1.00]
6843 ; HASWELL-NEXT: retq # sched: [7:1.00]
6845 ; BROADWELL-LABEL: test_punpckhwd:
6846 ; BROADWELL: # %bb.0:
6847 ; BROADWELL-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00]
6848 ; BROADWELL-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [7:1.00]
6849 ; BROADWELL-NEXT: retq # sched: [7:1.00]
6851 ; SKYLAKE-LABEL: test_punpckhwd:
6853 ; SKYLAKE-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00]
6854 ; SKYLAKE-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [8:1.00]
6855 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
6857 ; SKX-LABEL: test_punpckhwd:
6859 ; SKX-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00]
6860 ; SKX-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [8:1.00]
6861 ; SKX-NEXT: retq # sched: [7:1.00]
6863 ; ZNVER1-LABEL: test_punpckhwd:
6865 ; ZNVER1-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:0.25]
6866 ; ZNVER1-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [8:0.50]
6867 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: mask indices 4-7 and 12-15 pick from the first input,
; 20-23 and 28-31 from the second (indices >= 16 address the second vector).
6868 %1 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
6869 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
6870 %3 = shufflevector <16 x i16> %1, <16 x i16> %2, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
; test_punpcklbw: scheduling-annotation test for 256-bit vpunpcklbw.
; Two shuffles with the same low-byte interleave mask are chained: the first
; uses registers %a0/%a1, the second feeds the first result together with a
; 32-byte-aligned load of %a2, covering the register and memory-operand forms.
; The check lines below are autogenerated (see the NOTE at the top of the
; file); regenerate them with the update script instead of editing by hand.
6874 define <32 x i8> @test_punpcklbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
6875 ; GENERIC-LABEL: test_punpcklbw:
6877 ; GENERIC-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00]
6878 ; GENERIC-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [8:1.00]
6879 ; GENERIC-NEXT: retq # sched: [1:1.00]
6881 ; HASWELL-LABEL: test_punpcklbw:
6883 ; HASWELL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00]
6884 ; HASWELL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [8:1.00]
6885 ; HASWELL-NEXT: retq # sched: [7:1.00]
6887 ; BROADWELL-LABEL: test_punpcklbw:
6888 ; BROADWELL: # %bb.0:
6889 ; BROADWELL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00]
6890 ; BROADWELL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [7:1.00]
6891 ; BROADWELL-NEXT: retq # sched: [7:1.00]
6893 ; SKYLAKE-LABEL: test_punpcklbw:
6895 ; SKYLAKE-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00]
6896 ; SKYLAKE-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [8:1.00]
6897 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
6899 ; SKX-LABEL: test_punpcklbw:
6901 ; SKX-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00]
6902 ; SKX-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [8:1.00]
6903 ; SKX-NEXT: retq # sched: [7:1.00]
6905 ; ZNVER1-LABEL: test_punpcklbw:
6907 ; ZNVER1-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:0.25]
6908 ; ZNVER1-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [8:0.50]
6909 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: mask indices 0-7 and 16-23 pick from the first input,
; 32-39 and 48-55 from the second (indices >= 32 address the second vector).
6910 %1 = shufflevector <32 x i8> %a0, <32 x i8> %a1, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
6911 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
6912 %3 = shufflevector <32 x i8> %1, <32 x i8> %2, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
; test_punpckldq: scheduling-annotation test for 256-bit vpunpckldq.
; Two shuffles with the same low-dword interleave mask are chained: the first
; takes both inputs from registers (%a0, %a1), the second takes its second
; input from a 32-byte-aligned load of %a2, so the checks cover both the
; register form and the memory-operand form of the instruction.
; The check lines below are autogenerated (see the NOTE at the top of the
; file); regenerate them with the update script instead of editing by hand.
6916 define <8 x i32> @test_punpckldq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
6917 ; GENERIC-LABEL: test_punpckldq:
6919 ; GENERIC-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
6920 ; GENERIC-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
6921 ; GENERIC-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
6922 ; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6923 ; GENERIC-NEXT: retq # sched: [1:1.00]
6925 ; HASWELL-LABEL: test_punpckldq:
6927 ; HASWELL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
6928 ; HASWELL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
6929 ; HASWELL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
6930 ; HASWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6931 ; HASWELL-NEXT: retq # sched: [7:1.00]
6933 ; BROADWELL-LABEL: test_punpckldq:
6934 ; BROADWELL: # %bb.0:
6935 ; BROADWELL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
6936 ; BROADWELL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [7:1.00]
6937 ; BROADWELL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
6938 ; BROADWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6939 ; BROADWELL-NEXT: retq # sched: [7:1.00]
6941 ; SKYLAKE-LABEL: test_punpckldq:
6943 ; SKYLAKE-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
6944 ; SKYLAKE-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
6945 ; SKYLAKE-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
6946 ; SKYLAKE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6947 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
6949 ; SKX-LABEL: test_punpckldq:
6951 ; SKX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
6952 ; SKX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
6953 ; SKX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
6954 ; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6955 ; SKX-NEXT: retq # sched: [7:1.00]
6957 ; ZNVER1-LABEL: test_punpckldq:
6959 ; ZNVER1-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:0.25]
6960 ; ZNVER1-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:0.50]
6961 ; ZNVER1-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.25]
6962 ; ZNVER1-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
6963 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test. The trailing add of a splat of 1 is what the checks show as
; vpcmpeqd (all-ones) followed by vpsubd. NOTE(review): the add is presumably
; there to keep the shuffles in the integer domain so vpunpckldq is selected
; rather than a floating-point unpack -- confirm before removing it.
6964 %1 = shufflevector <8 x i32> %a0, <8 x i32> %a1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
6965 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
6966 %3 = shufflevector <8 x i32> %1, <8 x i32> %2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
6967 %4 = add <8 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
; test_punpcklqdq: scheduling-annotation test for 256-bit vpunpcklqdq.
; Unlike the byte/word unpack tests in this file, the two shuffles are not
; chained: both take %a0 as their first input (one with register %a1, one
; with the value loaded from %a2) and the two results are summed, which is
; why the checks end with a vpaddq of ymm0 and ymm1.
; The check lines below are autogenerated (see the NOTE at the top of the
; file); regenerate them with the update script instead of editing by hand.
6971 define <4 x i64> @test_punpcklqdq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
6972 ; GENERIC-LABEL: test_punpcklqdq:
6974 ; GENERIC-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
6975 ; GENERIC-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
6976 ; GENERIC-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
6977 ; GENERIC-NEXT: retq # sched: [1:1.00]
6979 ; HASWELL-LABEL: test_punpcklqdq:
6981 ; HASWELL-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
6982 ; HASWELL-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
6983 ; HASWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
6984 ; HASWELL-NEXT: retq # sched: [7:1.00]
6986 ; BROADWELL-LABEL: test_punpcklqdq:
6987 ; BROADWELL: # %bb.0:
6988 ; BROADWELL-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
6989 ; BROADWELL-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [7:1.00]
6990 ; BROADWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
6991 ; BROADWELL-NEXT: retq # sched: [7:1.00]
6993 ; SKYLAKE-LABEL: test_punpcklqdq:
6995 ; SKYLAKE-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
6996 ; SKYLAKE-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
6997 ; SKYLAKE-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
6998 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
7000 ; SKX-LABEL: test_punpcklqdq:
7002 ; SKX-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
7003 ; SKX-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
7004 ; SKX-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
7005 ; SKX-NEXT: retq # sched: [7:1.00]
7007 ; ZNVER1-LABEL: test_punpcklqdq:
7009 ; ZNVER1-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:0.25]
7010 ; ZNVER1-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:0.50]
7011 ; ZNVER1-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
7012 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: %1 is the register-register unpack, %3 the unpack against the
; loaded vector; %4 combines them so both results are live.
7013 %1 = shufflevector <4 x i64> %a0, <4 x i64> %a1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
7014 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
7015 %3 = shufflevector <4 x i64> %a0, <4 x i64> %2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
7016 %4 = add <4 x i64> %1, %3
; test_punpcklwd: scheduling-annotation test for 256-bit vpunpcklwd.
; Two shuffles with the same low-word interleave mask are chained: the first
; uses registers %a0/%a1, the second feeds the first result together with a
; 32-byte-aligned load of %a2, covering the register and memory-operand forms.
; The check lines below are autogenerated (see the NOTE at the top of the
; file); regenerate them with the update script instead of editing by hand.
7020 define <16 x i16> @test_punpcklwd(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
7021 ; GENERIC-LABEL: test_punpcklwd:
7023 ; GENERIC-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00]
7024 ; GENERIC-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [8:1.00]
7025 ; GENERIC-NEXT: retq # sched: [1:1.00]
7027 ; HASWELL-LABEL: test_punpcklwd:
7029 ; HASWELL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00]
7030 ; HASWELL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [8:1.00]
7031 ; HASWELL-NEXT: retq # sched: [7:1.00]
7033 ; BROADWELL-LABEL: test_punpcklwd:
7034 ; BROADWELL: # %bb.0:
7035 ; BROADWELL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00]
7036 ; BROADWELL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [7:1.00]
7037 ; BROADWELL-NEXT: retq # sched: [7:1.00]
7039 ; SKYLAKE-LABEL: test_punpcklwd:
7041 ; SKYLAKE-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00]
7042 ; SKYLAKE-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [8:1.00]
7043 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
7045 ; SKX-LABEL: test_punpcklwd:
7047 ; SKX-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00]
7048 ; SKX-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [8:1.00]
7049 ; SKX-NEXT: retq # sched: [7:1.00]
7051 ; ZNVER1-LABEL: test_punpcklwd:
7053 ; ZNVER1-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:0.25]
7054 ; ZNVER1-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [8:0.50]
7055 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: mask indices 0-3 and 8-11 pick from the first input,
; 16-19 and 24-27 from the second (indices >= 16 address the second vector).
7056 %1 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
7057 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
7058 %3 = shufflevector <16 x i16> %1, <16 x i16> %2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
; test_pxor: scheduling-annotation test for 256-bit vpxor.
; %a0 is xor-ed with register %a1, then with a 32-byte-aligned load of %a2,
; so the checks cover the register form and the (%rdi) memory-operand form.
; The final add of %a1 appears in the checks as vpaddq with ymm1.
; The check lines below are autogenerated (see the NOTE at the top of the
; file); regenerate them with the update script instead of editing by hand.
7062 define <4 x i64> @test_pxor(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
7063 ; GENERIC-LABEL: test_pxor:
7065 ; GENERIC-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
7066 ; GENERIC-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
7067 ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
7068 ; GENERIC-NEXT: retq # sched: [1:1.00]
7070 ; HASWELL-LABEL: test_pxor:
7072 ; HASWELL-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
7073 ; HASWELL-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
7074 ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
7075 ; HASWELL-NEXT: retq # sched: [7:1.00]
7077 ; BROADWELL-LABEL: test_pxor:
7078 ; BROADWELL: # %bb.0:
7079 ; BROADWELL-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
7080 ; BROADWELL-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
7081 ; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
7082 ; BROADWELL-NEXT: retq # sched: [7:1.00]
7084 ; SKYLAKE-LABEL: test_pxor:
7086 ; SKYLAKE-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
7087 ; SKYLAKE-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
7088 ; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
7089 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
7091 ; SKX-LABEL: test_pxor:
7093 ; SKX-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
7094 ; SKX-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
7095 ; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
7096 ; SKX-NEXT: retq # sched: [7:1.00]
7098 ; ZNVER1-LABEL: test_pxor:
7100 ; ZNVER1-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
7101 ; ZNVER1-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
7102 ; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
7103 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test. NOTE(review): the trailing integer add is presumably there to
; keep the xor in the integer domain so vpxor is selected rather than a
; floating-point xor -- confirm before removing it.
7104 %1 = xor <4 x i64> %a0, %a1
7105 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
7106 %3 = xor <4 x i64> %1, %2
7107 %4 = add <4 x i64> %3, %a1