1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
; vbroadcasti128 with a memory source.
; Every -mcpu configuration is expected to emit a vbroadcasti128 load followed
; by vpaddd; only the "sched" annotations differ between the CPU models.
9 define <8 x i32> @test_broadcasti128(<8 x i32> %a0, <4 x i32> *%a1) {
10 ; GENERIC-LABEL: test_broadcasti128:
12 ; GENERIC-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [4:0.50]
13 ; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
14 ; GENERIC-NEXT: retq # sched: [1:1.00]
16 ; HASWELL-LABEL: test_broadcasti128:
18 ; HASWELL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [1:0.50]
19 ; HASWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
20 ; HASWELL-NEXT: retq # sched: [2:1.00]
22 ; BROADWELL-LABEL: test_broadcasti128:
24 ; BROADWELL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [6:0.50]
25 ; BROADWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
26 ; BROADWELL-NEXT: retq # sched: [7:1.00]
28 ; SKYLAKE-LABEL: test_broadcasti128:
30 ; SKYLAKE-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [7:0.50]
31 ; SKYLAKE-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
32 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
34 ; SKX-LABEL: test_broadcasti128:
36 ; SKX-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [7:0.50]
37 ; SKX-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
38 ; SKX-NEXT: retq # sched: [7:1.00]
40 ; ZNVER1-LABEL: test_broadcasti128:
42 ; ZNVER1-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [8:0.50]
43 ; ZNVER1-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
44 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Load 128 bits from %a1, repeat them in both halves of a 256-bit vector
; (shuffle mask 0,1,2,3,0,1,2,3), then add %a0.
; NOTE(review): the add presumably keeps the broadcast in the integer
; execution domain so the "i" form is selected - confirm.
45 %1 = load <4 x i32>, <4 x i32> *%a1, align 16
46 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
47 %3 = add <8 x i32> %2, %a0
; vbroadcastsd, register source, 256-bit destination.
; Every -mcpu configuration is expected to emit vbroadcastsd %xmm0, %ymm0
; followed by vaddpd; only the "sched" annotations differ.
51 define <4 x double> @test_broadcastsd_ymm(<2 x double> %a0) {
52 ; GENERIC-LABEL: test_broadcastsd_ymm:
54 ; GENERIC-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [1:1.00]
55 ; GENERIC-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
56 ; GENERIC-NEXT: retq # sched: [1:1.00]
58 ; HASWELL-LABEL: test_broadcastsd_ymm:
60 ; HASWELL-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [3:1.00]
61 ; HASWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
62 ; HASWELL-NEXT: retq # sched: [2:1.00]
64 ; BROADWELL-LABEL: test_broadcastsd_ymm:
66 ; BROADWELL-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [3:1.00]
67 ; BROADWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
68 ; BROADWELL-NEXT: retq # sched: [7:1.00]
70 ; SKYLAKE-LABEL: test_broadcastsd_ymm:
72 ; SKYLAKE-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [3:1.00]
73 ; SKYLAKE-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
74 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
76 ; SKX-LABEL: test_broadcastsd_ymm:
78 ; SKX-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [3:1.00]
79 ; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.33]
80 ; SKX-NEXT: retq # sched: [7:1.00]
82 ; ZNVER1-LABEL: test_broadcastsd_ymm:
84 ; ZNVER1-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [100:0.25]
85 ; ZNVER1-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
86 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Splat element 0 of %a0 into all four lanes (all-zero shuffle mask), then
; add the splat to itself.
; NOTE(review): the fadd presumably pins the floating-point domain - confirm.
87 %1 = shufflevector <2 x double> %a0, <2 x double> undef, <4 x i32> zeroinitializer
88 %2 = fadd <4 x double> %1, %1
; vbroadcastss, register source, 128-bit destination.
; Every -mcpu configuration is expected to emit vbroadcastss %xmm0, %xmm0
; followed by vaddps; only the "sched" annotations differ.
92 define <4 x float> @test_broadcastss(<4 x float> %a0) {
93 ; GENERIC-LABEL: test_broadcastss:
95 ; GENERIC-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00]
96 ; GENERIC-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
97 ; GENERIC-NEXT: retq # sched: [1:1.00]
99 ; HASWELL-LABEL: test_broadcastss:
101 ; HASWELL-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00]
102 ; HASWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
103 ; HASWELL-NEXT: retq # sched: [2:1.00]
105 ; BROADWELL-LABEL: test_broadcastss:
107 ; BROADWELL-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00]
108 ; BROADWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
109 ; BROADWELL-NEXT: retq # sched: [7:1.00]
111 ; SKYLAKE-LABEL: test_broadcastss:
113 ; SKYLAKE-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00]
114 ; SKYLAKE-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
115 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
117 ; SKX-LABEL: test_broadcastss:
119 ; SKX-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00]
120 ; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.33]
121 ; SKX-NEXT: retq # sched: [7:1.00]
123 ; ZNVER1-LABEL: test_broadcastss:
125 ; ZNVER1-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:0.50]
126 ; ZNVER1-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
127 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Splat element 0 of %a0 into all four lanes (all-zero shuffle mask), then
; add the splat to itself.
128 %1 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> zeroinitializer
129 %2 = fadd <4 x float> %1, %1
; vbroadcastss, register source, 256-bit destination.
; Every -mcpu configuration is expected to emit vbroadcastss %xmm0, %ymm0
; followed by vaddps on ymm registers; only the "sched" annotations differ.
133 define <8 x float> @test_broadcastss_ymm(<4 x float> %a0) {
134 ; GENERIC-LABEL: test_broadcastss_ymm:
136 ; GENERIC-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [1:1.00]
137 ; GENERIC-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
138 ; GENERIC-NEXT: retq # sched: [1:1.00]
140 ; HASWELL-LABEL: test_broadcastss_ymm:
142 ; HASWELL-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [3:1.00]
143 ; HASWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
144 ; HASWELL-NEXT: retq # sched: [2:1.00]
146 ; BROADWELL-LABEL: test_broadcastss_ymm:
148 ; BROADWELL-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [3:1.00]
149 ; BROADWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
150 ; BROADWELL-NEXT: retq # sched: [7:1.00]
152 ; SKYLAKE-LABEL: test_broadcastss_ymm:
154 ; SKYLAKE-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [3:1.00]
155 ; SKYLAKE-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
156 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
158 ; SKX-LABEL: test_broadcastss_ymm:
160 ; SKX-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [3:1.00]
161 ; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.33]
162 ; SKX-NEXT: retq # sched: [7:1.00]
164 ; ZNVER1-LABEL: test_broadcastss_ymm:
166 ; ZNVER1-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [100:0.25]
167 ; ZNVER1-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
168 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Splat element 0 of %a0 into all eight lanes (all-zero shuffle mask), then
; add the splat to itself.
169 %1 = shufflevector <4 x float> %a0, <4 x float> undef, <8 x i32> zeroinitializer
170 %2 = fadd <8 x float> %1, %1
; vextracti128, register-destination and store forms.
; Every -mcpu configuration is expected to emit vpaddd and vpsubd, then
; vextracti128 $1 once into %xmm0 (from the difference) and once straight to
; memory at (%rdi) (from the sum), followed by vzeroupper.
; The x86-64 checks carry no "sched" annotation on vzeroupper; znver1 reports
; it as [100:?].
174 define <4 x i32> @test_extracti128(<8 x i32> %a0, <8 x i32> %a1, <4 x i32> *%a2) {
175 ; GENERIC-LABEL: test_extracti128:
177 ; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [3:1.00]
178 ; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
179 ; GENERIC-NEXT: vextracti128 $1, %ymm0, %xmm0 # sched: [1:1.00]
180 ; GENERIC-NEXT: vextracti128 $1, %ymm2, (%rdi) # sched: [1:1.00]
181 ; GENERIC-NEXT: vzeroupper
182 ; GENERIC-NEXT: retq # sched: [1:1.00]
184 ; HASWELL-LABEL: test_extracti128:
186 ; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [1:0.50]
187 ; HASWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
188 ; HASWELL-NEXT: vextracti128 $1, %ymm0, %xmm0 # sched: [3:1.00]
189 ; HASWELL-NEXT: vextracti128 $1, %ymm2, (%rdi) # sched: [1:1.00]
190 ; HASWELL-NEXT: vzeroupper # sched: [4:1.00]
191 ; HASWELL-NEXT: retq # sched: [2:1.00]
193 ; BROADWELL-LABEL: test_extracti128:
195 ; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [1:0.50]
196 ; BROADWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
197 ; BROADWELL-NEXT: vextracti128 $1, %ymm0, %xmm0 # sched: [3:1.00]
198 ; BROADWELL-NEXT: vextracti128 $1, %ymm2, (%rdi) # sched: [1:1.00]
199 ; BROADWELL-NEXT: vzeroupper # sched: [4:1.00]
200 ; BROADWELL-NEXT: retq # sched: [7:1.00]
202 ; SKYLAKE-LABEL: test_extracti128:
204 ; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [1:0.33]
205 ; SKYLAKE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
206 ; SKYLAKE-NEXT: vextracti128 $1, %ymm0, %xmm0 # sched: [3:1.00]
207 ; SKYLAKE-NEXT: vextracti128 $1, %ymm2, (%rdi) # sched: [1:1.00]
208 ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00]
209 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
211 ; SKX-LABEL: test_extracti128:
213 ; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [1:0.33]
214 ; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
215 ; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0 # sched: [3:1.00]
216 ; SKX-NEXT: vextracti128 $1, %ymm2, (%rdi) # sched: [1:1.00]
217 ; SKX-NEXT: vzeroupper # sched: [4:1.00]
218 ; SKX-NEXT: retq # sched: [7:1.00]
220 ; ZNVER1-LABEL: test_extracti128:
222 ; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [1:0.25]
223 ; ZNVER1-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
224 ; ZNVER1-NEXT: vextracti128 $1, %ymm0, %xmm0 # sched: [2:0.25]
225 ; ZNVER1-NEXT: vextracti128 $1, %ymm2, (%rdi) # sched: [1:0.50]
226 ; ZNVER1-NEXT: vzeroupper # sched: [100:?]
227 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Compute sum and difference of the inputs, take elements 4-7 (the upper
; 128 bits) of each, and store the sum's upper half through %a2.
; NOTE(review): the add/sub presumably keep the extracts in the integer
; domain so the "i" form is selected - confirm.
228 %1 = add <8 x i32> %a0, %a1
229 %2 = sub <8 x i32> %a0, %a1
230 %3 = shufflevector <8 x i32> %1, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
231 %4 = shufflevector <8 x i32> %2, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
232 store <4 x i32> %3, <4 x i32> *%a2
; vgatherdpd, 128-bit form, via @llvm.x86.avx2.gather.d.pd.
; Expected assembly: %xmm1 as the index register with scale 2 (the trailing
; i8 2 of the call), base in %rdi, %xmm2 as the first operand, result in %xmm0.
; The x86-64 checks carry no "sched" annotation on the gather; haswell reports
; [1:?] and znver1 [100:?].
; NOTE(review): intrinsic operands are presumably (passthru, base, index,
; mask, scale) - confirm against the intrinsic definition.
236 define <2 x double> @test_gatherdpd(<2 x double> %a0, i8* %a1, <4 x i32> %a2, <2 x double> %a3) {
237 ; GENERIC-LABEL: test_gatherdpd:
239 ; GENERIC-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0
240 ; GENERIC-NEXT: retq # sched: [1:1.00]
242 ; HASWELL-LABEL: test_gatherdpd:
244 ; HASWELL-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?]
245 ; HASWELL-NEXT: retq # sched: [2:1.00]
247 ; BROADWELL-LABEL: test_gatherdpd:
249 ; BROADWELL-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [25:3.00]
250 ; BROADWELL-NEXT: retq # sched: [7:1.00]
252 ; SKYLAKE-LABEL: test_gatherdpd:
254 ; SKYLAKE-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
255 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
257 ; SKX-LABEL: test_gatherdpd:
259 ; SKX-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
260 ; SKX-NEXT: retq # sched: [7:1.00]
262 ; ZNVER1-LABEL: test_gatherdpd:
264 ; ZNVER1-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?]
265 ; ZNVER1-NEXT: retq # sched: [1:0.50]
266 %1 = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> %a0, i8* %a1, <4 x i32> %a2, <2 x double> %a3, i8 2)
269 declare <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double>, i8*, <4 x i32>, <2 x double>, i8) nounwind readonly
; vgatherdpd, 256-bit form, via @llvm.x86.avx2.gather.d.pd.256.
; Expected assembly: %xmm1 as the index register (the index vector is
; <4 x i32>) with scale 8 (the trailing i8 8 of the call), %ymm2 as the first
; operand, result in %ymm0.
; The x86-64 checks carry no "sched" annotation on the gather; haswell reports
; [1:?] and znver1 [100:?].
271 define <4 x double> @test_gatherdpd_ymm(<4 x double> %a0, i8* %a1, <4 x i32> %a2, <4 x double> %a3) {
272 ; GENERIC-LABEL: test_gatherdpd_ymm:
274 ; GENERIC-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0
275 ; GENERIC-NEXT: retq # sched: [1:1.00]
277 ; HASWELL-LABEL: test_gatherdpd_ymm:
279 ; HASWELL-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [1:?]
280 ; HASWELL-NEXT: retq # sched: [2:1.00]
282 ; BROADWELL-LABEL: test_gatherdpd_ymm:
284 ; BROADWELL-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [26:5.00]
285 ; BROADWELL-NEXT: retq # sched: [7:1.00]
287 ; SKYLAKE-LABEL: test_gatherdpd_ymm:
289 ; SKYLAKE-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [25:1.00]
290 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
292 ; SKX-LABEL: test_gatherdpd_ymm:
294 ; SKX-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [25:1.00]
295 ; SKX-NEXT: retq # sched: [7:1.00]
297 ; ZNVER1-LABEL: test_gatherdpd_ymm:
299 ; ZNVER1-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [100:?]
300 ; ZNVER1-NEXT: retq # sched: [1:0.50]
301 %1 = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> %a0, i8* %a1, <4 x i32> %a2, <4 x double> %a3, i8 8)
304 declare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double>, i8*, <4 x i32>, <4 x double>, i8) nounwind readonly
; vgatherdps, 128-bit form, via @llvm.x86.avx2.gather.d.ps.
; Expected assembly: %xmm1 as the index register with scale 2 (the trailing
; i8 2 of the call), %xmm2 as the first operand, result in %xmm0.
; The x86-64 checks carry no "sched" annotation on the gather; haswell reports
; [1:?] and znver1 [100:?].
306 define <4 x float> @test_gatherdps(<4 x float> %a0, i8* %a1, <4 x i32> %a2, <4 x float> %a3) {
307 ; GENERIC-LABEL: test_gatherdps:
309 ; GENERIC-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0
310 ; GENERIC-NEXT: retq # sched: [1:1.00]
312 ; HASWELL-LABEL: test_gatherdps:
314 ; HASWELL-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?]
315 ; HASWELL-NEXT: retq # sched: [2:1.00]
317 ; BROADWELL-LABEL: test_gatherdps:
319 ; BROADWELL-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [25:3.00]
320 ; BROADWELL-NEXT: retq # sched: [7:1.00]
322 ; SKYLAKE-LABEL: test_gatherdps:
324 ; SKYLAKE-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
325 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
327 ; SKX-LABEL: test_gatherdps:
329 ; SKX-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
330 ; SKX-NEXT: retq # sched: [7:1.00]
332 ; ZNVER1-LABEL: test_gatherdps:
334 ; ZNVER1-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?]
335 ; ZNVER1-NEXT: retq # sched: [1:0.50]
336 %1 = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> %a0, i8* %a1, <4 x i32> %a2, <4 x float> %a3, i8 2)
339 declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float>, i8*, <4 x i32>, <4 x float>, i8) nounwind readonly
; vgatherdps, 256-bit form, via @llvm.x86.avx2.gather.d.ps.256.
; Expected assembly: %ymm1 as the index register (the index vector is
; <8 x i32>) with scale 4 (the trailing i8 4 of the call), %ymm2 as the first
; operand, result in %ymm0.
; The x86-64 checks carry no "sched" annotation on the gather; haswell reports
; [1:?] and znver1 [100:?].
341 define <8 x float> @test_gatherdps_ymm(<8 x float> %a0, i8* %a1, <8 x i32> %a2, <8 x float> %a3) {
342 ; GENERIC-LABEL: test_gatherdps_ymm:
344 ; GENERIC-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0
345 ; GENERIC-NEXT: retq # sched: [1:1.00]
347 ; HASWELL-LABEL: test_gatherdps_ymm:
349 ; HASWELL-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [1:?]
350 ; HASWELL-NEXT: retq # sched: [2:1.00]
352 ; BROADWELL-LABEL: test_gatherdps_ymm:
354 ; BROADWELL-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [26:4.00]
355 ; BROADWELL-NEXT: retq # sched: [7:1.00]
357 ; SKYLAKE-LABEL: test_gatherdps_ymm:
359 ; SKYLAKE-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [25:1.00]
360 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
362 ; SKX-LABEL: test_gatherdps_ymm:
364 ; SKX-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [25:1.00]
365 ; SKX-NEXT: retq # sched: [7:1.00]
367 ; ZNVER1-LABEL: test_gatherdps_ymm:
369 ; ZNVER1-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [100:?]
370 ; ZNVER1-NEXT: retq # sched: [1:0.50]
371 %1 = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0, i8* %a1, <8 x i32> %a2, <8 x float> %a3, i8 4)
374 declare <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float>, i8*, <8 x i32>, <8 x float>, i8) nounwind readonly
; vgatherqpd, 128-bit form, via @llvm.x86.avx2.gather.q.pd.
; Expected assembly: %xmm1 as the index register (the index vector is
; <2 x i64>) with scale 2 (the trailing i8 2 of the call), %xmm2 as the first
; operand, result in %xmm0.
; The x86-64 checks carry no "sched" annotation on the gather; haswell reports
; [1:?] and znver1 [100:?].
376 define <2 x double> @test_gatherqpd(<2 x double> %a0, i8* %a1, <2 x i64> %a2, <2 x double> %a3) {
377 ; GENERIC-LABEL: test_gatherqpd:
379 ; GENERIC-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0
380 ; GENERIC-NEXT: retq # sched: [1:1.00]
382 ; HASWELL-LABEL: test_gatherqpd:
384 ; HASWELL-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?]
385 ; HASWELL-NEXT: retq # sched: [2:1.00]
387 ; BROADWELL-LABEL: test_gatherqpd:
389 ; BROADWELL-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:3.00]
390 ; BROADWELL-NEXT: retq # sched: [7:1.00]
392 ; SKYLAKE-LABEL: test_gatherqpd:
394 ; SKYLAKE-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
395 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
397 ; SKX-LABEL: test_gatherqpd:
399 ; SKX-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
400 ; SKX-NEXT: retq # sched: [7:1.00]
402 ; ZNVER1-LABEL: test_gatherqpd:
404 ; ZNVER1-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?]
405 ; ZNVER1-NEXT: retq # sched: [1:0.50]
406 %1 = call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> %a0, i8* %a1, <2 x i64> %a2, <2 x double> %a3, i8 2)
409 declare <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double>, i8*, <2 x i64>, <2 x double>, i8) nounwind readonly
; vgatherqpd, 256-bit form, via @llvm.x86.avx2.gather.q.pd.256.
; Expected assembly: %ymm1 as the index register (the index vector is
; <4 x i64>) with scale 8 (the trailing i8 8 of the call), %ymm2 as the first
; operand, result in %ymm0.
; The x86-64 checks carry no "sched" annotation on the gather; haswell reports
; [1:?] and znver1 [100:?].
411 define <4 x double> @test_gatherqpd_ymm(<4 x double> %a0, i8* %a1, <4 x i64> %a2, <4 x double> %a3) {
412 ; GENERIC-LABEL: test_gatherqpd_ymm:
414 ; GENERIC-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0
415 ; GENERIC-NEXT: retq # sched: [1:1.00]
417 ; HASWELL-LABEL: test_gatherqpd_ymm:
419 ; HASWELL-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [1:?]
420 ; HASWELL-NEXT: retq # sched: [2:1.00]
422 ; BROADWELL-LABEL: test_gatherqpd_ymm:
424 ; BROADWELL-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [23:3.00]
425 ; BROADWELL-NEXT: retq # sched: [7:1.00]
427 ; SKYLAKE-LABEL: test_gatherqpd_ymm:
429 ; SKYLAKE-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [25:1.00]
430 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
432 ; SKX-LABEL: test_gatherqpd_ymm:
434 ; SKX-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [25:1.00]
435 ; SKX-NEXT: retq # sched: [7:1.00]
437 ; ZNVER1-LABEL: test_gatherqpd_ymm:
439 ; ZNVER1-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [100:?]
440 ; ZNVER1-NEXT: retq # sched: [1:0.50]
441 %1 = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> %a0, i8* %a1, <4 x i64> %a2, <4 x double> %a3, i8 8)
444 declare <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double>, i8*, <4 x i64>, <4 x double>, i8) nounwind readonly
; vgatherqps, 128-bit index form, via @llvm.x86.avx2.gather.q.ps.
; Expected assembly: %xmm1 as the index register (the index vector is
; <2 x i64>) with scale 2 (the trailing i8 2 of the call), %xmm2 as the first
; operand, result in %xmm0.
; The x86-64 checks carry no "sched" annotation on the gather; haswell reports
; [1:?] and znver1 [100:?].
446 define <4 x float> @test_gatherqps(<4 x float> %a0, i8* %a1, <2 x i64> %a2, <4 x float> %a3) {
447 ; GENERIC-LABEL: test_gatherqps:
449 ; GENERIC-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0
450 ; GENERIC-NEXT: retq # sched: [1:1.00]
452 ; HASWELL-LABEL: test_gatherqps:
454 ; HASWELL-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?]
455 ; HASWELL-NEXT: retq # sched: [2:1.00]
457 ; BROADWELL-LABEL: test_gatherqps:
459 ; BROADWELL-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [27:5.00]
460 ; BROADWELL-NEXT: retq # sched: [7:1.00]
462 ; SKYLAKE-LABEL: test_gatherqps:
464 ; SKYLAKE-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
465 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
467 ; SKX-LABEL: test_gatherqps:
469 ; SKX-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
470 ; SKX-NEXT: retq # sched: [7:1.00]
472 ; ZNVER1-LABEL: test_gatherqps:
474 ; ZNVER1-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?]
475 ; ZNVER1-NEXT: retq # sched: [1:0.50]
476 %1 = call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> %a0, i8* %a1, <2 x i64> %a2, <4 x float> %a3, i8 2)
479 declare <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float>, i8*, <2 x i64>, <4 x float>, i8) nounwind readonly
; vgatherqps, 256-bit index form, via @llvm.x86.avx2.gather.q.ps.256.
; The index vector is <4 x i64> (expected in %ymm1, scale 4 from the trailing
; i8 4 of the call) while the data operands stay <4 x float> (%xmm2, %xmm0).
; Every configuration also expects vzeroupper before retq.
; The x86-64 checks carry no "sched" annotation on the gather or on
; vzeroupper; haswell reports the gather as [1:?] and znver1 reports both
; instructions as [100:?].
481 define <4 x float> @test_gatherqps_ymm(<4 x float> %a0, i8* %a1, <4 x i64> %a2, <4 x float> %a3) {
482 ; GENERIC-LABEL: test_gatherqps_ymm:
484 ; GENERIC-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0
485 ; GENERIC-NEXT: vzeroupper
486 ; GENERIC-NEXT: retq # sched: [1:1.00]
488 ; HASWELL-LABEL: test_gatherqps_ymm:
490 ; HASWELL-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [1:?]
491 ; HASWELL-NEXT: vzeroupper # sched: [4:1.00]
492 ; HASWELL-NEXT: retq # sched: [2:1.00]
494 ; BROADWELL-LABEL: test_gatherqps_ymm:
496 ; BROADWELL-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [24:5.00]
497 ; BROADWELL-NEXT: vzeroupper # sched: [4:1.00]
498 ; BROADWELL-NEXT: retq # sched: [7:1.00]
500 ; SKYLAKE-LABEL: test_gatherqps_ymm:
502 ; SKYLAKE-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [25:1.00]
503 ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00]
504 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
506 ; SKX-LABEL: test_gatherqps_ymm:
508 ; SKX-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [25:1.00]
509 ; SKX-NEXT: vzeroupper # sched: [4:1.00]
510 ; SKX-NEXT: retq # sched: [7:1.00]
512 ; ZNVER1-LABEL: test_gatherqps_ymm:
514 ; ZNVER1-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [100:?]
515 ; ZNVER1-NEXT: vzeroupper # sched: [100:?]
516 ; ZNVER1-NEXT: retq # sched: [1:0.50]
517 %1 = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> %a0, i8* %a1, <4 x i64> %a2, <4 x float> %a3, i8 4)
520 declare <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float>, i8*, <4 x i64>, <4 x float>, i8) nounwind readonly
; vinserti128, register-source and memory-source forms.
; Every -mcpu configuration is expected to emit vinserti128 $1 once from
; %xmm1 and once from (%rdi), followed by vpaddd combining the two results;
; only the "sched" annotations differ.
522 define <8 x i32> @test_inserti128(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
523 ; GENERIC-LABEL: test_inserti128:
525 ; GENERIC-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:1.00]
526 ; GENERIC-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
527 ; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
528 ; GENERIC-NEXT: retq # sched: [1:1.00]
530 ; HASWELL-LABEL: test_inserti128:
532 ; HASWELL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00]
533 ; HASWELL-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
534 ; HASWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
535 ; HASWELL-NEXT: retq # sched: [2:1.00]
537 ; BROADWELL-LABEL: test_inserti128:
539 ; BROADWELL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00]
540 ; BROADWELL-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [6:0.50]
541 ; BROADWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
542 ; BROADWELL-NEXT: retq # sched: [7:1.00]
544 ; SKYLAKE-LABEL: test_inserti128:
546 ; SKYLAKE-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00]
547 ; SKYLAKE-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
548 ; SKYLAKE-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
549 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
551 ; SKX-LABEL: test_inserti128:
553 ; SKX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00]
554 ; SKX-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
555 ; SKX-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
556 ; SKX-NEXT: retq # sched: [7:1.00]
558 ; ZNVER1-LABEL: test_inserti128:
560 ; ZNVER1-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [2:0.25]
561 ; ZNVER1-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [9:0.50]
562 ; ZNVER1-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
563 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register form: widen %a1 to 256 bits (upper lanes undef), then build a
; vector from the low half of %a0 and the four elements of %a1
; (mask 0,1,2,3,8,9,10,11).
564 %1 = shufflevector <4 x i32> %a1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
565 %2 = shufflevector <8 x i32> %a0, <8 x i32> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; Memory form: the same pattern with the inserted 128 bits loaded from %a2.
566 %3 = load <4 x i32>, <4 x i32> *%a2, align 16
567 %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
568 %5 = shufflevector <8 x i32> %a0, <8 x i32> %4, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; Combine both insert results so neither is dead.
569 %6 = add <8 x i32> %2, %5
; vmovntdqa, 256-bit load form, via @llvm.x86.avx2.movntdqa.
; Every -mcpu configuration is expected to emit vmovntdqa (%rdi), %ymm0
; directly followed by retq; only the "sched" annotations differ.
573 define <4 x i64> @test_movntdqa(i8* %a0) {
574 ; GENERIC-LABEL: test_movntdqa:
576 ; GENERIC-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [4:0.50]
577 ; GENERIC-NEXT: retq # sched: [1:1.00]
579 ; HASWELL-LABEL: test_movntdqa:
581 ; HASWELL-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [1:0.50]
582 ; HASWELL-NEXT: retq # sched: [2:1.00]
584 ; BROADWELL-LABEL: test_movntdqa:
586 ; BROADWELL-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [6:0.50]
587 ; BROADWELL-NEXT: retq # sched: [7:1.00]
589 ; SKYLAKE-LABEL: test_movntdqa:
591 ; SKYLAKE-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [7:0.50]
592 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
594 ; SKX-LABEL: test_movntdqa:
596 ; SKX-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [7:0.50]
597 ; SKX-NEXT: retq # sched: [7:1.00]
599 ; ZNVER1-LABEL: test_movntdqa:
601 ; ZNVER1-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [8:0.50]
602 ; ZNVER1-NEXT: retq # sched: [1:0.50]
603 %1 = call <4 x i64> @llvm.x86.avx2.movntdqa(i8* %a0)
606 declare <4 x i64> @llvm.x86.avx2.movntdqa(i8*) nounwind readonly
; vmpsadbw, register-source and memory-source forms, via
; @llvm.x86.avx2.mpsadbw with immediate 7.
; Every -mcpu configuration is expected to emit the register form followed by
; the form with a (%rdi) memory operand; znver1 reports both as [100:?].
608 define <16 x i16> @test_mpsadbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
609 ; GENERIC-LABEL: test_mpsadbw:
611 ; GENERIC-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
612 ; GENERIC-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
613 ; GENERIC-NEXT: retq # sched: [1:1.00]
615 ; HASWELL-LABEL: test_mpsadbw:
617 ; HASWELL-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [7:2.00]
618 ; HASWELL-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [7:2.00]
619 ; HASWELL-NEXT: retq # sched: [2:1.00]
621 ; BROADWELL-LABEL: test_mpsadbw:
623 ; BROADWELL-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [7:2.00]
624 ; BROADWELL-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [13:2.00]
625 ; BROADWELL-NEXT: retq # sched: [7:1.00]
627 ; SKYLAKE-LABEL: test_mpsadbw:
629 ; SKYLAKE-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [4:2.00]
630 ; SKYLAKE-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [11:2.00]
631 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
633 ; SKX-LABEL: test_mpsadbw:
635 ; SKX-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [4:2.00]
636 ; SKX-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [11:2.00]
637 ; SKX-NEXT: retq # sched: [7:1.00]
639 ; ZNVER1-LABEL: test_mpsadbw:
641 ; ZNVER1-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [100:?]
642 ; ZNVER1-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [100:?]
643 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; First call uses two register operands; its result is bitcast back to
; <32 x i8> and fed into a second call whose other operand is loaded from %a2.
644 %1 = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i8 7)
645 %2 = bitcast <16 x i16> %1 to <32 x i8>
646 %3 = load <32 x i8>, <32 x i8> *%a2, align 32
647 %4 = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %2, <32 x i8> %3, i8 7)
650 declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i8) nounwind readnone
; vpabsb, register-source and memory-source forms, via @llvm.x86.avx2.pabs.b.
; Every -mcpu configuration is expected to emit both vpabsb forms and a vpor
; combining them. The znver1 checks expect the memory form first; all other
; configurations expect the register form first.
652 define <32 x i8> @test_pabsb(<32 x i8> %a0, <32 x i8> *%a1) {
653 ; GENERIC-LABEL: test_pabsb:
655 ; GENERIC-NEXT: vpabsb %ymm0, %ymm0 # sched: [3:1.00]
656 ; GENERIC-NEXT: vpabsb (%rdi), %ymm1 # sched: [7:1.00]
657 ; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
658 ; GENERIC-NEXT: retq # sched: [1:1.00]
660 ; HASWELL-LABEL: test_pabsb:
662 ; HASWELL-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50]
663 ; HASWELL-NEXT: vpabsb (%rdi), %ymm1 # sched: [1:0.50]
664 ; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
665 ; HASWELL-NEXT: retq # sched: [2:1.00]
667 ; BROADWELL-LABEL: test_pabsb:
669 ; BROADWELL-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50]
670 ; BROADWELL-NEXT: vpabsb (%rdi), %ymm1 # sched: [7:0.50]
671 ; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
672 ; BROADWELL-NEXT: retq # sched: [7:1.00]
674 ; SKYLAKE-LABEL: test_pabsb:
676 ; SKYLAKE-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50]
677 ; SKYLAKE-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:0.50]
678 ; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
679 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
681 ; SKX-LABEL: test_pabsb:
683 ; SKX-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50]
684 ; SKX-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:0.50]
685 ; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
686 ; SKX-NEXT: retq # sched: [7:1.00]
688 ; ZNVER1-LABEL: test_pabsb:
690 ; ZNVER1-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:0.50]
691 ; ZNVER1-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.25]
692 ; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
693 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Apply the intrinsic to the register argument and to a value loaded from
; %a1, then OR the two results so both calls are live.
694 %1 = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %a0)
695 %2 = load <32 x i8>, <32 x i8> *%a1, align 32
696 %3 = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %2)
697 %4 = or <32 x i8> %1, %3
700 declare <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8>) nounwind readnone
; vpabsd, register-source and memory-source forms, via @llvm.x86.avx2.pabs.d.
; Every -mcpu configuration is expected to emit both vpabsd forms and a vpor
; combining them. The znver1 checks expect the memory form first; all other
; configurations expect the register form first.
702 define <8 x i32> @test_pabsd(<8 x i32> %a0, <8 x i32> *%a1) {
703 ; GENERIC-LABEL: test_pabsd:
705 ; GENERIC-NEXT: vpabsd %ymm0, %ymm0 # sched: [3:1.00]
706 ; GENERIC-NEXT: vpabsd (%rdi), %ymm1 # sched: [7:1.00]
707 ; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
708 ; GENERIC-NEXT: retq # sched: [1:1.00]
710 ; HASWELL-LABEL: test_pabsd:
712 ; HASWELL-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50]
713 ; HASWELL-NEXT: vpabsd (%rdi), %ymm1 # sched: [1:0.50]
714 ; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
715 ; HASWELL-NEXT: retq # sched: [2:1.00]
717 ; BROADWELL-LABEL: test_pabsd:
719 ; BROADWELL-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50]
720 ; BROADWELL-NEXT: vpabsd (%rdi), %ymm1 # sched: [7:0.50]
721 ; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
722 ; BROADWELL-NEXT: retq # sched: [7:1.00]
724 ; SKYLAKE-LABEL: test_pabsd:
726 ; SKYLAKE-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50]
727 ; SKYLAKE-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:0.50]
728 ; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
729 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
731 ; SKX-LABEL: test_pabsd:
733 ; SKX-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50]
734 ; SKX-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:0.50]
735 ; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
736 ; SKX-NEXT: retq # sched: [7:1.00]
738 ; ZNVER1-LABEL: test_pabsd:
740 ; ZNVER1-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:0.50]
741 ; ZNVER1-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.25]
742 ; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
743 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Apply the intrinsic to the register argument and to a value loaded from
; %a1, then OR the two results so both calls are live.
744 %1 = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %a0)
745 %2 = load <8 x i32>, <8 x i32> *%a1, align 32
746 %3 = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %2)
747 %4 = or <8 x i32> %1, %3
750 declare <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32>) nounwind readnone
; vpabsw, register-source and memory-source forms, via @llvm.x86.avx2.pabs.w.
; Every -mcpu configuration is expected to emit both vpabsw forms and a vpor
; combining them. The znver1 checks expect the memory form first; all other
; configurations expect the register form first.
752 define <16 x i16> @test_pabsw(<16 x i16> %a0, <16 x i16> *%a1) {
753 ; GENERIC-LABEL: test_pabsw:
755 ; GENERIC-NEXT: vpabsw %ymm0, %ymm0 # sched: [3:1.00]
756 ; GENERIC-NEXT: vpabsw (%rdi), %ymm1 # sched: [7:1.00]
757 ; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
758 ; GENERIC-NEXT: retq # sched: [1:1.00]
760 ; HASWELL-LABEL: test_pabsw:
762 ; HASWELL-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50]
763 ; HASWELL-NEXT: vpabsw (%rdi), %ymm1 # sched: [1:0.50]
764 ; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
765 ; HASWELL-NEXT: retq # sched: [2:1.00]
767 ; BROADWELL-LABEL: test_pabsw:
769 ; BROADWELL-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50]
770 ; BROADWELL-NEXT: vpabsw (%rdi), %ymm1 # sched: [7:0.50]
771 ; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
772 ; BROADWELL-NEXT: retq # sched: [7:1.00]
774 ; SKYLAKE-LABEL: test_pabsw:
776 ; SKYLAKE-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50]
777 ; SKYLAKE-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:0.50]
778 ; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
779 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
781 ; SKX-LABEL: test_pabsw:
783 ; SKX-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50]
784 ; SKX-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:0.50]
785 ; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
786 ; SKX-NEXT: retq # sched: [7:1.00]
788 ; ZNVER1-LABEL: test_pabsw:
790 ; ZNVER1-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:0.50]
791 ; ZNVER1-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.25]
792 ; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
793 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Apply the intrinsic to the register argument and to a value loaded from
; %a1, then OR the two results so both calls are live.
794 %1 = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %a0)
795 %2 = load <16 x i16>, <16 x i16> *%a1, align 32
796 %3 = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %2)
797 %4 = or <16 x i16> %1, %3
800 declare <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16>) nounwind readnone
; vpackssdw, register-source and memory-source forms, via
; @llvm.x86.avx2.packssdw.
; Every -mcpu configuration is expected to emit the register form followed by
; the form with a (%rdi) memory operand; only the "sched" annotations differ.
802 define <16 x i16> @test_packssdw(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
803 ; GENERIC-LABEL: test_packssdw:
805 ; GENERIC-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
806 ; GENERIC-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
807 ; GENERIC-NEXT: retq # sched: [1:1.00]
809 ; HASWELL-LABEL: test_packssdw:
811 ; HASWELL-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
812 ; HASWELL-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
813 ; HASWELL-NEXT: retq # sched: [2:1.00]
815 ; BROADWELL-LABEL: test_packssdw:
817 ; BROADWELL-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
818 ; BROADWELL-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
819 ; BROADWELL-NEXT: retq # sched: [7:1.00]
821 ; SKYLAKE-LABEL: test_packssdw:
823 ; SKYLAKE-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
824 ; SKYLAKE-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
825 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
827 ; SKX-LABEL: test_packssdw:
829 ; SKX-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
830 ; SKX-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
831 ; SKX-NEXT: retq # sched: [7:1.00]
833 ; ZNVER1-LABEL: test_packssdw:
835 ; ZNVER1-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
836 ; ZNVER1-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
837 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; First call uses two register operands; its result is bitcast back to
; <8 x i32> and fed into a second call whose other operand is loaded from %a2.
838 %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a0, <8 x i32> %a1)
839 %2 = bitcast <16 x i16> %1 to <8 x i32>
840 %3 = load <8 x i32>, <8 x i32> *%a2, align 32
841 %4 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %2, <8 x i32> %3)
844 declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>) nounwind readnone
; Scheduling test for the 256-bit vpacksswb. The intrinsic
; @llvm.x86.avx2.packsswb is called once on %a0/%a1 and once on the bitcast
; result together with a vector loaded from %a2, so the expected assembly
; holds both the register-register and the memory-operand form.
846 define <32 x i8> @test_packsswb(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
847 ; GENERIC-LABEL: test_packsswb:
849 ; GENERIC-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
850 ; GENERIC-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
851 ; GENERIC-NEXT: retq # sched: [1:1.00]
853 ; HASWELL-LABEL: test_packsswb:
855 ; HASWELL-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
856 ; HASWELL-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
857 ; HASWELL-NEXT: retq # sched: [2:1.00]
859 ; BROADWELL-LABEL: test_packsswb:
861 ; BROADWELL-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
862 ; BROADWELL-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
863 ; BROADWELL-NEXT: retq # sched: [7:1.00]
865 ; SKYLAKE-LABEL: test_packsswb:
867 ; SKYLAKE-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
868 ; SKYLAKE-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
869 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
871 ; SKX-LABEL: test_packsswb:
873 ; SKX-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
874 ; SKX-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
875 ; SKX-NEXT: retq # sched: [7:1.00]
877 ; ZNVER1-LABEL: test_packsswb:
879 ; ZNVER1-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
880 ; ZNVER1-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
881 ; ZNVER1-NEXT: retq # sched: [1:0.50]
882 %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a0, <16 x i16> %a1)
883 %2 = bitcast <32 x i8> %1 to <16 x i16>
884 %3 = load <16 x i16>, <16 x i16> *%a2, align 32
885 %4 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %2, <16 x i16> %3)
888 declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) nounwind readnone
; Scheduling test for the 256-bit vpackusdw. The intrinsic
; @llvm.x86.avx2.packusdw is called once on %a0/%a1 and once on the bitcast
; result together with a vector loaded from %a2, so the expected assembly
; holds both the register-register and the memory-operand form.
890 define <16 x i16> @test_packusdw(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
891 ; GENERIC-LABEL: test_packusdw:
893 ; GENERIC-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
894 ; GENERIC-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
895 ; GENERIC-NEXT: retq # sched: [1:1.00]
897 ; HASWELL-LABEL: test_packusdw:
899 ; HASWELL-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
900 ; HASWELL-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
901 ; HASWELL-NEXT: retq # sched: [2:1.00]
903 ; BROADWELL-LABEL: test_packusdw:
905 ; BROADWELL-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
906 ; BROADWELL-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
907 ; BROADWELL-NEXT: retq # sched: [7:1.00]
909 ; SKYLAKE-LABEL: test_packusdw:
911 ; SKYLAKE-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
912 ; SKYLAKE-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
913 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
915 ; SKX-LABEL: test_packusdw:
917 ; SKX-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
918 ; SKX-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
919 ; SKX-NEXT: retq # sched: [7:1.00]
921 ; ZNVER1-LABEL: test_packusdw:
923 ; ZNVER1-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
924 ; ZNVER1-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
925 ; ZNVER1-NEXT: retq # sched: [1:0.50]
926 %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a0, <8 x i32> %a1)
927 %2 = bitcast <16 x i16> %1 to <8 x i32>
928 %3 = load <8 x i32>, <8 x i32> *%a2, align 32
929 %4 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %2, <8 x i32> %3)
932 declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readnone
; Scheduling test for the 256-bit vpackuswb. The intrinsic
; @llvm.x86.avx2.packuswb is called once on %a0/%a1 and once on the bitcast
; result together with a vector loaded from %a2, so the expected assembly
; holds both the register-register and the memory-operand form.
934 define <32 x i8> @test_packuswb(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
935 ; GENERIC-LABEL: test_packuswb:
937 ; GENERIC-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
938 ; GENERIC-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
939 ; GENERIC-NEXT: retq # sched: [1:1.00]
941 ; HASWELL-LABEL: test_packuswb:
943 ; HASWELL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
944 ; HASWELL-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
945 ; HASWELL-NEXT: retq # sched: [2:1.00]
947 ; BROADWELL-LABEL: test_packuswb:
949 ; BROADWELL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
950 ; BROADWELL-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
951 ; BROADWELL-NEXT: retq # sched: [7:1.00]
953 ; SKYLAKE-LABEL: test_packuswb:
955 ; SKYLAKE-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
956 ; SKYLAKE-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
957 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
959 ; SKX-LABEL: test_packuswb:
961 ; SKX-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
962 ; SKX-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
963 ; SKX-NEXT: retq # sched: [7:1.00]
965 ; ZNVER1-LABEL: test_packuswb:
967 ; ZNVER1-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
968 ; ZNVER1-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
969 ; ZNVER1-NEXT: retq # sched: [1:0.50]
970 %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a0, <16 x i16> %a1)
971 %2 = bitcast <32 x i8> %1 to <16 x i16>
972 %3 = load <16 x i16>, <16 x i16> *%a2, align 32
973 %4 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %2, <16 x i16> %3)
976 declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readnone
; Scheduling test for the 256-bit vpaddb. A plain IR add of %a0 and %a1 is
; followed by an add with a <32 x i8> vector loaded from %a2, so the expected
; assembly holds one register-register and one memory-operand vpaddb.
978 define <32 x i8> @test_paddb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
979 ; GENERIC-LABEL: test_paddb:
981 ; GENERIC-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
982 ; GENERIC-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
983 ; GENERIC-NEXT: retq # sched: [1:1.00]
985 ; HASWELL-LABEL: test_paddb:
987 ; HASWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
988 ; HASWELL-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
989 ; HASWELL-NEXT: retq # sched: [2:1.00]
991 ; BROADWELL-LABEL: test_paddb:
993 ; BROADWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
994 ; BROADWELL-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
995 ; BROADWELL-NEXT: retq # sched: [7:1.00]
997 ; SKYLAKE-LABEL: test_paddb:
999 ; SKYLAKE-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1000 ; SKYLAKE-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1001 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1003 ; SKX-LABEL: test_paddb:
1005 ; SKX-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1006 ; SKX-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1007 ; SKX-NEXT: retq # sched: [7:1.00]
1009 ; ZNVER1-LABEL: test_paddb:
1011 ; ZNVER1-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1012 ; ZNVER1-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1013 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1014 %1 = add <32 x i8> %a0, %a1
1015 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
1016 %3 = add <32 x i8> %1, %2
; Scheduling test for the 256-bit vpaddd. A plain IR add of %a0 and %a1 is
; followed by an add with a <8 x i32> vector loaded from %a2, so the expected
; assembly holds one register-register and one memory-operand vpaddd.
1020 define <8 x i32> @test_paddd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
1021 ; GENERIC-LABEL: test_paddd:
1023 ; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1024 ; GENERIC-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
1025 ; GENERIC-NEXT: retq # sched: [1:1.00]
1027 ; HASWELL-LABEL: test_paddd:
1029 ; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1030 ; HASWELL-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
1031 ; HASWELL-NEXT: retq # sched: [2:1.00]
1033 ; BROADWELL-LABEL: test_paddd:
1034 ; BROADWELL: # BB#0:
1035 ; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1036 ; BROADWELL-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
1037 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1039 ; SKYLAKE-LABEL: test_paddd:
1041 ; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1042 ; SKYLAKE-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1043 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1045 ; SKX-LABEL: test_paddd:
1047 ; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1048 ; SKX-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1049 ; SKX-NEXT: retq # sched: [7:1.00]
1051 ; ZNVER1-LABEL: test_paddd:
1053 ; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1054 ; ZNVER1-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1055 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1056 %1 = add <8 x i32> %a0, %a1
1057 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
1058 %3 = add <8 x i32> %1, %2
; Scheduling test for the 256-bit vpaddq. A plain IR add of %a0 and %a1 is
; followed by an add with a <4 x i64> vector loaded from %a2, so the expected
; assembly holds one register-register and one memory-operand vpaddq.
1062 define <4 x i64> @test_paddq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
1063 ; GENERIC-LABEL: test_paddq:
1065 ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1066 ; GENERIC-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
1067 ; GENERIC-NEXT: retq # sched: [1:1.00]
1069 ; HASWELL-LABEL: test_paddq:
1071 ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1072 ; HASWELL-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
1073 ; HASWELL-NEXT: retq # sched: [2:1.00]
1075 ; BROADWELL-LABEL: test_paddq:
1076 ; BROADWELL: # BB#0:
1077 ; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1078 ; BROADWELL-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
1079 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1081 ; SKYLAKE-LABEL: test_paddq:
1083 ; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1084 ; SKYLAKE-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1085 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1087 ; SKX-LABEL: test_paddq:
1089 ; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1090 ; SKX-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1091 ; SKX-NEXT: retq # sched: [7:1.00]
1093 ; ZNVER1-LABEL: test_paddq:
1095 ; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1096 ; ZNVER1-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1097 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1098 %1 = add <4 x i64> %a0, %a1
1099 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
1100 %3 = add <4 x i64> %1, %2
; Scheduling test for the 256-bit vpaddsb. The intrinsic
; @llvm.x86.avx2.padds.b is called once on %a0/%a1 and once on that result
; together with a vector loaded from %a2, so the expected assembly holds both
; the register-register and the memory-operand form.
1104 define <32 x i8> @test_paddsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
1105 ; GENERIC-LABEL: test_paddsb:
1107 ; GENERIC-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1108 ; GENERIC-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
1109 ; GENERIC-NEXT: retq # sched: [1:1.00]
1111 ; HASWELL-LABEL: test_paddsb:
1113 ; HASWELL-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1114 ; HASWELL-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
1115 ; HASWELL-NEXT: retq # sched: [2:1.00]
1117 ; BROADWELL-LABEL: test_paddsb:
1118 ; BROADWELL: # BB#0:
1119 ; BROADWELL-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1120 ; BROADWELL-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
1121 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1123 ; SKYLAKE-LABEL: test_paddsb:
1125 ; SKYLAKE-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1126 ; SKYLAKE-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1127 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1129 ; SKX-LABEL: test_paddsb:
1131 ; SKX-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1132 ; SKX-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1133 ; SKX-NEXT: retq # sched: [7:1.00]
1135 ; ZNVER1-LABEL: test_paddsb:
1137 ; ZNVER1-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1138 ; ZNVER1-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1139 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1140 %1 = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %a0, <32 x i8> %a1)
1141 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
1142 %3 = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %1, <32 x i8> %2)
1145 declare <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8>, <32 x i8>) nounwind readnone
; Scheduling test for the 256-bit vpaddsw. The intrinsic
; @llvm.x86.avx2.padds.w is called once on %a0/%a1 and once on that result
; together with a vector loaded from %a2, so the expected assembly holds both
; the register-register and the memory-operand form.
1147 define <16 x i16> @test_paddsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
1148 ; GENERIC-LABEL: test_paddsw:
1150 ; GENERIC-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1151 ; GENERIC-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
1152 ; GENERIC-NEXT: retq # sched: [1:1.00]
1154 ; HASWELL-LABEL: test_paddsw:
1156 ; HASWELL-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1157 ; HASWELL-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
1158 ; HASWELL-NEXT: retq # sched: [2:1.00]
1160 ; BROADWELL-LABEL: test_paddsw:
1161 ; BROADWELL: # BB#0:
1162 ; BROADWELL-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1163 ; BROADWELL-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
1164 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1166 ; SKYLAKE-LABEL: test_paddsw:
1168 ; SKYLAKE-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1169 ; SKYLAKE-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1170 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1172 ; SKX-LABEL: test_paddsw:
1174 ; SKX-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1175 ; SKX-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1176 ; SKX-NEXT: retq # sched: [7:1.00]
1178 ; ZNVER1-LABEL: test_paddsw:
1180 ; ZNVER1-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1181 ; ZNVER1-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1182 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1183 %1 = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %a0, <16 x i16> %a1)
1184 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
1185 %3 = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %1, <16 x i16> %2)
1188 declare <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16>, <16 x i16>) nounwind readnone
; Scheduling test for the 256-bit vpaddusb. The intrinsic
; @llvm.x86.avx2.paddus.b is called once on %a0/%a1 and once on that result
; together with a vector loaded from %a2, so the expected assembly holds both
; the register-register and the memory-operand form.
1190 define <32 x i8> @test_paddusb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
1191 ; GENERIC-LABEL: test_paddusb:
1193 ; GENERIC-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1194 ; GENERIC-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
1195 ; GENERIC-NEXT: retq # sched: [1:1.00]
1197 ; HASWELL-LABEL: test_paddusb:
1199 ; HASWELL-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1200 ; HASWELL-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
1201 ; HASWELL-NEXT: retq # sched: [2:1.00]
1203 ; BROADWELL-LABEL: test_paddusb:
1204 ; BROADWELL: # BB#0:
1205 ; BROADWELL-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1206 ; BROADWELL-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
1207 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1209 ; SKYLAKE-LABEL: test_paddusb:
1211 ; SKYLAKE-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1212 ; SKYLAKE-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1213 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1215 ; SKX-LABEL: test_paddusb:
1217 ; SKX-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1218 ; SKX-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1219 ; SKX-NEXT: retq # sched: [7:1.00]
1221 ; ZNVER1-LABEL: test_paddusb:
1223 ; ZNVER1-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1224 ; ZNVER1-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1225 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1226 %1 = call <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8> %a0, <32 x i8> %a1)
1227 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
1228 %3 = call <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8> %1, <32 x i8> %2)
1231 declare <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8>, <32 x i8>) nounwind readnone
; Scheduling test for the 256-bit vpaddusw. The intrinsic
; @llvm.x86.avx2.paddus.w is called once on %a0/%a1 and once on that result
; together with a vector loaded from %a2, so the expected assembly holds both
; the register-register and the memory-operand form.
1233 define <16 x i16> @test_paddusw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
1234 ; GENERIC-LABEL: test_paddusw:
1236 ; GENERIC-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1237 ; GENERIC-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
1238 ; GENERIC-NEXT: retq # sched: [1:1.00]
1240 ; HASWELL-LABEL: test_paddusw:
1242 ; HASWELL-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1243 ; HASWELL-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
1244 ; HASWELL-NEXT: retq # sched: [2:1.00]
1246 ; BROADWELL-LABEL: test_paddusw:
1247 ; BROADWELL: # BB#0:
1248 ; BROADWELL-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1249 ; BROADWELL-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
1250 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1252 ; SKYLAKE-LABEL: test_paddusw:
1254 ; SKYLAKE-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1255 ; SKYLAKE-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1256 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1258 ; SKX-LABEL: test_paddusw:
1260 ; SKX-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1261 ; SKX-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1262 ; SKX-NEXT: retq # sched: [7:1.00]
1264 ; ZNVER1-LABEL: test_paddusw:
1266 ; ZNVER1-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1267 ; ZNVER1-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1268 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1269 %1 = call <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16> %a0, <16 x i16> %a1)
1270 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
1271 %3 = call <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16> %1, <16 x i16> %2)
1274 declare <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16>, <16 x i16>) nounwind readnone
; Scheduling test for the 256-bit vpaddw. A plain IR add of %a0 and %a1 is
; followed by an add with a <16 x i16> vector loaded from %a2, so the expected
; assembly holds one register-register and one memory-operand vpaddw.
1276 define <16 x i16> @test_paddw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
1277 ; GENERIC-LABEL: test_paddw:
1279 ; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1280 ; GENERIC-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
1281 ; GENERIC-NEXT: retq # sched: [1:1.00]
1283 ; HASWELL-LABEL: test_paddw:
1285 ; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1286 ; HASWELL-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
1287 ; HASWELL-NEXT: retq # sched: [2:1.00]
1289 ; BROADWELL-LABEL: test_paddw:
1290 ; BROADWELL: # BB#0:
1291 ; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1292 ; BROADWELL-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
1293 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1295 ; SKYLAKE-LABEL: test_paddw:
1297 ; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1298 ; SKYLAKE-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1299 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1301 ; SKX-LABEL: test_paddw:
1303 ; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1304 ; SKX-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1305 ; SKX-NEXT: retq # sched: [7:1.00]
1307 ; ZNVER1-LABEL: test_paddw:
1309 ; ZNVER1-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1310 ; ZNVER1-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1311 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1312 %1 = add <16 x i16> %a0, %a1
1313 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
1314 %3 = add <16 x i16> %1, %2
; Scheduling test for the 256-bit vpalignr. Two shufflevectors use the same
; mask (elements 1-15 and 17-31 of the first operand, elements 0 and 16 of the
; second): the first combines %a1 with %a0, the second combines a vector
; loaded from %a2 with the first result. The expected assembly therefore
; shows vpalignr in register form and in memory-operand form.
1318 define <32 x i8> @test_palignr(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
1319 ; GENERIC-LABEL: test_palignr:
1321 ; GENERIC-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00]
1322 ; GENERIC-NEXT: vpalignr {{.*#+}} ymm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],mem[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [5:1.00]
1323 ; GENERIC-NEXT: retq # sched: [1:1.00]
1325 ; HASWELL-LABEL: test_palignr:
1327 ; HASWELL-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00]
1328 ; HASWELL-NEXT: vpalignr {{.*#+}} ymm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],mem[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00]
1329 ; HASWELL-NEXT: retq # sched: [2:1.00]
1331 ; BROADWELL-LABEL: test_palignr:
1332 ; BROADWELL: # BB#0:
1333 ; BROADWELL-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00]
1334 ; BROADWELL-NEXT: vpalignr {{.*#+}} ymm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],mem[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [7:1.00]
1335 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1337 ; SKYLAKE-LABEL: test_palignr:
1339 ; SKYLAKE-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00]
1340 ; SKYLAKE-NEXT: vpalignr {{.*#+}} ymm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],mem[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [8:1.00]
1341 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1343 ; SKX-LABEL: test_palignr:
1345 ; SKX-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00]
1346 ; SKX-NEXT: vpalignr {{.*#+}} ymm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],mem[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [8:1.00]
1347 ; SKX-NEXT: retq # sched: [7:1.00]
1349 ; ZNVER1-LABEL: test_palignr:
1351 ; ZNVER1-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:0.25]
1352 ; ZNVER1-NEXT: vpalignr {{.*#+}} ymm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],mem[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [8:0.50]
1353 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1354 %1 = shufflevector <32 x i8> %a1, <32 x i8> %a0, <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48>
1355 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
1356 %3 = shufflevector <32 x i8> %2, <32 x i8> %1, <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48>
; Scheduling test for the 256-bit vpand. %a0 is and-ed with %a1 and then with
; a <4 x i64> vector loaded from %a2, giving a register-register and a
; memory-operand vpand. The final add with %a1 shows up as vpaddq in the
; expected assembly.
; NOTE(review): the trailing add presumably keeps the logic ops in the integer
; execution domain (vpand rather than a floating-point and) - confirm.
1360 define <4 x i64> @test_pand(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
1361 ; GENERIC-LABEL: test_pand:
1363 ; GENERIC-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
1364 ; GENERIC-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
1365 ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1366 ; GENERIC-NEXT: retq # sched: [1:1.00]
1368 ; HASWELL-LABEL: test_pand:
1370 ; HASWELL-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1371 ; HASWELL-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
1372 ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1373 ; HASWELL-NEXT: retq # sched: [2:1.00]
1375 ; BROADWELL-LABEL: test_pand:
1376 ; BROADWELL: # BB#0:
1377 ; BROADWELL-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1378 ; BROADWELL-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
1379 ; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1380 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1382 ; SKYLAKE-LABEL: test_pand:
1384 ; SKYLAKE-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1385 ; SKYLAKE-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1386 ; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1387 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1389 ; SKX-LABEL: test_pand:
1391 ; SKX-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1392 ; SKX-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1393 ; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1394 ; SKX-NEXT: retq # sched: [7:1.00]
1396 ; ZNVER1-LABEL: test_pand:
1398 ; ZNVER1-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1399 ; ZNVER1-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1400 ; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1401 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1402 %1 = and <4 x i64> %a0, %a1
1403 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
1404 %3 = and <4 x i64> %1, %2
1405 %4 = add <4 x i64> %3, %a1
; Scheduling test for the 256-bit vpandn. Each and-not is written as an xor
; with all-ones followed by an and: first (~%a0 & %a1), then the complement of
; that result and-ed with a <4 x i64> vector loaded from %a2. The expected
; assembly shows a register-register and a memory-operand vpandn, followed by
; a vpaddq for the final add of the two and-not results.
; NOTE(review): the trailing add presumably keeps the logic ops in the integer
; execution domain - confirm.
1409 define <4 x i64> @test_pandn(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
1410 ; GENERIC-LABEL: test_pandn:
1412 ; GENERIC-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
1413 ; GENERIC-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [5:1.00]
1414 ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1415 ; GENERIC-NEXT: retq # sched: [1:1.00]
1417 ; HASWELL-LABEL: test_pandn:
1419 ; HASWELL-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1420 ; HASWELL-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [1:0.50]
1421 ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1422 ; HASWELL-NEXT: retq # sched: [2:1.00]
1424 ; BROADWELL-LABEL: test_pandn:
1425 ; BROADWELL: # BB#0:
1426 ; BROADWELL-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1427 ; BROADWELL-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [7:0.50]
1428 ; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1429 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1431 ; SKYLAKE-LABEL: test_pandn:
1433 ; SKYLAKE-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1434 ; SKYLAKE-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [8:0.50]
1435 ; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1436 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1438 ; SKX-LABEL: test_pandn:
1440 ; SKX-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1441 ; SKX-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [8:0.50]
1442 ; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1443 ; SKX-NEXT: retq # sched: [7:1.00]
1445 ; ZNVER1-LABEL: test_pandn:
1447 ; ZNVER1-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1448 ; ZNVER1-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [8:0.50]
1449 ; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1450 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1451 %1 = xor <4 x i64> %a0, <i64 -1, i64 -1, i64 -1, i64 -1>
1452 %2 = and <4 x i64> %a1, %1
1453 %3 = load <4 x i64>, <4 x i64> *%a2, align 32
1454 %4 = xor <4 x i64> %2, <i64 -1, i64 -1, i64 -1, i64 -1>
1455 %5 = and <4 x i64> %3, %4
1456 %6 = add <4 x i64> %2, %5
; Scheduling test for the 256-bit vpavgb. The average is spelled out in
; generic IR rather than through an intrinsic: zero-extend both byte vectors
; to i16, add them, add 1, shift right by 1 and truncate back to i8. The
; sequence is applied to %a0/%a1 and then to that result with a vector loaded
; from %a2; the expected assembly shows each sequence selected as a single
; vpavgb (register form, then memory-operand form).
1460 define <32 x i8> @test_pavgb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
1461 ; GENERIC-LABEL: test_pavgb:
1463 ; GENERIC-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1464 ; GENERIC-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
1465 ; GENERIC-NEXT: retq # sched: [1:1.00]
1467 ; HASWELL-LABEL: test_pavgb:
1469 ; HASWELL-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1470 ; HASWELL-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
1471 ; HASWELL-NEXT: retq # sched: [2:1.00]
1473 ; BROADWELL-LABEL: test_pavgb:
1474 ; BROADWELL: # BB#0:
1475 ; BROADWELL-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1476 ; BROADWELL-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
1477 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1479 ; SKYLAKE-LABEL: test_pavgb:
1481 ; SKYLAKE-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1482 ; SKYLAKE-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1483 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1485 ; SKX-LABEL: test_pavgb:
1487 ; SKX-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1488 ; SKX-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1489 ; SKX-NEXT: retq # sched: [7:1.00]
1491 ; ZNVER1-LABEL: test_pavgb:
1493 ; ZNVER1-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1494 ; ZNVER1-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1495 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1496 %1 = zext <32 x i8> %a0 to <32 x i16>
1497 %2 = zext <32 x i8> %a1 to <32 x i16>
1498 %3 = add <32 x i16> %1, %2
1499 %4 = add <32 x i16> %3, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1500 %5 = lshr <32 x i16> %4, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1501 %6 = trunc <32 x i16> %5 to <32 x i8>
1502 %7 = load <32 x i8>, <32 x i8> *%a2, align 32
1503 %8 = zext <32 x i8> %6 to <32 x i16>
1504 %9 = zext <32 x i8> %7 to <32 x i16>
1505 %10 = add <32 x i16> %8, %9
1506 %11 = add <32 x i16> %10, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1507 %12 = lshr <32 x i16> %11, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1508 %13 = trunc <32 x i16> %12 to <32 x i8>
; Scheduling test for the 256-bit vpavgw. The average is spelled out in
; generic IR rather than through an intrinsic: zero-extend both i16 vectors
; to i32, add them, add 1, shift right by 1 and truncate back to i16. The
; sequence is applied to %a0/%a1 and then to that result with a vector loaded
; from %a2; the expected assembly shows each sequence selected as a single
; vpavgw (register form, then memory-operand form).
1512 define <16 x i16> @test_pavgw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
1513 ; GENERIC-LABEL: test_pavgw:
1515 ; GENERIC-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1516 ; GENERIC-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
1517 ; GENERIC-NEXT: retq # sched: [1:1.00]
1519 ; HASWELL-LABEL: test_pavgw:
1521 ; HASWELL-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1522 ; HASWELL-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
1523 ; HASWELL-NEXT: retq # sched: [2:1.00]
1525 ; BROADWELL-LABEL: test_pavgw:
1526 ; BROADWELL: # BB#0:
1527 ; BROADWELL-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1528 ; BROADWELL-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
1529 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1531 ; SKYLAKE-LABEL: test_pavgw:
1533 ; SKYLAKE-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1534 ; SKYLAKE-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1535 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1537 ; SKX-LABEL: test_pavgw:
1539 ; SKX-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1540 ; SKX-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1541 ; SKX-NEXT: retq # sched: [7:1.00]
1543 ; ZNVER1-LABEL: test_pavgw:
1545 ; ZNVER1-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1546 ; ZNVER1-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
1547 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1548 %1 = zext <16 x i16> %a0 to <16 x i32>
1549 %2 = zext <16 x i16> %a1 to <16 x i32>
1550 %3 = add <16 x i32> %1, %2
1551 %4 = add <16 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1552 %5 = lshr <16 x i32> %4, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1553 %6 = trunc <16 x i32> %5 to <16 x i16>
1554 %7 = load <16 x i16>, <16 x i16> *%a2, align 32
1555 %8 = zext <16 x i16> %6 to <16 x i32>
1556 %9 = zext <16 x i16> %7 to <16 x i32>
1557 %10 = add <16 x i32> %8, %9
1558 %11 = add <16 x i32> %10, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1559 %12 = lshr <16 x i32> %11, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
1560 %13 = trunc <16 x i32> %12 to <16 x i16>
; Scheduling test for the 128-bit vpblendd. The first shufflevector takes
; elements 0-2 from %a1 and element 3 from %a0; the second takes elements 0
; and 2 from a <4 x i32> vector loaded from %a2 (align 16) and elements 1 and
; 3 from the first result. The expected assembly shows these as a register
; blend and a memory-operand blend, followed by a vpaddd for the final add
; with %a0.
; NOTE(review): the trailing add presumably keeps the blends in the integer
; execution domain - confirm.
1564 define <4 x i32> @test_pblendd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
1565 ; GENERIC-LABEL: test_pblendd:
1567 ; GENERIC-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[3] sched: [1:0.50]
1568 ; GENERIC-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [5:0.50]
1569 ; GENERIC-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1570 ; GENERIC-NEXT: retq # sched: [1:1.00]
1572 ; HASWELL-LABEL: test_pblendd:
1574 ; HASWELL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[3] sched: [1:0.33]
1575 ; HASWELL-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [1:0.50]
1576 ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1577 ; HASWELL-NEXT: retq # sched: [2:1.00]
1579 ; BROADWELL-LABEL: test_pblendd:
1580 ; BROADWELL: # BB#0:
1581 ; BROADWELL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[3] sched: [1:0.33]
1582 ; BROADWELL-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [6:0.50]
1583 ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1584 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1586 ; SKYLAKE-LABEL: test_pblendd:
1588 ; SKYLAKE-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[3] sched: [1:0.33]
1589 ; SKYLAKE-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [7:0.50]
1590 ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
1591 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1593 ; SKX-LABEL: test_pblendd:
1595 ; SKX-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[3] sched: [1:0.33]
1596 ; SKX-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [7:0.50]
1597 ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
1598 ; SKX-NEXT: retq # sched: [7:1.00]
1600 ; ZNVER1-LABEL: test_pblendd:
1602 ; ZNVER1-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[3] sched: [1:0.50]
1603 ; ZNVER1-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [8:1.00]
1604 ; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
1605 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1606 %1 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
1607 %2 = load <4 x i32>, <4 x i32> *%a2, align 16
1608 %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
1609 %4 = add <4 x i32> %a0, %3
; Scheduling test for the 256-bit vpblendd: one register-register blend, one
; blend whose second source is loaded from %a2, then an add with %a0.
; The assertions below pin the expected assembly and its -print-schedule
; annotation for every -mcpu listed in the RUN lines at the top of the file.
1613 define <8 x i32> @test_pblendd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
1614 ; GENERIC-LABEL: test_pblendd_ymm:
1616 ; GENERIC-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.50]
1617 ; GENERIC-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [5:0.50]
1618 ; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1619 ; GENERIC-NEXT: retq # sched: [1:1.00]
1621 ; HASWELL-LABEL: test_pblendd_ymm:
1623 ; HASWELL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.33]
1624 ; HASWELL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [1:0.50]
1625 ; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1626 ; HASWELL-NEXT: retq # sched: [2:1.00]
1628 ; BROADWELL-LABEL: test_pblendd_ymm:
1629 ; BROADWELL: # BB#0:
1630 ; BROADWELL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.33]
1631 ; BROADWELL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [7:0.50]
1632 ; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1633 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1635 ; SKYLAKE-LABEL: test_pblendd_ymm:
1637 ; SKYLAKE-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.33]
1638 ; SKYLAKE-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [8:0.50]
1639 ; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1640 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1642 ; SKX-LABEL: test_pblendd_ymm:
1644 ; SKX-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.33]
1645 ; SKX-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [8:0.50]
1646 ; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1647 ; SKX-NEXT: retq # sched: [7:1.00]
1649 ; ZNVER1-LABEL: test_pblendd_ymm:
1651 ; ZNVER1-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.50]
1652 ; ZNVER1-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [9:1.50]
1653 ; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1654 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Mask <8,9,10,3,4,5,6,15>: lanes 0-2 and 7 are taken from %a1, lanes 3-6 from %a0.
1655 %1 = shufflevector <8 x i32> %a0, <8 x i32> %a1, <8 x i32> <i32 8, i32 9, i32 10, i32 3, i32 4, i32 5, i32 6, i32 15>
1656 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
; Mask <0,9,10,3,4,5,6,7>: lanes 1-2 are taken from the loaded vector, the rest from %1.
1657 %3 = shufflevector <8 x i32> %1, <8 x i32> %2, <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 5, i32 6, i32 7>
; NOTE(review): the integer add is presumably here to keep the blends in the
; integer domain (vpblendd rather than a float blend) - confirm.
1658 %4 = add <8 x i32> %a0, %3
; Scheduling test for the 256-bit vpblendvb: the intrinsic is called once with
; all-register operands and once with its second operand loaded from %a3.
; The assertions below pin the expected assembly and its -print-schedule
; annotation for every -mcpu listed in the RUN lines at the top of the file.
1662 define <32 x i8> @test_pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2, <32 x i8> *%a3, <32 x i8> %a4) {
1663 ; GENERIC-LABEL: test_pblendvb:
1665 ; GENERIC-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
1666 ; GENERIC-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
1667 ; GENERIC-NEXT: retq # sched: [1:1.00]
1669 ; HASWELL-LABEL: test_pblendvb:
1671 ; HASWELL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
1672 ; HASWELL-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [2:2.00]
1673 ; HASWELL-NEXT: retq # sched: [2:1.00]
1675 ; BROADWELL-LABEL: test_pblendvb:
1676 ; BROADWELL: # BB#0:
1677 ; BROADWELL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
1678 ; BROADWELL-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
1679 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1681 ; SKYLAKE-LABEL: test_pblendvb:
1683 ; SKYLAKE-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67]
1684 ; SKYLAKE-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [8:0.67]
1685 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1687 ; SKX-LABEL: test_pblendvb:
1689 ; SKX-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67]
1690 ; SKX-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [8:0.67]
1691 ; SKX-NEXT: retq # sched: [7:1.00]
1693 ; ZNVER1-LABEL: test_pblendvb:
1695 ; ZNVER1-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
1696 ; ZNVER1-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
1697 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; First blend: every operand is a function argument passed by value.
1698 %1 = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2)
1699 %2 = load <32 x i8>, <32 x i8> *%a3, align 32
; Second blend: the loaded vector is the second operand, giving the (%rdi) form
; expected by the assertions above.
1700 %3 = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %1, <32 x i8> %2, <32 x i8> %a4)
; Declaration of the AVX2 intrinsic called by test_pblendvb.
1703 declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>) nounwind readnone
; Scheduling test for the 256-bit vpblendw: one register-register blend followed
; by a blend whose second source is loaded from %a2.
; The assertions below pin the expected assembly and its -print-schedule
; annotation for every -mcpu listed in the RUN lines at the top of the file.
1705 define <16 x i16> @test_pblendw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
1706 ; GENERIC-LABEL: test_pblendw:
1708 ; GENERIC-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:0.50]
1709 ; GENERIC-NEXT: vpblendw {{.*#+}} ymm0 = mem[0],ymm0[1],mem[2],ymm0[3],mem[4],ymm0[5],mem[6],ymm0[7],mem[8],ymm0[9],mem[10],ymm0[11],mem[12],ymm0[13],mem[14],ymm0[15] sched: [5:0.50]
1710 ; GENERIC-NEXT: retq # sched: [1:1.00]
1712 ; HASWELL-LABEL: test_pblendw:
1714 ; HASWELL-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:1.00]
1715 ; HASWELL-NEXT: vpblendw {{.*#+}} ymm0 = mem[0],ymm0[1],mem[2],ymm0[3],mem[4],ymm0[5],mem[6],ymm0[7],mem[8],ymm0[9],mem[10],ymm0[11],mem[12],ymm0[13],mem[14],ymm0[15] sched: [4:1.00]
1716 ; HASWELL-NEXT: retq # sched: [2:1.00]
1718 ; BROADWELL-LABEL: test_pblendw:
1719 ; BROADWELL: # BB#0:
1720 ; BROADWELL-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:1.00]
1721 ; BROADWELL-NEXT: vpblendw {{.*#+}} ymm0 = mem[0],ymm0[1],mem[2],ymm0[3],mem[4],ymm0[5],mem[6],ymm0[7],mem[8],ymm0[9],mem[10],ymm0[11],mem[12],ymm0[13],mem[14],ymm0[15] sched: [7:1.00]
1722 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1724 ; SKYLAKE-LABEL: test_pblendw:
1726 ; SKYLAKE-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:1.00]
1727 ; SKYLAKE-NEXT: vpblendw {{.*#+}} ymm0 = mem[0],ymm0[1],mem[2],ymm0[3],mem[4],ymm0[5],mem[6],ymm0[7],mem[8],ymm0[9],mem[10],ymm0[11],mem[12],ymm0[13],mem[14],ymm0[15] sched: [8:1.00]
1728 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1730 ; SKX-LABEL: test_pblendw:
1732 ; SKX-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:1.00]
1733 ; SKX-NEXT: vpblendw {{.*#+}} ymm0 = mem[0],ymm0[1],mem[2],ymm0[3],mem[4],ymm0[5],mem[6],ymm0[7],mem[8],ymm0[9],mem[10],ymm0[11],mem[12],ymm0[13],mem[14],ymm0[15] sched: [8:1.00]
1734 ; SKX-NEXT: retq # sched: [7:1.00]
1736 ; ZNVER1-LABEL: test_pblendw:
1738 ; ZNVER1-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [2:0.33]
1739 ; ZNVER1-NEXT: vpblendw {{.*#+}} ymm0 = mem[0],ymm0[1],mem[2],ymm0[3],mem[4],ymm0[5],mem[6],ymm0[7],mem[8],ymm0[9],mem[10],ymm0[11],mem[12],ymm0[13],mem[14],ymm0[15] sched: [9:0.50]
1740 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; First mask: lanes 2-4 and 10-12 are taken from %a1, all other lanes from %a0.
1741 %1 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 20, i32 5, i32 6, i32 7, i32 8, i32 9, i32 26, i32 27, i32 28, i32 13, i32 14, i32 15>
1742 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
; Second mask: even lanes are taken from the loaded vector, odd lanes from %1.
1743 %3 = shufflevector <16 x i16> %1, <16 x i16> %2, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>
; Scheduling test for the 128-bit vpbroadcastb: splats byte 0 of %a0 (register
; form) and byte 0 of the vector loaded from %a1 (memory form), then adds them.
; The assertions below pin the expected assembly and its -print-schedule
; annotation for every -mcpu listed in the RUN lines at the top of the file.
; The znver1 assertions expect the memory broadcast ahead of the register one.
1747 define <16 x i8> @test_pbroadcastb(<16 x i8> %a0, <16 x i8> *%a1) {
1748 ; GENERIC-LABEL: test_pbroadcastb:
1750 ; GENERIC-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [1:1.00]
1751 ; GENERIC-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [4:0.50]
1752 ; GENERIC-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1753 ; GENERIC-NEXT: retq # sched: [1:1.00]
1755 ; HASWELL-LABEL: test_pbroadcastb:
1757 ; HASWELL-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [3:1.00]
1758 ; HASWELL-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [4:1.00]
1759 ; HASWELL-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1760 ; HASWELL-NEXT: retq # sched: [2:1.00]
1762 ; BROADWELL-LABEL: test_pbroadcastb:
1763 ; BROADWELL: # BB#0:
1764 ; BROADWELL-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [3:1.00]
1765 ; BROADWELL-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [9:1.00]
1766 ; BROADWELL-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1767 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1769 ; SKYLAKE-LABEL: test_pbroadcastb:
1771 ; SKYLAKE-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [3:1.00]
1772 ; SKYLAKE-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [7:1.00]
1773 ; SKYLAKE-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
1774 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1776 ; SKX-LABEL: test_pbroadcastb:
1778 ; SKX-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [3:1.00]
1779 ; SKX-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [7:1.00]
1780 ; SKX-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
1781 ; SKX-NEXT: retq # sched: [7:1.00]
1783 ; ZNVER1-LABEL: test_pbroadcastb:
1785 ; ZNVER1-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [8:1.00]
1786 ; ZNVER1-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [1:0.25]
1787 ; ZNVER1-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
1788 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; A zeroinitializer mask selects lane 0 for every result lane, i.e. a splat.
1789 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> zeroinitializer
1790 %2 = load <16 x i8>, <16 x i8> *%a1, align 16
1791 %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> zeroinitializer
; The add combines both splats so each feeds the result.
1792 %4 = add <16 x i8> %1, %3
; Scheduling test for the 256-bit vpbroadcastb: splats byte 0 of %a0 (register
; form) and byte 0 of the vector loaded from %a1 (memory form), then adds them.
; The assertions below pin the expected assembly and its -print-schedule
; annotation for every -mcpu listed in the RUN lines at the top of the file.
; The znver1 assertions expect the memory broadcast ahead of the register one.
1796 define <32 x i8> @test_pbroadcastb_ymm(<32 x i8> %a0, <32 x i8> *%a1) {
1797 ; GENERIC-LABEL: test_pbroadcastb_ymm:
1799 ; GENERIC-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [1:1.00]
1800 ; GENERIC-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [4:0.50]
1801 ; GENERIC-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1802 ; GENERIC-NEXT: retq # sched: [1:1.00]
1804 ; HASWELL-LABEL: test_pbroadcastb_ymm:
1806 ; HASWELL-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [3:1.00]
1807 ; HASWELL-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [4:1.00]
1808 ; HASWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1809 ; HASWELL-NEXT: retq # sched: [2:1.00]
1811 ; BROADWELL-LABEL: test_pbroadcastb_ymm:
1812 ; BROADWELL: # BB#0:
1813 ; BROADWELL-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [3:1.00]
1814 ; BROADWELL-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [9:1.00]
1815 ; BROADWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1816 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1818 ; SKYLAKE-LABEL: test_pbroadcastb_ymm:
1820 ; SKYLAKE-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [3:1.00]
1821 ; SKYLAKE-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [8:1.00]
1822 ; SKYLAKE-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1823 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1825 ; SKX-LABEL: test_pbroadcastb_ymm:
1827 ; SKX-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [3:1.00]
1828 ; SKX-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [8:1.00]
1829 ; SKX-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1830 ; SKX-NEXT: retq # sched: [7:1.00]
1832 ; ZNVER1-LABEL: test_pbroadcastb_ymm:
1834 ; ZNVER1-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [8:2.00]
1835 ; ZNVER1-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [2:0.25]
1836 ; ZNVER1-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1837 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; A zeroinitializer mask selects lane 0 for every result lane, i.e. a splat.
1838 %1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> zeroinitializer
1839 %2 = load <32 x i8>, <32 x i8> *%a1, align 32
1840 %3 = shufflevector <32 x i8> %2, <32 x i8> undef, <32 x i32> zeroinitializer
; The add combines both splats so each feeds the result.
1841 %4 = add <32 x i8> %1, %3
; Scheduling test for the 128-bit vpbroadcastd: splats dword 0 of %a0 (register
; form) and dword 0 of the vector loaded from %a1 (memory form), then adds them.
; The assertions below pin the expected assembly and its -print-schedule
; annotation for every -mcpu listed in the RUN lines at the top of the file.
; The skx assertions expect the memory broadcast folded into vpaddd as a {1to4}
; operand, so no separate memory vpbroadcastd appears there.
1845 define <4 x i32> @test_pbroadcastd(<4 x i32> %a0, <4 x i32> *%a1) {
1846 ; GENERIC-LABEL: test_pbroadcastd:
1848 ; GENERIC-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00]
1849 ; GENERIC-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [4:0.50]
1850 ; GENERIC-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1851 ; GENERIC-NEXT: retq # sched: [1:1.00]
1853 ; HASWELL-LABEL: test_pbroadcastd:
1855 ; HASWELL-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00]
1856 ; HASWELL-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [1:0.50]
1857 ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1858 ; HASWELL-NEXT: retq # sched: [2:1.00]
1860 ; BROADWELL-LABEL: test_pbroadcastd:
1861 ; BROADWELL: # BB#0:
1862 ; BROADWELL-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00]
1863 ; BROADWELL-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [5:0.50]
1864 ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1865 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1867 ; SKYLAKE-LABEL: test_pbroadcastd:
1869 ; SKYLAKE-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00]
1870 ; SKYLAKE-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [6:0.50]
1871 ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
1872 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1874 ; SKX-LABEL: test_pbroadcastd:
1876 ; SKX-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00]
1877 ; SKX-NEXT: vpaddd (%rdi){1to4}, %xmm0, %xmm0 # sched: [7:0.50]
1878 ; SKX-NEXT: retq # sched: [7:1.00]
1880 ; ZNVER1-LABEL: test_pbroadcastd:
1882 ; ZNVER1-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [8:0.50]
1883 ; ZNVER1-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:0.25]
1884 ; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
1885 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; A zeroinitializer mask selects lane 0 for every result lane, i.e. a splat.
1886 %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> zeroinitializer
1887 %2 = load <4 x i32>, <4 x i32> *%a1, align 16
1888 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> zeroinitializer
; The add combines both splats so each feeds the result.
1889 %4 = add <4 x i32> %1, %3
; Scheduling test for the 256-bit vpbroadcastd: splats dword 0 of %a0 (register
; form) and dword 0 of the vector loaded from %a1 (memory form), then adds them.
; The assertions below pin the expected assembly and its -print-schedule
; annotation for every -mcpu listed in the RUN lines at the top of the file.
; The skx assertions expect the memory broadcast folded into vpaddd as a {1to8}
; operand, so no separate memory vpbroadcastd appears there.
1893 define <8 x i32> @test_pbroadcastd_ymm(<8 x i32> %a0, <8 x i32> *%a1) {
1894 ; GENERIC-LABEL: test_pbroadcastd_ymm:
1896 ; GENERIC-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [1:1.00]
1897 ; GENERIC-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [4:0.50]
1898 ; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1899 ; GENERIC-NEXT: retq # sched: [1:1.00]
1901 ; HASWELL-LABEL: test_pbroadcastd_ymm:
1903 ; HASWELL-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [3:1.00]
1904 ; HASWELL-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [1:0.50]
1905 ; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1906 ; HASWELL-NEXT: retq # sched: [2:1.00]
1908 ; BROADWELL-LABEL: test_pbroadcastd_ymm:
1909 ; BROADWELL: # BB#0:
1910 ; BROADWELL-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [3:1.00]
1911 ; BROADWELL-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [6:0.50]
1912 ; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1913 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1915 ; SKYLAKE-LABEL: test_pbroadcastd_ymm:
1917 ; SKYLAKE-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [3:1.00]
1918 ; SKYLAKE-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [7:0.50]
1919 ; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1920 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1922 ; SKX-LABEL: test_pbroadcastd_ymm:
1924 ; SKX-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [3:1.00]
1925 ; SKX-NEXT: vpaddd (%rdi){1to8}, %ymm0, %ymm0 # sched: [8:0.50]
1926 ; SKX-NEXT: retq # sched: [7:1.00]
1928 ; ZNVER1-LABEL: test_pbroadcastd_ymm:
1930 ; ZNVER1-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [8:0.50]
1931 ; ZNVER1-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [2:0.25]
1932 ; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1933 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; A zeroinitializer mask selects lane 0 for every result lane, i.e. a splat.
1934 %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> zeroinitializer
1935 %2 = load <8 x i32>, <8 x i32> *%a1, align 32
1936 %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> zeroinitializer
; The add combines both splats so each feeds the result.
1937 %4 = add <8 x i32> %1, %3
; Scheduling test for the 128-bit vpbroadcastq: splats qword 0 of %a0 (register
; form) and qword 0 of the vector loaded from %a1 (memory form), then adds them.
; The assertions below pin the expected assembly and its -print-schedule
; annotation for every -mcpu listed in the RUN lines at the top of the file.
; The skx assertions expect the memory broadcast folded into vpaddq as a {1to2}
; operand, so no separate memory vpbroadcastq appears there.
1941 define <2 x i64> @test_pbroadcastq(<2 x i64> %a0, <2 x i64> *%a1) {
1942 ; GENERIC-LABEL: test_pbroadcastq:
1944 ; GENERIC-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00]
1945 ; GENERIC-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [4:0.50]
1946 ; GENERIC-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1947 ; GENERIC-NEXT: retq # sched: [1:1.00]
1949 ; HASWELL-LABEL: test_pbroadcastq:
1951 ; HASWELL-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00]
1952 ; HASWELL-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [1:0.50]
1953 ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1954 ; HASWELL-NEXT: retq # sched: [2:1.00]
1956 ; BROADWELL-LABEL: test_pbroadcastq:
1957 ; BROADWELL: # BB#0:
1958 ; BROADWELL-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00]
1959 ; BROADWELL-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [5:0.50]
1960 ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1961 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1963 ; SKYLAKE-LABEL: test_pbroadcastq:
1965 ; SKYLAKE-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00]
1966 ; SKYLAKE-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [6:0.50]
1967 ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
1968 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1970 ; SKX-LABEL: test_pbroadcastq:
1972 ; SKX-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00]
1973 ; SKX-NEXT: vpaddq (%rdi){1to2}, %xmm0, %xmm0 # sched: [7:0.50]
1974 ; SKX-NEXT: retq # sched: [7:1.00]
1976 ; ZNVER1-LABEL: test_pbroadcastq:
1978 ; ZNVER1-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [8:0.50]
1979 ; ZNVER1-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:0.25]
1980 ; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
1981 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; A zeroinitializer mask selects lane 0 for every result lane, i.e. a splat.
1982 %1 = shufflevector <2 x i64> %a0, <2 x i64> undef, <2 x i32> zeroinitializer
1983 %2 = load <2 x i64>, <2 x i64> *%a1, align 16
1984 %3 = shufflevector <2 x i64> %2, <2 x i64> undef, <2 x i32> zeroinitializer
; The add combines both splats so each feeds the result.
1985 %4 = add <2 x i64> %1, %3
; Scheduling test for the 256-bit vpbroadcastq: splats qword 0 of %a0 (register
; form) and qword 0 of the vector loaded from %a1 (memory form), then adds them.
; The assertions below pin the expected assembly and its -print-schedule
; annotation for every -mcpu listed in the RUN lines at the top of the file.
; The skx assertions expect the memory broadcast folded into vpaddq as a {1to4}
; operand, so no separate memory vpbroadcastq appears there.
1989 define <4 x i64> @test_pbroadcastq_ymm(<4 x i64> %a0, <4 x i64> *%a1) {
1990 ; GENERIC-LABEL: test_pbroadcastq_ymm:
1992 ; GENERIC-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [1:1.00]
1993 ; GENERIC-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [4:0.50]
1994 ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1995 ; GENERIC-NEXT: retq # sched: [1:1.00]
1997 ; HASWELL-LABEL: test_pbroadcastq_ymm:
1999 ; HASWELL-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [3:1.00]
2000 ; HASWELL-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [1:0.50]
2001 ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2002 ; HASWELL-NEXT: retq # sched: [2:1.00]
2004 ; BROADWELL-LABEL: test_pbroadcastq_ymm:
2005 ; BROADWELL: # BB#0:
2006 ; BROADWELL-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [3:1.00]
2007 ; BROADWELL-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [6:0.50]
2008 ; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2009 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2011 ; SKYLAKE-LABEL: test_pbroadcastq_ymm:
2013 ; SKYLAKE-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [3:1.00]
2014 ; SKYLAKE-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [7:0.50]
2015 ; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
2016 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2018 ; SKX-LABEL: test_pbroadcastq_ymm:
2020 ; SKX-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [3:1.00]
2021 ; SKX-NEXT: vpaddq (%rdi){1to4}, %ymm0, %ymm0 # sched: [8:0.50]
2022 ; SKX-NEXT: retq # sched: [7:1.00]
2024 ; ZNVER1-LABEL: test_pbroadcastq_ymm:
2026 ; ZNVER1-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [8:0.50]
2027 ; ZNVER1-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [2:0.25]
2028 ; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2029 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; A zeroinitializer mask selects lane 0 for every result lane, i.e. a splat.
2030 %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> zeroinitializer
2031 %2 = load <4 x i64>, <4 x i64> *%a1, align 32
2032 %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> zeroinitializer
; The add combines both splats so each feeds the result.
2033 %4 = add <4 x i64> %1, %3
; Scheduling test for the 128-bit vpbroadcastw: splats word 0 of %a0 (register
; form) and word 0 of the vector loaded from %a1 (memory form), then adds them.
; The assertions below pin the expected assembly and its -print-schedule
; annotation for every -mcpu listed in the RUN lines at the top of the file.
; The znver1 assertions expect the memory broadcast ahead of the register one.
2037 define <8 x i16> @test_pbroadcastw(<8 x i16> %a0, <8 x i16> *%a1) {
2038 ; GENERIC-LABEL: test_pbroadcastw:
2040 ; GENERIC-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [1:1.00]
2041 ; GENERIC-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [4:0.50]
2042 ; GENERIC-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2043 ; GENERIC-NEXT: retq # sched: [1:1.00]
2045 ; HASWELL-LABEL: test_pbroadcastw:
2047 ; HASWELL-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [3:1.00]
2048 ; HASWELL-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [4:1.00]
2049 ; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2050 ; HASWELL-NEXT: retq # sched: [2:1.00]
2052 ; BROADWELL-LABEL: test_pbroadcastw:
2053 ; BROADWELL: # BB#0:
2054 ; BROADWELL-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [3:1.00]
2055 ; BROADWELL-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [9:1.00]
2056 ; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2057 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2059 ; SKYLAKE-LABEL: test_pbroadcastw:
2061 ; SKYLAKE-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [3:1.00]
2062 ; SKYLAKE-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [7:1.00]
2063 ; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
2064 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2066 ; SKX-LABEL: test_pbroadcastw:
2068 ; SKX-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [3:1.00]
2069 ; SKX-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [7:1.00]
2070 ; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
2071 ; SKX-NEXT: retq # sched: [7:1.00]
2073 ; ZNVER1-LABEL: test_pbroadcastw:
2075 ; ZNVER1-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [8:1.00]
2076 ; ZNVER1-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [1:0.25]
2077 ; ZNVER1-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
2078 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; A zeroinitializer mask selects lane 0 for every result lane, i.e. a splat.
2079 %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer
2080 %2 = load <8 x i16>, <8 x i16> *%a1, align 16
2081 %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> zeroinitializer
; The add combines both splats so each feeds the result.
2082 %4 = add <8 x i16> %1, %3
; Scheduling test for the 256-bit vpbroadcastw: splats word 0 of %a0 (register
; form) and word 0 of the vector loaded from %a1 (memory form), then adds them.
; The assertions below pin the expected assembly and its -print-schedule
; annotation for every -mcpu listed in the RUN lines at the top of the file.
; The znver1 assertions expect the memory broadcast ahead of the register one.
2086 define <16 x i16> @test_pbroadcastw_ymm(<16 x i16> %a0, <16 x i16> *%a1) {
2087 ; GENERIC-LABEL: test_pbroadcastw_ymm:
2089 ; GENERIC-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [1:1.00]
2090 ; GENERIC-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [4:0.50]
2091 ; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2092 ; GENERIC-NEXT: retq # sched: [1:1.00]
2094 ; HASWELL-LABEL: test_pbroadcastw_ymm:
2096 ; HASWELL-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [3:1.00]
2097 ; HASWELL-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [4:1.00]
2098 ; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2099 ; HASWELL-NEXT: retq # sched: [2:1.00]
2101 ; BROADWELL-LABEL: test_pbroadcastw_ymm:
2102 ; BROADWELL: # BB#0:
2103 ; BROADWELL-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [3:1.00]
2104 ; BROADWELL-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [9:1.00]
2105 ; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2106 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2108 ; SKYLAKE-LABEL: test_pbroadcastw_ymm:
2110 ; SKYLAKE-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [3:1.00]
2111 ; SKYLAKE-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [8:1.00]
2112 ; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
2113 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2115 ; SKX-LABEL: test_pbroadcastw_ymm:
2117 ; SKX-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [3:1.00]
2118 ; SKX-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [8:1.00]
2119 ; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
2120 ; SKX-NEXT: retq # sched: [7:1.00]
2122 ; ZNVER1-LABEL: test_pbroadcastw_ymm:
2124 ; ZNVER1-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [8:2.00]
2125 ; ZNVER1-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [2:0.25]
2126 ; ZNVER1-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2127 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; A zeroinitializer mask selects lane 0 for every result lane, i.e. a splat.
2128 %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> zeroinitializer
2129 %2 = load <16 x i16>, <16 x i16> *%a1, align 32
2130 %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> zeroinitializer
; The add combines both splats so each feeds the result.
2131 %4 = add <16 x i16> %1, %3
; Scheduling test for the 256-bit vpcmpeqb: compares %a0 with %a1, sign-extends
; the i1 result back to bytes, then compares that with the vector loaded from
; %a2 (memory form) and sign-extends again.
; The assertions below pin the expected assembly and its -print-schedule
; annotation for every -mcpu listed in the RUN lines at the top of the file.
; The skx assertions expect each compare to write mask register %k0 followed by
; a vpmovm2b; those vpmovm2b lines carry no sched annotation.
2135 define <32 x i8> @test_pcmpeqb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
2136 ; GENERIC-LABEL: test_pcmpeqb:
2138 ; GENERIC-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2139 ; GENERIC-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
2140 ; GENERIC-NEXT: retq # sched: [1:1.00]
2142 ; HASWELL-LABEL: test_pcmpeqb:
2144 ; HASWELL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2145 ; HASWELL-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
2146 ; HASWELL-NEXT: retq # sched: [2:1.00]
2148 ; BROADWELL-LABEL: test_pcmpeqb:
2149 ; BROADWELL: # BB#0:
2150 ; BROADWELL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2151 ; BROADWELL-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
2152 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2154 ; SKYLAKE-LABEL: test_pcmpeqb:
2156 ; SKYLAKE-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2157 ; SKYLAKE-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2158 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2160 ; SKX-LABEL: test_pcmpeqb:
2162 ; SKX-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 # sched: [3:1.00]
2163 ; SKX-NEXT: vpmovm2b %k0, %ymm0
2164 ; SKX-NEXT: vpcmpeqb (%rdi), %ymm0, %k0 # sched: [10:1.00]
2165 ; SKX-NEXT: vpmovm2b %k0, %ymm0
2166 ; SKX-NEXT: retq # sched: [7:1.00]
2168 ; ZNVER1-LABEL: test_pcmpeqb:
2170 ; ZNVER1-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2171 ; ZNVER1-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2172 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-register compare, widened from i1 lanes back to i8 lanes.
2173 %1 = icmp eq <32 x i8> %a0, %a1
2174 %2 = sext <32 x i1> %1 to <32 x i8>
2175 %3 = load <32 x i8>, <32 x i8> *%a2, align 32
; Second compare takes the loaded vector as its right-hand operand.
2176 %4 = icmp eq <32 x i8> %2, %3
2177 %5 = sext <32 x i1> %4 to <32 x i8>
; Scheduling test for the 256-bit vpcmpeqd: compares %a0 with %a1, sign-extends
; the i1 result back to dwords, then compares that with the vector loaded from
; %a2 (memory form) and sign-extends again.
; The assertions below pin the expected assembly and its -print-schedule
; annotation for every -mcpu listed in the RUN lines at the top of the file.
; The skx assertions expect each compare to write mask register %k0 followed by
; a vpmovm2d; those vpmovm2d lines carry no sched annotation.
2181 define <8 x i32> @test_pcmpeqd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
2182 ; GENERIC-LABEL: test_pcmpeqd:
2184 ; GENERIC-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2185 ; GENERIC-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
2186 ; GENERIC-NEXT: retq # sched: [1:1.00]
2188 ; HASWELL-LABEL: test_pcmpeqd:
2190 ; HASWELL-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2191 ; HASWELL-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
2192 ; HASWELL-NEXT: retq # sched: [2:1.00]
2194 ; BROADWELL-LABEL: test_pcmpeqd:
2195 ; BROADWELL: # BB#0:
2196 ; BROADWELL-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2197 ; BROADWELL-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
2198 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2200 ; SKYLAKE-LABEL: test_pcmpeqd:
2202 ; SKYLAKE-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2203 ; SKYLAKE-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2204 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2206 ; SKX-LABEL: test_pcmpeqd:
2208 ; SKX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 # sched: [3:1.00]
2209 ; SKX-NEXT: vpmovm2d %k0, %ymm0
2210 ; SKX-NEXT: vpcmpeqd (%rdi), %ymm0, %k0 # sched: [10:1.00]
2211 ; SKX-NEXT: vpmovm2d %k0, %ymm0
2212 ; SKX-NEXT: retq # sched: [7:1.00]
2214 ; ZNVER1-LABEL: test_pcmpeqd:
2216 ; ZNVER1-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2217 ; ZNVER1-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2218 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-register compare, widened from i1 lanes back to i32 lanes.
2219 %1 = icmp eq <8 x i32> %a0, %a1
2220 %2 = sext <8 x i1> %1 to <8 x i32>
2221 %3 = load <8 x i32>, <8 x i32> *%a2, align 32
; Second compare takes the loaded vector as its right-hand operand.
2222 %4 = icmp eq <8 x i32> %2, %3
2223 %5 = sext <8 x i1> %4 to <8 x i32>
; Scheduling test for the 256-bit vpcmpeqq: compares %a0 with %a1, sign-extends
; the i1 result back to qwords, then compares that with the vector loaded from
; %a2 (memory form) and sign-extends again.
; The assertions below pin the expected assembly and its -print-schedule
; annotation for every -mcpu listed in the RUN lines at the top of the file.
; The skx assertions expect each compare to write mask register %k0 followed by
; a vpmovm2q; those vpmovm2q lines carry no sched annotation.
2227 define <4 x i64> @test_pcmpeqq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
2228 ; GENERIC-LABEL: test_pcmpeqq:
2230 ; GENERIC-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2231 ; GENERIC-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
2232 ; GENERIC-NEXT: retq # sched: [1:1.00]
2234 ; HASWELL-LABEL: test_pcmpeqq:
2236 ; HASWELL-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2237 ; HASWELL-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
2238 ; HASWELL-NEXT: retq # sched: [2:1.00]
2240 ; BROADWELL-LABEL: test_pcmpeqq:
2241 ; BROADWELL: # BB#0:
2242 ; BROADWELL-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2243 ; BROADWELL-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
2244 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2246 ; SKYLAKE-LABEL: test_pcmpeqq:
2248 ; SKYLAKE-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2249 ; SKYLAKE-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2250 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2252 ; SKX-LABEL: test_pcmpeqq:
2254 ; SKX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 # sched: [3:1.00]
2255 ; SKX-NEXT: vpmovm2q %k0, %ymm0
2256 ; SKX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0 # sched: [10:1.00]
2257 ; SKX-NEXT: vpmovm2q %k0, %ymm0
2258 ; SKX-NEXT: retq # sched: [7:1.00]
2260 ; ZNVER1-LABEL: test_pcmpeqq:
2262 ; ZNVER1-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2263 ; ZNVER1-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2264 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-register compare, widened from i1 lanes back to i64 lanes.
2265 %1 = icmp eq <4 x i64> %a0, %a1
2266 %2 = sext <4 x i1> %1 to <4 x i64>
2267 %3 = load <4 x i64>, <4 x i64> *%a2, align 32
; Second compare takes the loaded vector as its right-hand operand.
2268 %4 = icmp eq <4 x i64> %2, %3
2269 %5 = sext <4 x i1> %4 to <4 x i64>
; Scheduling test for the 256-bit vpcmpeqw: compares %a0 with %a1, sign-extends
; the i1 result back to words, then compares that with the vector loaded from
; %a2 (memory form) and sign-extends again.
; The assertions below pin the expected assembly and its -print-schedule
; annotation for every -mcpu listed in the RUN lines at the top of the file.
; The skx assertions expect each compare to write mask register %k0 followed by
; a vpmovm2w; those vpmovm2w lines carry no sched annotation.
2273 define <16 x i16> @test_pcmpeqw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
2274 ; GENERIC-LABEL: test_pcmpeqw:
2276 ; GENERIC-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2277 ; GENERIC-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
2278 ; GENERIC-NEXT: retq # sched: [1:1.00]
2280 ; HASWELL-LABEL: test_pcmpeqw:
2282 ; HASWELL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2283 ; HASWELL-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
2284 ; HASWELL-NEXT: retq # sched: [2:1.00]
2286 ; BROADWELL-LABEL: test_pcmpeqw:
2287 ; BROADWELL: # BB#0:
2288 ; BROADWELL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2289 ; BROADWELL-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
2290 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2292 ; SKYLAKE-LABEL: test_pcmpeqw:
2294 ; SKYLAKE-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2295 ; SKYLAKE-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2296 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2298 ; SKX-LABEL: test_pcmpeqw:
2300 ; SKX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 # sched: [3:1.00]
2301 ; SKX-NEXT: vpmovm2w %k0, %ymm0
2302 ; SKX-NEXT: vpcmpeqw (%rdi), %ymm0, %k0 # sched: [10:1.00]
2303 ; SKX-NEXT: vpmovm2w %k0, %ymm0
2304 ; SKX-NEXT: retq # sched: [7:1.00]
2306 ; ZNVER1-LABEL: test_pcmpeqw:
2308 ; ZNVER1-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2309 ; ZNVER1-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2310 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-register compare, widened from i1 lanes back to i16 lanes.
2311 %1 = icmp eq <16 x i16> %a0, %a1
2312 %2 = sext <16 x i1> %1 to <16 x i16>
2313 %3 = load <16 x i16>, <16 x i16> *%a2, align 32
; Second compare takes the loaded vector as its right-hand operand.
2314 %4 = icmp eq <16 x i16> %2, %3
2315 %5 = sext <16 x i1> %4 to <16 x i16>
; Scheduling test for the 256-bit vpcmpgtb: signed greater-than of %a0 against
; %a1, sign-extended back to bytes, then a second signed greater-than against
; the vector loaded from %a2 (memory form), sign-extended again.
; The assertions below pin the expected assembly and its -print-schedule
; annotation for every -mcpu listed in the RUN lines at the top of the file.
; The skx assertions expect each compare to write mask register %k0 followed by
; a vpmovm2b; those vpmovm2b lines carry no sched annotation.
2319 define <32 x i8> @test_pcmpgtb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
2320 ; GENERIC-LABEL: test_pcmpgtb:
2322 ; GENERIC-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2323 ; GENERIC-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
2324 ; GENERIC-NEXT: retq # sched: [1:1.00]
2326 ; HASWELL-LABEL: test_pcmpgtb:
2328 ; HASWELL-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2329 ; HASWELL-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
2330 ; HASWELL-NEXT: retq # sched: [2:1.00]
2332 ; BROADWELL-LABEL: test_pcmpgtb:
2333 ; BROADWELL: # BB#0:
2334 ; BROADWELL-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2335 ; BROADWELL-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
2336 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2338 ; SKYLAKE-LABEL: test_pcmpgtb:
2340 ; SKYLAKE-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2341 ; SKYLAKE-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2342 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2344 ; SKX-LABEL: test_pcmpgtb:
2346 ; SKX-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 # sched: [3:1.00]
2347 ; SKX-NEXT: vpmovm2b %k0, %ymm0
2348 ; SKX-NEXT: vpcmpgtb (%rdi), %ymm0, %k0 # sched: [10:1.00]
2349 ; SKX-NEXT: vpmovm2b %k0, %ymm0
2350 ; SKX-NEXT: retq # sched: [7:1.00]
2352 ; ZNVER1-LABEL: test_pcmpgtb:
2354 ; ZNVER1-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2355 ; ZNVER1-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2356 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-register signed compare, widened from i1 lanes back to i8 lanes.
2357 %1 = icmp sgt <32 x i8> %a0, %a1
2358 %2 = sext <32 x i1> %1 to <32 x i8>
2359 %3 = load <32 x i8>, <32 x i8> *%a2, align 32
; Second compare takes the loaded vector as its right-hand operand.
2360 %4 = icmp sgt <32 x i8> %2, %3
2361 %5 = sext <32 x i1> %4 to <32 x i8>
; Scheduling test for the 256-bit signed dword greater-than compare (vpcmpgtd).
; The IR compares %a0 with %a1, sign-extends the result, and compares that with
; a vector loaded from %a2, giving one register-form and one memory-form
; instruction in the expected output.
; In the skx expectations each compare targets mask register k0 and is followed
; by vpmovm2d.
2365 define <8 x i32> @test_pcmpgtd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
2366 ; GENERIC-LABEL: test_pcmpgtd:
2368 ; GENERIC-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2369 ; GENERIC-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
2370 ; GENERIC-NEXT: retq # sched: [1:1.00]
2372 ; HASWELL-LABEL: test_pcmpgtd:
2374 ; HASWELL-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2375 ; HASWELL-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
2376 ; HASWELL-NEXT: retq # sched: [2:1.00]
2378 ; BROADWELL-LABEL: test_pcmpgtd:
2379 ; BROADWELL: # BB#0:
2380 ; BROADWELL-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2381 ; BROADWELL-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
2382 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2384 ; SKYLAKE-LABEL: test_pcmpgtd:
2386 ; SKYLAKE-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2387 ; SKYLAKE-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2388 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2390 ; SKX-LABEL: test_pcmpgtd:
2392 ; SKX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 # sched: [3:1.00]
2393 ; SKX-NEXT: vpmovm2d %k0, %ymm0
2394 ; SKX-NEXT: vpcmpgtd (%rdi), %ymm0, %k0 # sched: [10:1.00]
2395 ; SKX-NEXT: vpmovm2d %k0, %ymm0
2396 ; SKX-NEXT: retq # sched: [7:1.00]
2398 ; ZNVER1-LABEL: test_pcmpgtd:
2400 ; ZNVER1-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2401 ; ZNVER1-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2402 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2403 %1 = icmp sgt <8 x i32> %a0, %a1
2404 %2 = sext <8 x i1> %1 to <8 x i32>
2405 %3 = load <8 x i32>, <8 x i32> *%a2, align 32
2406 %4 = icmp sgt <8 x i32> %2, %3
2407 %5 = sext <8 x i1> %4 to <8 x i32>
; Scheduling test for the 256-bit signed qword greater-than compare (vpcmpgtq).
; The IR compares %a0 with %a1, sign-extends the result, and compares that with
; a vector loaded from %a2, giving one register-form and one memory-form
; instruction in the expected output.
; In the skx expectations each compare targets mask register k0 and is followed
; by vpmovm2q.
2411 define <4 x i64> @test_pcmpgtq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
2412 ; GENERIC-LABEL: test_pcmpgtq:
2414 ; GENERIC-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2415 ; GENERIC-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
2416 ; GENERIC-NEXT: retq # sched: [1:1.00]
2418 ; HASWELL-LABEL: test_pcmpgtq:
2420 ; HASWELL-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
2421 ; HASWELL-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
2422 ; HASWELL-NEXT: retq # sched: [2:1.00]
2424 ; BROADWELL-LABEL: test_pcmpgtq:
2425 ; BROADWELL: # BB#0:
2426 ; BROADWELL-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
2427 ; BROADWELL-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
2428 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2430 ; SKYLAKE-LABEL: test_pcmpgtq:
2432 ; SKYLAKE-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2433 ; SKYLAKE-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
2434 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2436 ; SKX-LABEL: test_pcmpgtq:
2438 ; SKX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 # sched: [3:1.00]
2439 ; SKX-NEXT: vpmovm2q %k0, %ymm0
2440 ; SKX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0 # sched: [10:1.00]
2441 ; SKX-NEXT: vpmovm2q %k0, %ymm0
2442 ; SKX-NEXT: retq # sched: [7:1.00]
2444 ; ZNVER1-LABEL: test_pcmpgtq:
2446 ; ZNVER1-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2447 ; ZNVER1-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
2448 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2449 %1 = icmp sgt <4 x i64> %a0, %a1
2450 %2 = sext <4 x i1> %1 to <4 x i64>
2451 %3 = load <4 x i64>, <4 x i64> *%a2, align 32
2452 %4 = icmp sgt <4 x i64> %2, %3
2453 %5 = sext <4 x i1> %4 to <4 x i64>
; Scheduling test for the 256-bit signed word greater-than compare (vpcmpgtw).
; The IR compares %a0 with %a1, sign-extends the result, and compares that with
; a vector loaded from %a2, giving one register-form and one memory-form
; instruction in the expected output.
; In the skx expectations each compare targets mask register k0 and is followed
; by vpmovm2w.
2457 define <16 x i16> @test_pcmpgtw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
2458 ; GENERIC-LABEL: test_pcmpgtw:
2460 ; GENERIC-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2461 ; GENERIC-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
2462 ; GENERIC-NEXT: retq # sched: [1:1.00]
2464 ; HASWELL-LABEL: test_pcmpgtw:
2466 ; HASWELL-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2467 ; HASWELL-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
2468 ; HASWELL-NEXT: retq # sched: [2:1.00]
2470 ; BROADWELL-LABEL: test_pcmpgtw:
2471 ; BROADWELL: # BB#0:
2472 ; BROADWELL-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2473 ; BROADWELL-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
2474 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2476 ; SKYLAKE-LABEL: test_pcmpgtw:
2478 ; SKYLAKE-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2479 ; SKYLAKE-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2480 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2482 ; SKX-LABEL: test_pcmpgtw:
2484 ; SKX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 # sched: [3:1.00]
2485 ; SKX-NEXT: vpmovm2w %k0, %ymm0
2486 ; SKX-NEXT: vpcmpgtw (%rdi), %ymm0, %k0 # sched: [10:1.00]
2487 ; SKX-NEXT: vpmovm2w %k0, %ymm0
2488 ; SKX-NEXT: retq # sched: [7:1.00]
2490 ; ZNVER1-LABEL: test_pcmpgtw:
2492 ; ZNVER1-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2493 ; ZNVER1-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
2494 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2495 %1 = icmp sgt <16 x i16> %a0, %a1
2496 %2 = sext <16 x i1> %1 to <16 x i16>
2497 %3 = load <16 x i16>, <16 x i16> *%a2, align 32
2498 %4 = icmp sgt <16 x i16> %2, %3
2499 %5 = sext <16 x i1> %4 to <16 x i16>
; Scheduling test for vperm2i128.
; Two shufflevectors with mask <2, 3, 4, 5> take elements 2-3 of %a0 followed
; by elements 0-1 of the second operand: once with %a1 in a register and once
; with a vector loaded from %a2. The two shuffle results are then added, which
; accounts for the trailing vpaddq in the expected output.
2503 define <4 x i64> @test_perm2i128(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
2504 ; GENERIC-LABEL: test_perm2i128:
2506 ; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
2507 ; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [5:1.00]
2508 ; GENERIC-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
2509 ; GENERIC-NEXT: retq # sched: [1:1.00]
2511 ; HASWELL-LABEL: test_perm2i128:
2513 ; HASWELL-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
2514 ; HASWELL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [3:1.00]
2515 ; HASWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
2516 ; HASWELL-NEXT: retq # sched: [2:1.00]
2518 ; BROADWELL-LABEL: test_perm2i128:
2519 ; BROADWELL: # BB#0:
2520 ; BROADWELL-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
2521 ; BROADWELL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [9:1.00]
2522 ; BROADWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
2523 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2525 ; SKYLAKE-LABEL: test_perm2i128:
2527 ; SKYLAKE-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
2528 ; SKYLAKE-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
2529 ; SKYLAKE-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
2530 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2532 ; SKX-LABEL: test_perm2i128:
2534 ; SKX-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
2535 ; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
2536 ; SKX-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
2537 ; SKX-NEXT: retq # sched: [7:1.00]
2539 ; ZNVER1-LABEL: test_perm2i128:
2541 ; ZNVER1-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [2:0.25]
2542 ; ZNVER1-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [9:0.50]
2543 ; ZNVER1-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
2544 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2545 %1 = shufflevector <4 x i64> %a0, <4 x i64> %a1, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
2546 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
2547 %3 = shufflevector <4 x i64> %a0, <4 x i64> %2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
2548 %4 = add <4 x i64> %1, %3
; Scheduling test for vpermd via the llvm.x86.avx2.permd intrinsic.
; The intrinsic is called twice with %a0 as its second argument: first with
; %a1 in a register, then with a vector loaded from %a2. The two results are
; added, which accounts for the trailing vpaddd in the expected output.
2552 define <8 x i32> @test_permd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
2553 ; GENERIC-LABEL: test_permd:
2555 ; GENERIC-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [1:1.00]
2556 ; GENERIC-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
2557 ; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
2558 ; GENERIC-NEXT: retq # sched: [1:1.00]
2560 ; HASWELL-LABEL: test_permd:
2562 ; HASWELL-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
2563 ; HASWELL-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [3:1.00]
2564 ; HASWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
2565 ; HASWELL-NEXT: retq # sched: [2:1.00]
2567 ; BROADWELL-LABEL: test_permd:
2568 ; BROADWELL: # BB#0:
2569 ; BROADWELL-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
2570 ; BROADWELL-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
2571 ; BROADWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
2572 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2574 ; SKYLAKE-LABEL: test_permd:
2576 ; SKYLAKE-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
2577 ; SKYLAKE-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
2578 ; SKYLAKE-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
2579 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2581 ; SKX-LABEL: test_permd:
2583 ; SKX-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
2584 ; SKX-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
2585 ; SKX-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
2586 ; SKX-NEXT: retq # sched: [7:1.00]
2588 ; ZNVER1-LABEL: test_permd:
2590 ; ZNVER1-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [2:0.25]
2591 ; ZNVER1-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [9:0.50]
2592 ; ZNVER1-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
2593 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2594 %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a1, <8 x i32> %a0)
2595 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
2596 %3 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %2, <8 x i32> %a0)
2597 %4 = add <8 x i32> %1, %3
2600 declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>) nounwind readonly
; Scheduling test for the immediate form of vpermpd.
; %a0 is shuffled with mask <3, 2, 2, 3> and a vector loaded from %a1 is
; shuffled with mask <0, 2, 2, 3>; both shuffles use undef as the second
; operand. The results are combined with fadd (the trailing vaddpd).
; The znver1 expectations list the memory-form shuffle before the register
; form, unlike the other runs.
2602 define <4 x double> @test_permpd(<4 x double> %a0, <4 x double> *%a1) {
2603 ; GENERIC-LABEL: test_permpd:
2605 ; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [1:1.00]
2606 ; GENERIC-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [5:1.00]
2607 ; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2608 ; GENERIC-NEXT: retq # sched: [1:1.00]
2610 ; HASWELL-LABEL: test_permpd:
2612 ; HASWELL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
2613 ; HASWELL-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [3:1.00]
2614 ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2615 ; HASWELL-NEXT: retq # sched: [2:1.00]
2617 ; BROADWELL-LABEL: test_permpd:
2618 ; BROADWELL: # BB#0:
2619 ; BROADWELL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
2620 ; BROADWELL-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [9:1.00]
2621 ; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2622 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2624 ; SKYLAKE-LABEL: test_permpd:
2626 ; SKYLAKE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
2627 ; SKYLAKE-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00]
2628 ; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
2629 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2631 ; SKX-LABEL: test_permpd:
2633 ; SKX-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
2634 ; SKX-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00]
2635 ; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
2636 ; SKX-NEXT: retq # sched: [7:1.00]
2638 ; ZNVER1-LABEL: test_permpd:
2640 ; ZNVER1-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [107:0.50]
2641 ; ZNVER1-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [100:0.25]
2642 ; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2643 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2644 %1 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 2, i32 3>
2645 %2 = load <4 x double>, <4 x double> *%a1, align 32
2646 %3 = shufflevector <4 x double> %2, <4 x double> undef, <4 x i32> <i32 0, i32 2, i32 2, i32 3>
2647 %4 = fadd <4 x double> %1, %3
; Scheduling test for vpermps via the llvm.x86.avx2.permps intrinsic.
; The intrinsic is called twice with %a0 as its <8 x i32> second argument:
; first with %a1 in a register, then with a vector loaded from %a2. The two
; results are combined with fadd (the trailing vaddps).
2651 define <8 x float> @test_permps(<8 x i32> %a0, <8 x float> %a1, <8 x float> *%a2) {
2652 ; GENERIC-LABEL: test_permps:
2654 ; GENERIC-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [1:1.00]
2655 ; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
2656 ; GENERIC-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
2657 ; GENERIC-NEXT: retq # sched: [1:1.00]
2659 ; HASWELL-LABEL: test_permps:
2661 ; HASWELL-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
2662 ; HASWELL-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [3:1.00]
2663 ; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
2664 ; HASWELL-NEXT: retq # sched: [2:1.00]
2666 ; BROADWELL-LABEL: test_permps:
2667 ; BROADWELL: # BB#0:
2668 ; BROADWELL-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
2669 ; BROADWELL-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
2670 ; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
2671 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2673 ; SKYLAKE-LABEL: test_permps:
2675 ; SKYLAKE-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
2676 ; SKYLAKE-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
2677 ; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
2678 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2680 ; SKX-LABEL: test_permps:
2682 ; SKX-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
2683 ; SKX-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
2684 ; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.33]
2685 ; SKX-NEXT: retq # sched: [7:1.00]
2687 ; ZNVER1-LABEL: test_permps:
2689 ; ZNVER1-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [100:0.25]
2690 ; ZNVER1-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [107:0.50]
2691 ; ZNVER1-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
2692 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2693 %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a1, <8 x i32> %a0)
2694 %2 = load <8 x float>, <8 x float> *%a2, align 32
2695 %3 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %2, <8 x i32> %a0)
2696 %4 = fadd <8 x float> %1, %3
2699 declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>) nounwind readonly
; Scheduling test for the immediate form of vpermq.
; %a0 is shuffled with mask <3, 2, 2, 3> and a vector loaded from %a1 is
; shuffled with mask <0, 2, 2, 3>; both shuffles use undef as the second
; operand. The results are added (the trailing vpaddq).
; The znver1 expectations list the memory-form shuffle before the register
; form, unlike the other runs.
2701 define <4 x i64> @test_permq(<4 x i64> %a0, <4 x i64> *%a1) {
2702 ; GENERIC-LABEL: test_permq:
2704 ; GENERIC-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [1:1.00]
2705 ; GENERIC-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [5:1.00]
2706 ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2707 ; GENERIC-NEXT: retq # sched: [1:1.00]
2709 ; HASWELL-LABEL: test_permq:
2711 ; HASWELL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
2712 ; HASWELL-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [3:1.00]
2713 ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2714 ; HASWELL-NEXT: retq # sched: [2:1.00]
2716 ; BROADWELL-LABEL: test_permq:
2717 ; BROADWELL: # BB#0:
2718 ; BROADWELL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
2719 ; BROADWELL-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [9:1.00]
2720 ; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
2721 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2723 ; SKYLAKE-LABEL: test_permq:
2725 ; SKYLAKE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
2726 ; SKYLAKE-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00]
2727 ; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
2728 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2730 ; SKX-LABEL: test_permq:
2732 ; SKX-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
2733 ; SKX-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00]
2734 ; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
2735 ; SKX-NEXT: retq # sched: [7:1.00]
2737 ; ZNVER1-LABEL: test_permq:
2739 ; ZNVER1-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [9:0.50]
2740 ; ZNVER1-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [2:0.25]
2741 ; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
2742 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2743 %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 2, i32 3>
2744 %2 = load <4 x i64>, <4 x i64> *%a1, align 32
2745 %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> <i32 0, i32 2, i32 2, i32 3>
2746 %4 = add <4 x i64> %1, %3
; Scheduling test for the 128-bit vpgatherdd via llvm.x86.avx2.gather.d.d.
; A single intrinsic call with a final i8 argument of 2 is expected to become
; one gather instruction whose address uses scale 2.
; The generic and broadwell expectations carry no sched annotation on the
; gather itself; haswell and znver1 report an unknown ("?") throughput.
2750 define <4 x i32> @test_pgatherdd(<4 x i32> %a0, i8* %a1, <4 x i32> %a2, <4 x i32> %a3) {
2751 ; GENERIC-LABEL: test_pgatherdd:
2753 ; GENERIC-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0
2754 ; GENERIC-NEXT: retq # sched: [1:1.00]
2756 ; HASWELL-LABEL: test_pgatherdd:
2758 ; HASWELL-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?]
2759 ; HASWELL-NEXT: retq # sched: [2:1.00]
2761 ; BROADWELL-LABEL: test_pgatherdd:
2762 ; BROADWELL: # BB#0:
2763 ; BROADWELL-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0
2764 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2766 ; SKYLAKE-LABEL: test_pgatherdd:
2768 ; SKYLAKE-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
2769 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2771 ; SKX-LABEL: test_pgatherdd:
2773 ; SKX-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
2774 ; SKX-NEXT: retq # sched: [7:1.00]
2776 ; ZNVER1-LABEL: test_pgatherdd:
2778 ; ZNVER1-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?]
2779 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2780 %1 = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> %a0, i8* %a1, <4 x i32> %a2, <4 x i32> %a3, i8 2)
2783 declare <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32>, i8*, <4 x i32>, <4 x i32>, i8) nounwind readonly
; Scheduling test for the 256-bit vpgatherdd via llvm.x86.avx2.gather.d.d.256.
; A single intrinsic call with a final i8 argument of 2 is expected to become
; one ymm gather instruction whose address uses scale 2.
; The generic and broadwell expectations carry no sched annotation on the
; gather itself; haswell and znver1 report an unknown ("?") throughput.
2785 define <8 x i32> @test_pgatherdd_ymm(<8 x i32> %a0, i8* %a1, <8 x i32> %a2, <8 x i32> %a3) {
2786 ; GENERIC-LABEL: test_pgatherdd_ymm:
2788 ; GENERIC-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0
2789 ; GENERIC-NEXT: retq # sched: [1:1.00]
2791 ; HASWELL-LABEL: test_pgatherdd_ymm:
2793 ; HASWELL-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [1:?]
2794 ; HASWELL-NEXT: retq # sched: [2:1.00]
2796 ; BROADWELL-LABEL: test_pgatherdd_ymm:
2797 ; BROADWELL: # BB#0:
2798 ; BROADWELL-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0
2799 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2801 ; SKYLAKE-LABEL: test_pgatherdd_ymm:
2803 ; SKYLAKE-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [25:1.00]
2804 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2806 ; SKX-LABEL: test_pgatherdd_ymm:
2808 ; SKX-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [25:1.00]
2809 ; SKX-NEXT: retq # sched: [7:1.00]
2811 ; ZNVER1-LABEL: test_pgatherdd_ymm:
2813 ; ZNVER1-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [100:?]
2814 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2815 %1 = call <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32> %a0, i8* %a1, <8 x i32> %a2, <8 x i32> %a3, i8 2)
2818 declare <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32>, i8*, <8 x i32>, <8 x i32>, i8) nounwind readonly
; Scheduling test for the 128-bit vpgatherdq via llvm.x86.avx2.gather.d.q.
; A single intrinsic call (i32 indices in %a2, i64 data, final i8 argument 2)
; is expected to become one gather instruction whose address uses scale 2.
; The generic and broadwell expectations carry no sched annotation on the
; gather itself; haswell and znver1 report an unknown ("?") throughput.
2820 define <2 x i64> @test_pgatherdq(<2 x i64> %a0, i8* %a1, <4 x i32> %a2, <2 x i64> %a3) {
2821 ; GENERIC-LABEL: test_pgatherdq:
2823 ; GENERIC-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0
2824 ; GENERIC-NEXT: retq # sched: [1:1.00]
2826 ; HASWELL-LABEL: test_pgatherdq:
2828 ; HASWELL-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?]
2829 ; HASWELL-NEXT: retq # sched: [2:1.00]
2831 ; BROADWELL-LABEL: test_pgatherdq:
2832 ; BROADWELL: # BB#0:
2833 ; BROADWELL-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0
2834 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2836 ; SKYLAKE-LABEL: test_pgatherdq:
2838 ; SKYLAKE-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
2839 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2841 ; SKX-LABEL: test_pgatherdq:
2843 ; SKX-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
2844 ; SKX-NEXT: retq # sched: [7:1.00]
2846 ; ZNVER1-LABEL: test_pgatherdq:
2848 ; ZNVER1-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?]
2849 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2850 %1 = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> %a0, i8* %a1, <4 x i32> %a2, <2 x i64> %a3, i8 2)
2853 declare <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64>, i8*, <4 x i32>, <2 x i64>, i8) nounwind readonly
; Scheduling test for the 256-bit vpgatherdq via llvm.x86.avx2.gather.d.q.256.
; A single intrinsic call (<4 x i32> indices in %a2, <4 x i64> data, final i8
; argument 2) is expected to become one gather with an xmm index register, ymm
; data/mask registers and scale 2.
; The generic and broadwell expectations carry no sched annotation on the
; gather itself; haswell and znver1 report an unknown ("?") throughput.
2855 define <4 x i64> @test_pgatherdq_ymm(<4 x i64> %a0, i8* %a1, <4 x i32> %a2, <4 x i64> %a3) {
2856 ; GENERIC-LABEL: test_pgatherdq_ymm:
2858 ; GENERIC-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0
2859 ; GENERIC-NEXT: retq # sched: [1:1.00]
2861 ; HASWELL-LABEL: test_pgatherdq_ymm:
2863 ; HASWELL-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [1:?]
2864 ; HASWELL-NEXT: retq # sched: [2:1.00]
2866 ; BROADWELL-LABEL: test_pgatherdq_ymm:
2867 ; BROADWELL: # BB#0:
2868 ; BROADWELL-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0
2869 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2871 ; SKYLAKE-LABEL: test_pgatherdq_ymm:
2873 ; SKYLAKE-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [25:1.00]
2874 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2876 ; SKX-LABEL: test_pgatherdq_ymm:
2878 ; SKX-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [25:1.00]
2879 ; SKX-NEXT: retq # sched: [7:1.00]
2881 ; ZNVER1-LABEL: test_pgatherdq_ymm:
2883 ; ZNVER1-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [100:?]
2884 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2885 %1 = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> %a0, i8* %a1, <4 x i32> %a2, <4 x i64> %a3, i8 2)
2888 declare <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64>, i8*, <4 x i32>, <4 x i64>, i8) nounwind readonly
; Scheduling test for the 128-bit vpgatherqd via llvm.x86.avx2.gather.q.d.
; A single intrinsic call (<2 x i64> indices in %a2, <4 x i32> data, final i8
; argument 2) is expected to become one gather whose address uses scale 2.
; The generic and broadwell expectations carry no sched annotation on the
; gather itself; haswell and znver1 report an unknown ("?") throughput.
2890 define <4 x i32> @test_pgatherqd(<4 x i32> %a0, i8* %a1, <2 x i64> %a2, <4 x i32> %a3) {
2891 ; GENERIC-LABEL: test_pgatherqd:
2893 ; GENERIC-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0
2894 ; GENERIC-NEXT: retq # sched: [1:1.00]
2896 ; HASWELL-LABEL: test_pgatherqd:
2898 ; HASWELL-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?]
2899 ; HASWELL-NEXT: retq # sched: [2:1.00]
2901 ; BROADWELL-LABEL: test_pgatherqd:
2902 ; BROADWELL: # BB#0:
2903 ; BROADWELL-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0
2904 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2906 ; SKYLAKE-LABEL: test_pgatherqd:
2908 ; SKYLAKE-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
2909 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2911 ; SKX-LABEL: test_pgatherqd:
2913 ; SKX-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
2914 ; SKX-NEXT: retq # sched: [7:1.00]
2916 ; ZNVER1-LABEL: test_pgatherqd:
2918 ; ZNVER1-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?]
2919 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2920 %1 = call <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32> %a0, i8* %a1, <2 x i64> %a2, <4 x i32> %a3, i8 2)
2923 declare <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32>, i8*, <2 x i64>, <4 x i32>, i8) nounwind readonly
; Scheduling test for the 256-bit-index vpgatherqd via
; llvm.x86.avx2.gather.q.d.256.
; A single intrinsic call (<4 x i64> indices in %a2, <4 x i32> data, final i8
; argument 2) is expected to become one gather with a ymm index register, xmm
; data/mask registers and scale 2.
; Unlike the other gather tests here, every run expects a vzeroupper between
; the gather and the return.
; The generic and broadwell expectations carry no sched annotation on the
; gather itself (generic also has none on vzeroupper).
2925 define <4 x i32> @test_pgatherqd_ymm(<4 x i32> %a0, i8* %a1, <4 x i64> %a2, <4 x i32> %a3) {
2926 ; GENERIC-LABEL: test_pgatherqd_ymm:
2928 ; GENERIC-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0
2929 ; GENERIC-NEXT: vzeroupper
2930 ; GENERIC-NEXT: retq # sched: [1:1.00]
2932 ; HASWELL-LABEL: test_pgatherqd_ymm:
2934 ; HASWELL-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [1:?]
2935 ; HASWELL-NEXT: vzeroupper # sched: [4:1.00]
2936 ; HASWELL-NEXT: retq # sched: [2:1.00]
2938 ; BROADWELL-LABEL: test_pgatherqd_ymm:
2939 ; BROADWELL: # BB#0:
2940 ; BROADWELL-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0
2941 ; BROADWELL-NEXT: vzeroupper # sched: [4:1.00]
2942 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2944 ; SKYLAKE-LABEL: test_pgatherqd_ymm:
2946 ; SKYLAKE-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [25:1.00]
2947 ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00]
2948 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2950 ; SKX-LABEL: test_pgatherqd_ymm:
2952 ; SKX-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [25:1.00]
2953 ; SKX-NEXT: vzeroupper # sched: [4:1.00]
2954 ; SKX-NEXT: retq # sched: [7:1.00]
2956 ; ZNVER1-LABEL: test_pgatherqd_ymm:
2958 ; ZNVER1-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [100:?]
2959 ; ZNVER1-NEXT: vzeroupper # sched: [100:?]
2960 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2961 %1 = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> %a0, i8* %a1, <4 x i64> %a2, <4 x i32> %a3, i8 2)
2964 declare <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32>, i8*, <4 x i64>, <4 x i32>, i8) nounwind readonly
; Scheduling test for the 128-bit vpgatherqq via llvm.x86.avx2.gather.q.q.
; A single intrinsic call with a final i8 argument of 2 is expected to become
; one gather instruction whose address uses scale 2.
; The generic and broadwell expectations carry no sched annotation on the
; gather itself; haswell and znver1 report an unknown ("?") throughput.
2966 define <2 x i64> @test_pgatherqq(<2 x i64> %a0, i8 *%a1, <2 x i64> %a2, <2 x i64> %a3) {
2967 ; GENERIC-LABEL: test_pgatherqq:
2969 ; GENERIC-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0
2970 ; GENERIC-NEXT: retq # sched: [1:1.00]
2972 ; HASWELL-LABEL: test_pgatherqq:
2974 ; HASWELL-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [1:?]
2975 ; HASWELL-NEXT: retq # sched: [2:1.00]
2977 ; BROADWELL-LABEL: test_pgatherqq:
2978 ; BROADWELL: # BB#0:
2979 ; BROADWELL-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0
2980 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2982 ; SKYLAKE-LABEL: test_pgatherqq:
2984 ; SKYLAKE-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
2985 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2987 ; SKX-LABEL: test_pgatherqq:
2989 ; SKX-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
2990 ; SKX-NEXT: retq # sched: [7:1.00]
2992 ; ZNVER1-LABEL: test_pgatherqq:
2994 ; ZNVER1-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:?]
2995 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2996 %1 = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> %a0, i8* %a1, <2 x i64> %a2, <2 x i64> %a3, i8 2)
2999 declare <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64>, i8*, <2 x i64>, <2 x i64>, i8) nounwind readonly
; Scheduling test for the 256-bit vpgatherqq via llvm.x86.avx2.gather.q.q.256.
; A single intrinsic call with a final i8 argument of 2 is expected to become
; one ymm gather instruction whose address uses scale 2.
; The generic and broadwell expectations carry no sched annotation on the
; gather itself; haswell and znver1 report an unknown ("?") throughput.
3001 define <4 x i64> @test_pgatherqq_ymm(<4 x i64> %a0, i8 *%a1, <4 x i64> %a2, <4 x i64> %a3) {
3002 ; GENERIC-LABEL: test_pgatherqq_ymm:
3004 ; GENERIC-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0
3005 ; GENERIC-NEXT: retq # sched: [1:1.00]
3007 ; HASWELL-LABEL: test_pgatherqq_ymm:
3009 ; HASWELL-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [1:?]
3010 ; HASWELL-NEXT: retq # sched: [2:1.00]
3012 ; BROADWELL-LABEL: test_pgatherqq_ymm:
3013 ; BROADWELL: # BB#0:
3014 ; BROADWELL-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0
3015 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3017 ; SKYLAKE-LABEL: test_pgatherqq_ymm:
3019 ; SKYLAKE-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [25:1.00]
3020 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3022 ; SKX-LABEL: test_pgatherqq_ymm:
3024 ; SKX-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [25:1.00]
3025 ; SKX-NEXT: retq # sched: [7:1.00]
3027 ; ZNVER1-LABEL: test_pgatherqq_ymm:
3029 ; ZNVER1-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [100:?]
3030 ; ZNVER1-NEXT: retq # sched: [1:0.50]
3031 %1 = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> %a0, i8* %a1, <4 x i64> %a2, <4 x i64> %a3, i8 2)
3034 declare <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64>, i8*, <4 x i64>, <4 x i64>, i8) nounwind readonly
; Scheduling test for vphaddd via the llvm.x86.avx2.phadd.d intrinsic.
; The first call combines %a0 and %a1; the second call combines that result
; with a vector loaded from %a2, so the expected output has one register-form
; and one memory-form instruction.
; The znver1 expectations report "[100:?]" for both instructions.
3036 define <8 x i32> @test_phaddd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
3037 ; GENERIC-LABEL: test_phaddd:
3039 ; GENERIC-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3040 ; GENERIC-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
3041 ; GENERIC-NEXT: retq # sched: [1:1.00]
3043 ; HASWELL-LABEL: test_phaddd:
3045 ; HASWELL-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3046 ; HASWELL-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [3:2.00]
3047 ; HASWELL-NEXT: retq # sched: [2:1.00]
3049 ; BROADWELL-LABEL: test_phaddd:
3050 ; BROADWELL: # BB#0:
3051 ; BROADWELL-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3052 ; BROADWELL-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
3053 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3055 ; SKYLAKE-LABEL: test_phaddd:
3057 ; SKYLAKE-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3058 ; SKYLAKE-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3059 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3061 ; SKX-LABEL: test_phaddd:
3063 ; SKX-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3064 ; SKX-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3065 ; SKX-NEXT: retq # sched: [7:1.00]
3067 ; ZNVER1-LABEL: test_phaddd:
3069 ; ZNVER1-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [100:?]
3070 ; ZNVER1-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [100:?]
3071 ; ZNVER1-NEXT: retq # sched: [1:0.50]
3072 %1 = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %a0, <8 x i32> %a1)
3073 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
3074 %3 = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %1, <8 x i32> %2)
3077 declare <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32>, <8 x i32>) nounwind readnone
; Scheduling test for vphaddsw via the llvm.x86.avx2.phadd.sw intrinsic.
; The first call combines %a0 and %a1; the second call combines that result
; with a vector loaded from %a2, so the expected output has one register-form
; and one memory-form instruction.
; The znver1 expectations report "[100:?]" for both instructions.
3079 define <16 x i16> @test_phaddsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
3080 ; GENERIC-LABEL: test_phaddsw:
3082 ; GENERIC-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3083 ; GENERIC-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3084 ; GENERIC-NEXT: retq # sched: [1:1.00]
3086 ; HASWELL-LABEL: test_phaddsw:
3088 ; HASWELL-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3089 ; HASWELL-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [3:2.00]
3090 ; HASWELL-NEXT: retq # sched: [2:1.00]
3092 ; BROADWELL-LABEL: test_phaddsw:
3093 ; BROADWELL: # BB#0:
3094 ; BROADWELL-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3095 ; BROADWELL-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
3096 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3098 ; SKYLAKE-LABEL: test_phaddsw:
3100 ; SKYLAKE-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3101 ; SKYLAKE-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3102 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3104 ; SKX-LABEL: test_phaddsw:
3106 ; SKX-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3107 ; SKX-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3108 ; SKX-NEXT: retq # sched: [7:1.00]
3110 ; ZNVER1-LABEL: test_phaddsw:
3112 ; ZNVER1-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [100:?]
3113 ; ZNVER1-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [100:?]
3114 ; ZNVER1-NEXT: retq # sched: [1:0.50]
3115 %1 = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %a0, <16 x i16> %a1)
3116 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
3117 %3 = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %1, <16 x i16> %2)
3120 declare <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16>, <16 x i16>) nounwind readnone
; Scheduling test for vphaddw via the llvm.x86.avx2.phadd.w intrinsic.
; The first call combines %a0 and %a1; the second call combines that result
; with a vector loaded from %a2, so the expected output has one register-form
; and one memory-form instruction.
; The znver1 expectations report "[100:?]" for both instructions.
3122 define <16 x i16> @test_phaddw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
3123 ; GENERIC-LABEL: test_phaddw:
3125 ; GENERIC-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3126 ; GENERIC-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
3127 ; GENERIC-NEXT: retq # sched: [1:1.00]
3129 ; HASWELL-LABEL: test_phaddw:
3131 ; HASWELL-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3132 ; HASWELL-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [3:2.00]
3133 ; HASWELL-NEXT: retq # sched: [2:1.00]
3135 ; BROADWELL-LABEL: test_phaddw:
3136 ; BROADWELL: # BB#0:
3137 ; BROADWELL-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3138 ; BROADWELL-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
3139 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3141 ; SKYLAKE-LABEL: test_phaddw:
3143 ; SKYLAKE-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3144 ; SKYLAKE-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3145 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3147 ; SKX-LABEL: test_phaddw:
3149 ; SKX-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3150 ; SKX-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3151 ; SKX-NEXT: retq # sched: [7:1.00]
3153 ; ZNVER1-LABEL: test_phaddw:
3155 ; ZNVER1-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [100:?]
3156 ; ZNVER1-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [100:?]
3157 ; ZNVER1-NEXT: retq # sched: [1:0.50]
3158 %1 = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %a0, <16 x i16> %a1)
3159 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
3160 %3 = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %1, <16 x i16> %2)
3163 declare <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16>, <16 x i16>) nounwind readnone
; Scheduling test for vphsubd via the llvm.x86.avx2.phsub.d intrinsic.
; The first call combines %a0 and %a1; the second call combines that result
; with a vector loaded from %a2, so the expected output has one register-form
; and one memory-form instruction.
; The znver1 expectations report "[100:?]" for both instructions.
3165 define <8 x i32> @test_phsubd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
3166 ; GENERIC-LABEL: test_phsubd:
3168 ; GENERIC-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3169 ; GENERIC-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
3170 ; GENERIC-NEXT: retq # sched: [1:1.00]
3172 ; HASWELL-LABEL: test_phsubd:
3174 ; HASWELL-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3175 ; HASWELL-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [3:2.00]
3176 ; HASWELL-NEXT: retq # sched: [2:1.00]
3178 ; BROADWELL-LABEL: test_phsubd:
3179 ; BROADWELL: # BB#0:
3180 ; BROADWELL-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3181 ; BROADWELL-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
3182 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3184 ; SKYLAKE-LABEL: test_phsubd:
3186 ; SKYLAKE-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3187 ; SKYLAKE-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3188 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3190 ; SKX-LABEL: test_phsubd:
3192 ; SKX-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3193 ; SKX-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3194 ; SKX-NEXT: retq # sched: [7:1.00]
3196 ; ZNVER1-LABEL: test_phsubd:
3198 ; ZNVER1-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [100:?]
3199 ; ZNVER1-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [100:?]
3200 ; ZNVER1-NEXT: retq # sched: [1:0.50]
3201 %1 = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %a0, <8 x i32> %a1)
3202 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
3203 %3 = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %1, <8 x i32> %2)
3206 declare <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32>, <8 x i32>) nounwind readnone
; Schedule test for vphsubsw on ymm operands: the phsub.sw intrinsic is called
; once on the two register arguments and once on a vector loaded from %a2, so
; every CPU prefix checks a register form and a (%rdi) memory form.
3208 define <16 x i16> @test_phsubsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
3209 ; GENERIC-LABEL: test_phsubsw:
3211 ; GENERIC-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3212 ; GENERIC-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3213 ; GENERIC-NEXT: retq # sched: [1:1.00]
3215 ; HASWELL-LABEL: test_phsubsw:
3217 ; HASWELL-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3218 ; HASWELL-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [3:2.00]
3219 ; HASWELL-NEXT: retq # sched: [2:1.00]
3221 ; BROADWELL-LABEL: test_phsubsw:
3222 ; BROADWELL: # BB#0:
3223 ; BROADWELL-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3224 ; BROADWELL-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
3225 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3227 ; SKYLAKE-LABEL: test_phsubsw:
3229 ; SKYLAKE-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3230 ; SKYLAKE-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3231 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3233 ; SKX-LABEL: test_phsubsw:
3235 ; SKX-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3236 ; SKX-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3237 ; SKX-NEXT: retq # sched: [7:1.00]
3239 ; ZNVER1-LABEL: test_phsubsw:
3241 ; ZNVER1-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [100:?]
3242 ; ZNVER1-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [100:?]
3243 ; ZNVER1-NEXT: retq # sched: [1:0.50]
3244 %1 = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %a0, <16 x i16> %a1)
3245 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
3246 %3 = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %1, <16 x i16> %2)
3249 declare <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16>, <16 x i16>) nounwind readnone
; Schedule test for vphsubw on ymm operands: the phsub.w intrinsic is called
; once on the two register arguments and once on a vector loaded from %a2, so
; every CPU prefix checks a register form and a (%rdi) memory form.
3251 define <16 x i16> @test_phsubw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
3252 ; GENERIC-LABEL: test_phsubw:
3254 ; GENERIC-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3255 ; GENERIC-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
3256 ; GENERIC-NEXT: retq # sched: [1:1.00]
3258 ; HASWELL-LABEL: test_phsubw:
3260 ; HASWELL-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3261 ; HASWELL-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [3:2.00]
3262 ; HASWELL-NEXT: retq # sched: [2:1.00]
3264 ; BROADWELL-LABEL: test_phsubw:
3265 ; BROADWELL: # BB#0:
3266 ; BROADWELL-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3267 ; BROADWELL-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
3268 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3270 ; SKYLAKE-LABEL: test_phsubw:
3272 ; SKYLAKE-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3273 ; SKYLAKE-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3274 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3276 ; SKX-LABEL: test_phsubw:
3278 ; SKX-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3279 ; SKX-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
3280 ; SKX-NEXT: retq # sched: [7:1.00]
3282 ; ZNVER1-LABEL: test_phsubw:
3284 ; ZNVER1-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [100:?]
3285 ; ZNVER1-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [100:?]
3286 ; ZNVER1-NEXT: retq # sched: [1:0.50]
3287 %1 = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %a0, <16 x i16> %a1)
3288 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
3289 %3 = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %1, <16 x i16> %2)
3292 declare <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16>, <16 x i16>) nounwind readnone
; Schedule test for vpmaddubsw on ymm operands. The first pmadd.ub.sw call
; uses the two register arguments; its <16 x i16> result is bitcast back to
; <32 x i8> so it can feed a second call whose other operand is loaded from
; %a2, giving a register form and a (%rdi) memory form per CPU prefix.
3294 define <16 x i16> @test_pmaddubsw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
3295 ; GENERIC-LABEL: test_pmaddubsw:
3297 ; GENERIC-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
3298 ; GENERIC-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
3299 ; GENERIC-NEXT: retq # sched: [1:1.00]
3301 ; HASWELL-LABEL: test_pmaddubsw:
3303 ; HASWELL-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
3304 ; HASWELL-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
3305 ; HASWELL-NEXT: retq # sched: [2:1.00]
3307 ; BROADWELL-LABEL: test_pmaddubsw:
3308 ; BROADWELL: # BB#0:
3309 ; BROADWELL-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
3310 ; BROADWELL-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
3311 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3313 ; SKYLAKE-LABEL: test_pmaddubsw:
3315 ; SKYLAKE-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
3316 ; SKYLAKE-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
3317 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3319 ; SKX-LABEL: test_pmaddubsw:
3321 ; SKX-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
3322 ; SKX-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
3323 ; SKX-NEXT: retq # sched: [7:1.00]
3325 ; ZNVER1-LABEL: test_pmaddubsw:
3327 ; ZNVER1-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
3328 ; ZNVER1-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
3329 ; ZNVER1-NEXT: retq # sched: [1:0.50]
3330 %1 = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %a0, <32 x i8> %a1)
3331 %2 = bitcast <16 x i16> %1 to <32 x i8>
3332 %3 = load <32 x i8>, <32 x i8> *%a2, align 32
3333 %4 = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %2, <32 x i8> %3)
3336 declare <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>) nounwind readnone
; Schedule test for vpmaddwd on ymm operands. The first pmadd.wd call uses the
; two register arguments; its <8 x i32> result is bitcast back to <16 x i16>
; so it can feed a second call whose other operand is loaded from %a2, giving
; a register form and a (%rdi) memory form per CPU prefix.
3338 define <8 x i32> @test_pmaddwd(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
3339 ; GENERIC-LABEL: test_pmaddwd:
3341 ; GENERIC-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
3342 ; GENERIC-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
3343 ; GENERIC-NEXT: retq # sched: [1:1.00]
3345 ; HASWELL-LABEL: test_pmaddwd:
3347 ; HASWELL-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
3348 ; HASWELL-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
3349 ; HASWELL-NEXT: retq # sched: [2:1.00]
3351 ; BROADWELL-LABEL: test_pmaddwd:
3352 ; BROADWELL: # BB#0:
3353 ; BROADWELL-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
3354 ; BROADWELL-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
3355 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3357 ; SKYLAKE-LABEL: test_pmaddwd:
3359 ; SKYLAKE-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
3360 ; SKYLAKE-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
3361 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3363 ; SKX-LABEL: test_pmaddwd:
3365 ; SKX-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
3366 ; SKX-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
3367 ; SKX-NEXT: retq # sched: [7:1.00]
3369 ; ZNVER1-LABEL: test_pmaddwd:
3371 ; ZNVER1-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
3372 ; ZNVER1-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
3373 ; ZNVER1-NEXT: retq # sched: [1:0.50]
3374 %1 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %a0, <16 x i16> %a1)
3375 %2 = bitcast <8 x i32> %1 to <16 x i16>
3376 %3 = load <16 x i16>, <16 x i16> *%a2, align 32
3377 %4 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %2, <16 x i16> %3)
3380 declare <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>) nounwind readnone
; Schedule test for the 128-bit vpmaskmovd: a maskload.d from %a0 followed by
; a maskstore.d to the same pointer, both passing %a1 as their second operand.
; The checks expect the load form, the store form, and a vmovdqa that copies
; the loaded value from xmm2 into xmm0.
; The two vpmaskmovd checks under the GENERIC prefix carry no `sched` suffix.
3382 define <4 x i32> @test_pmaskmovd(i8* %a0, <4 x i32> %a1, <4 x i32> %a2) {
3383 ; GENERIC-LABEL: test_pmaskmovd:
3385 ; GENERIC-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2
3386 ; GENERIC-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi)
3387 ; GENERIC-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.50]
3388 ; GENERIC-NEXT: retq # sched: [1:1.00]
3390 ; HASWELL-LABEL: test_pmaskmovd:
3392 ; HASWELL-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [2:2.00]
3393 ; HASWELL-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [4:1.00]
3394 ; HASWELL-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25]
3395 ; HASWELL-NEXT: retq # sched: [2:1.00]
3397 ; BROADWELL-LABEL: test_pmaskmovd:
3398 ; BROADWELL: # BB#0:
3399 ; BROADWELL-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [7:2.00]
3400 ; BROADWELL-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
3401 ; BROADWELL-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25]
3402 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3404 ; SKYLAKE-LABEL: test_pmaskmovd:
3406 ; SKYLAKE-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [7:0.50]
3407 ; SKYLAKE-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [2:1.00]
3408 ; SKYLAKE-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25]
3409 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3411 ; SKX-LABEL: test_pmaskmovd:
3413 ; SKX-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [7:0.50]
3414 ; SKX-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [2:1.00]
3415 ; SKX-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25]
3416 ; SKX-NEXT: retq # sched: [7:1.00]
3418 ; ZNVER1-LABEL: test_pmaskmovd:
3420 ; ZNVER1-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [100:?]
3421 ; ZNVER1-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [100:?]
3422 ; ZNVER1-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25]
3423 ; ZNVER1-NEXT: retq # sched: [1:0.50]
3424 %1 = call <4 x i32> @llvm.x86.avx2.maskload.d(i8* %a0, <4 x i32> %a1)
3425 call void @llvm.x86.avx2.maskstore.d(i8* %a0, <4 x i32> %a1, <4 x i32> %a2)
3428 declare <4 x i32> @llvm.x86.avx2.maskload.d(i8*, <4 x i32>) nounwind readonly
3429 declare void @llvm.x86.avx2.maskstore.d(i8*, <4 x i32>, <4 x i32>) nounwind
; Schedule test for the 256-bit vpmaskmovd: a maskload.d.256 from %a0 followed
; by a maskstore.d.256 to the same pointer, both passing %a1 as their second
; operand. The checks expect the load form, the store form, and a vmovdqa that
; copies the loaded value from ymm2 into ymm0.
; The two vpmaskmovd checks under the GENERIC prefix carry no `sched` suffix.
3431 define <8 x i32> @test_pmaskmovd_ymm(i8* %a0, <8 x i32> %a1, <8 x i32> %a2) {
3432 ; GENERIC-LABEL: test_pmaskmovd_ymm:
3434 ; GENERIC-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2
3435 ; GENERIC-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi)
3436 ; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
3437 ; GENERIC-NEXT: retq # sched: [1:1.00]
3439 ; HASWELL-LABEL: test_pmaskmovd_ymm:
3441 ; HASWELL-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [2:2.00]
3442 ; HASWELL-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [4:1.00]
3443 ; HASWELL-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.25]
3444 ; HASWELL-NEXT: retq # sched: [2:1.00]
3446 ; BROADWELL-LABEL: test_pmaskmovd_ymm:
3447 ; BROADWELL: # BB#0:
3448 ; BROADWELL-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [8:2.00]
3449 ; BROADWELL-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
3450 ; BROADWELL-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.25]
3451 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3453 ; SKYLAKE-LABEL: test_pmaskmovd_ymm:
3455 ; SKYLAKE-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [8:0.50]
3456 ; SKYLAKE-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [2:1.00]
3457 ; SKYLAKE-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.25]
3458 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3460 ; SKX-LABEL: test_pmaskmovd_ymm:
3462 ; SKX-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [8:0.50]
3463 ; SKX-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [2:1.00]
3464 ; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.25]
3465 ; SKX-NEXT: retq # sched: [7:1.00]
3467 ; ZNVER1-LABEL: test_pmaskmovd_ymm:
3469 ; ZNVER1-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [100:?]
3470 ; ZNVER1-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [100:?]
3471 ; ZNVER1-NEXT: vmovdqa %ymm2, %ymm0 # sched: [2:0.25]
3472 ; ZNVER1-NEXT: retq # sched: [1:0.50]
3473 %1 = call <8 x i32> @llvm.x86.avx2.maskload.d.256(i8* %a0, <8 x i32> %a1)
3474 call void @llvm.x86.avx2.maskstore.d.256(i8* %a0, <8 x i32> %a1, <8 x i32> %a2)
3477 declare <8 x i32> @llvm.x86.avx2.maskload.d.256(i8*, <8 x i32>) nounwind readonly
3478 declare void @llvm.x86.avx2.maskstore.d.256(i8*, <8 x i32>, <8 x i32>) nounwind
; Schedule test for the 128-bit vpmaskmovq: a maskload.q from %a0 followed by
; a maskstore.q to the same pointer, both passing %a1 as their second operand.
; The checks expect the load form, the store form, and a vmovdqa that copies
; the loaded value from xmm2 into xmm0.
; The two vpmaskmovq checks under the GENERIC prefix carry no `sched` suffix.
3480 define <2 x i64> @test_pmaskmovq(i8* %a0, <2 x i64> %a1, <2 x i64> %a2) {
3481 ; GENERIC-LABEL: test_pmaskmovq:
3483 ; GENERIC-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2
3484 ; GENERIC-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi)
3485 ; GENERIC-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.50]
3486 ; GENERIC-NEXT: retq # sched: [1:1.00]
3488 ; HASWELL-LABEL: test_pmaskmovq:
3490 ; HASWELL-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [2:2.00]
3491 ; HASWELL-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [4:1.00]
3492 ; HASWELL-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25]
3493 ; HASWELL-NEXT: retq # sched: [2:1.00]
3495 ; BROADWELL-LABEL: test_pmaskmovq:
3496 ; BROADWELL: # BB#0:
3497 ; BROADWELL-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [7:2.00]
3498 ; BROADWELL-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
3499 ; BROADWELL-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25]
3500 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3502 ; SKYLAKE-LABEL: test_pmaskmovq:
3504 ; SKYLAKE-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [7:0.50]
3505 ; SKYLAKE-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [2:1.00]
3506 ; SKYLAKE-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25]
3507 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3509 ; SKX-LABEL: test_pmaskmovq:
3511 ; SKX-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [7:0.50]
3512 ; SKX-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [2:1.00]
3513 ; SKX-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25]
3514 ; SKX-NEXT: retq # sched: [7:1.00]
3516 ; ZNVER1-LABEL: test_pmaskmovq:
3518 ; ZNVER1-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [8:1.00]
3519 ; ZNVER1-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [100:?]
3520 ; ZNVER1-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25]
3521 ; ZNVER1-NEXT: retq # sched: [1:0.50]
3522 %1 = call <2 x i64> @llvm.x86.avx2.maskload.q(i8* %a0, <2 x i64> %a1)
3523 call void @llvm.x86.avx2.maskstore.q(i8* %a0, <2 x i64> %a1, <2 x i64> %a2)
3526 declare <2 x i64> @llvm.x86.avx2.maskload.q(i8*, <2 x i64>) nounwind readonly
3527 declare void @llvm.x86.avx2.maskstore.q(i8*, <2 x i64>, <2 x i64>) nounwind
; Schedule test for the 256-bit vpmaskmovq: a maskload.q.256 from %a0 followed
; by a maskstore.q.256 to the same pointer, both passing %a1 as their second
; operand. The checks expect the load form, the store form, and a vmovdqa that
; copies the loaded value from ymm2 into ymm0.
; The two vpmaskmovq checks under the GENERIC prefix carry no `sched` suffix.
3529 define <4 x i64> @test_pmaskmovq_ymm(i8* %a0, <4 x i64> %a1, <4 x i64> %a2) {
3530 ; GENERIC-LABEL: test_pmaskmovq_ymm:
3532 ; GENERIC-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2
3533 ; GENERIC-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi)
3534 ; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
3535 ; GENERIC-NEXT: retq # sched: [1:1.00]
3537 ; HASWELL-LABEL: test_pmaskmovq_ymm:
3539 ; HASWELL-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [2:2.00]
3540 ; HASWELL-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [4:1.00]
3541 ; HASWELL-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.25]
3542 ; HASWELL-NEXT: retq # sched: [2:1.00]
3544 ; BROADWELL-LABEL: test_pmaskmovq_ymm:
3545 ; BROADWELL: # BB#0:
3546 ; BROADWELL-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [8:2.00]
3547 ; BROADWELL-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
3548 ; BROADWELL-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.25]
3549 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3551 ; SKYLAKE-LABEL: test_pmaskmovq_ymm:
3553 ; SKYLAKE-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [8:0.50]
3554 ; SKYLAKE-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [2:1.00]
3555 ; SKYLAKE-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.25]
3556 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3558 ; SKX-LABEL: test_pmaskmovq_ymm:
3560 ; SKX-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [8:0.50]
3561 ; SKX-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [2:1.00]
3562 ; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.25]
3563 ; SKX-NEXT: retq # sched: [7:1.00]
3565 ; ZNVER1-LABEL: test_pmaskmovq_ymm:
3567 ; ZNVER1-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [9:1.50]
3568 ; ZNVER1-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [100:?]
3569 ; ZNVER1-NEXT: vmovdqa %ymm2, %ymm0 # sched: [2:0.25]
3570 ; ZNVER1-NEXT: retq # sched: [1:0.50]
3571 %1 = call <4 x i64> @llvm.x86.avx2.maskload.q.256(i8* %a0, <4 x i64> %a1)
3572 call void @llvm.x86.avx2.maskstore.q.256(i8* %a0, <4 x i64> %a1, <4 x i64> %a2)
3575 declare <4 x i64> @llvm.x86.avx2.maskload.q.256(i8*, <4 x i64>) nounwind readonly
3576 declare void @llvm.x86.avx2.maskstore.q.256(i8*, <4 x i64>, <4 x i64>) nounwind
; Schedule test for vpmaxsb on ymm operands: the pmaxs.b intrinsic is called
; once on the two register arguments and once on a vector loaded from %a2, so
; every CPU prefix checks a register form and a (%rdi) memory form.
3578 define <32 x i8> @test_pmaxsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
3579 ; GENERIC-LABEL: test_pmaxsb:
3581 ; GENERIC-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3582 ; GENERIC-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3583 ; GENERIC-NEXT: retq # sched: [1:1.00]
3585 ; HASWELL-LABEL: test_pmaxsb:
3587 ; HASWELL-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3588 ; HASWELL-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
3589 ; HASWELL-NEXT: retq # sched: [2:1.00]
3591 ; BROADWELL-LABEL: test_pmaxsb:
3592 ; BROADWELL: # BB#0:
3593 ; BROADWELL-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3594 ; BROADWELL-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
3595 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3597 ; SKYLAKE-LABEL: test_pmaxsb:
3599 ; SKYLAKE-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3600 ; SKYLAKE-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3601 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3603 ; SKX-LABEL: test_pmaxsb:
3605 ; SKX-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3606 ; SKX-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3607 ; SKX-NEXT: retq # sched: [7:1.00]
3609 ; ZNVER1-LABEL: test_pmaxsb:
3611 ; ZNVER1-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3612 ; ZNVER1-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3613 ; ZNVER1-NEXT: retq # sched: [1:0.50]
3614 %1 = call <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8> %a0, <32 x i8> %a1)
3615 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
3616 %3 = call <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8> %1, <32 x i8> %2)
3619 declare <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8>, <32 x i8>) nounwind readnone
; Schedule test for vpmaxsd on ymm operands: the pmaxs.d intrinsic is called
; once on the two register arguments and once on a vector loaded from %a2, so
; every CPU prefix checks a register form and a (%rdi) memory form.
3621 define <8 x i32> @test_pmaxsd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
3622 ; GENERIC-LABEL: test_pmaxsd:
3624 ; GENERIC-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3625 ; GENERIC-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3626 ; GENERIC-NEXT: retq # sched: [1:1.00]
3628 ; HASWELL-LABEL: test_pmaxsd:
3630 ; HASWELL-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3631 ; HASWELL-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
3632 ; HASWELL-NEXT: retq # sched: [2:1.00]
3634 ; BROADWELL-LABEL: test_pmaxsd:
3635 ; BROADWELL: # BB#0:
3636 ; BROADWELL-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3637 ; BROADWELL-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
3638 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3640 ; SKYLAKE-LABEL: test_pmaxsd:
3642 ; SKYLAKE-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3643 ; SKYLAKE-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3644 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3646 ; SKX-LABEL: test_pmaxsd:
3648 ; SKX-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3649 ; SKX-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3650 ; SKX-NEXT: retq # sched: [7:1.00]
3652 ; ZNVER1-LABEL: test_pmaxsd:
3654 ; ZNVER1-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3655 ; ZNVER1-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3656 ; ZNVER1-NEXT: retq # sched: [1:0.50]
3657 %1 = call <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32> %a0, <8 x i32> %a1)
3658 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
3659 %3 = call <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32> %1, <8 x i32> %2)
3662 declare <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32>, <8 x i32>) nounwind readnone
; Schedule test for vpmaxsw on ymm operands: the pmaxs.w intrinsic is called
; once on the two register arguments and once on a vector loaded from %a2, so
; every CPU prefix checks a register form and a (%rdi) memory form.
3664 define <16 x i16> @test_pmaxsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
3665 ; GENERIC-LABEL: test_pmaxsw:
3667 ; GENERIC-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3668 ; GENERIC-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3669 ; GENERIC-NEXT: retq # sched: [1:1.00]
3671 ; HASWELL-LABEL: test_pmaxsw:
3673 ; HASWELL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3674 ; HASWELL-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
3675 ; HASWELL-NEXT: retq # sched: [2:1.00]
3677 ; BROADWELL-LABEL: test_pmaxsw:
3678 ; BROADWELL: # BB#0:
3679 ; BROADWELL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3680 ; BROADWELL-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
3681 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3683 ; SKYLAKE-LABEL: test_pmaxsw:
3685 ; SKYLAKE-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3686 ; SKYLAKE-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3687 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3689 ; SKX-LABEL: test_pmaxsw:
3691 ; SKX-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3692 ; SKX-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3693 ; SKX-NEXT: retq # sched: [7:1.00]
3695 ; ZNVER1-LABEL: test_pmaxsw:
3697 ; ZNVER1-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3698 ; ZNVER1-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3699 ; ZNVER1-NEXT: retq # sched: [1:0.50]
3700 %1 = call <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16> %a0, <16 x i16> %a1)
3701 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
3702 %3 = call <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16> %1, <16 x i16> %2)
3705 declare <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16>, <16 x i16>) nounwind readnone
; Schedule test for vpmaxub on ymm operands: the pmaxu.b intrinsic is called
; once on the two register arguments and once on a vector loaded from %a2, so
; every CPU prefix checks a register form and a (%rdi) memory form.
3707 define <32 x i8> @test_pmaxub(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
3708 ; GENERIC-LABEL: test_pmaxub:
3710 ; GENERIC-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3711 ; GENERIC-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3712 ; GENERIC-NEXT: retq # sched: [1:1.00]
3714 ; HASWELL-LABEL: test_pmaxub:
3716 ; HASWELL-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3717 ; HASWELL-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
3718 ; HASWELL-NEXT: retq # sched: [2:1.00]
3720 ; BROADWELL-LABEL: test_pmaxub:
3721 ; BROADWELL: # BB#0:
3722 ; BROADWELL-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3723 ; BROADWELL-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
3724 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3726 ; SKYLAKE-LABEL: test_pmaxub:
3728 ; SKYLAKE-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3729 ; SKYLAKE-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3730 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3732 ; SKX-LABEL: test_pmaxub:
3734 ; SKX-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3735 ; SKX-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3736 ; SKX-NEXT: retq # sched: [7:1.00]
3738 ; ZNVER1-LABEL: test_pmaxub:
3740 ; ZNVER1-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3741 ; ZNVER1-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3742 ; ZNVER1-NEXT: retq # sched: [1:0.50]
3743 %1 = call <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8> %a0, <32 x i8> %a1)
3744 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
3745 %3 = call <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8> %1, <32 x i8> %2)
3748 declare <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8>, <32 x i8>) nounwind readnone
; Schedule test for vpmaxud on ymm operands: the pmaxu.d intrinsic is called
; once on the two register arguments and once on a vector loaded from %a2, so
; every CPU prefix checks a register form and a (%rdi) memory form.
3750 define <8 x i32> @test_pmaxud(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
3751 ; GENERIC-LABEL: test_pmaxud:
3753 ; GENERIC-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3754 ; GENERIC-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3755 ; GENERIC-NEXT: retq # sched: [1:1.00]
3757 ; HASWELL-LABEL: test_pmaxud:
3759 ; HASWELL-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3760 ; HASWELL-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
3761 ; HASWELL-NEXT: retq # sched: [2:1.00]
3763 ; BROADWELL-LABEL: test_pmaxud:
3764 ; BROADWELL: # BB#0:
3765 ; BROADWELL-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3766 ; BROADWELL-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
3767 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3769 ; SKYLAKE-LABEL: test_pmaxud:
3771 ; SKYLAKE-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3772 ; SKYLAKE-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3773 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3775 ; SKX-LABEL: test_pmaxud:
3777 ; SKX-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3778 ; SKX-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3779 ; SKX-NEXT: retq # sched: [7:1.00]
3781 ; ZNVER1-LABEL: test_pmaxud:
3783 ; ZNVER1-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3784 ; ZNVER1-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3785 ; ZNVER1-NEXT: retq # sched: [1:0.50]
3786 %1 = call <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32> %a0, <8 x i32> %a1)
3787 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
3788 %3 = call <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32> %1, <8 x i32> %2)
3791 declare <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32>, <8 x i32>) nounwind readnone
; Schedule test for vpmaxuw on ymm operands: the pmaxu.w intrinsic is called
; once on the two register arguments and once on a vector loaded from %a2, so
; every CPU prefix checks a register form and a (%rdi) memory form.
3793 define <16 x i16> @test_pmaxuw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
3794 ; GENERIC-LABEL: test_pmaxuw:
3796 ; GENERIC-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3797 ; GENERIC-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3798 ; GENERIC-NEXT: retq # sched: [1:1.00]
3800 ; HASWELL-LABEL: test_pmaxuw:
3802 ; HASWELL-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3803 ; HASWELL-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
3804 ; HASWELL-NEXT: retq # sched: [2:1.00]
3806 ; BROADWELL-LABEL: test_pmaxuw:
3807 ; BROADWELL: # BB#0:
3808 ; BROADWELL-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3809 ; BROADWELL-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
3810 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3812 ; SKYLAKE-LABEL: test_pmaxuw:
3814 ; SKYLAKE-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3815 ; SKYLAKE-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3816 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3818 ; SKX-LABEL: test_pmaxuw:
3820 ; SKX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3821 ; SKX-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3822 ; SKX-NEXT: retq # sched: [7:1.00]
3824 ; ZNVER1-LABEL: test_pmaxuw:
3826 ; ZNVER1-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3827 ; ZNVER1-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3828 ; ZNVER1-NEXT: retq # sched: [1:0.50]
3829 %1 = call <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16> %a0, <16 x i16> %a1)
3830 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
3831 %3 = call <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16> %1, <16 x i16> %2)
3834 declare <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16>, <16 x i16>) nounwind readnone
; Schedule test for vpminsb on ymm operands: the pmins.b intrinsic is called
; once on the two register arguments and once on a vector loaded from %a2, so
; every CPU prefix checks a register form and a (%rdi) memory form.
3836 define <32 x i8> @test_pminsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
3837 ; GENERIC-LABEL: test_pminsb:
3839 ; GENERIC-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3840 ; GENERIC-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3841 ; GENERIC-NEXT: retq # sched: [1:1.00]
3843 ; HASWELL-LABEL: test_pminsb:
3845 ; HASWELL-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3846 ; HASWELL-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
3847 ; HASWELL-NEXT: retq # sched: [2:1.00]
3849 ; BROADWELL-LABEL: test_pminsb:
3850 ; BROADWELL: # BB#0:
3851 ; BROADWELL-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3852 ; BROADWELL-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
3853 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3855 ; SKYLAKE-LABEL: test_pminsb:
3857 ; SKYLAKE-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3858 ; SKYLAKE-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3859 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3861 ; SKX-LABEL: test_pminsb:
3863 ; SKX-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3864 ; SKX-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3865 ; SKX-NEXT: retq # sched: [7:1.00]
3867 ; ZNVER1-LABEL: test_pminsb:
3869 ; ZNVER1-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3870 ; ZNVER1-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3871 ; ZNVER1-NEXT: retq # sched: [1:0.50]
3872 %1 = call <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8> %a0, <32 x i8> %a1)
3873 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
3874 %3 = call <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8> %1, <32 x i8> %2)
3877 declare <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8>, <32 x i8>) nounwind readnone
; Schedule test for vpminsd on ymm operands: the pmins.d intrinsic is called
; once on the two register arguments and once on a vector loaded from %a2, so
; every CPU prefix checks a register form and a (%rdi) memory form.
3879 define <8 x i32> @test_pminsd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
3880 ; GENERIC-LABEL: test_pminsd:
3882 ; GENERIC-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3883 ; GENERIC-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3884 ; GENERIC-NEXT: retq # sched: [1:1.00]
3886 ; HASWELL-LABEL: test_pminsd:
3888 ; HASWELL-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3889 ; HASWELL-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
3890 ; HASWELL-NEXT: retq # sched: [2:1.00]
3892 ; BROADWELL-LABEL: test_pminsd:
3893 ; BROADWELL: # BB#0:
3894 ; BROADWELL-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3895 ; BROADWELL-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
3896 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3898 ; SKYLAKE-LABEL: test_pminsd:
3900 ; SKYLAKE-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3901 ; SKYLAKE-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3902 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3904 ; SKX-LABEL: test_pminsd:
3906 ; SKX-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3907 ; SKX-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3908 ; SKX-NEXT: retq # sched: [7:1.00]
3910 ; ZNVER1-LABEL: test_pminsd:
3912 ; ZNVER1-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3913 ; ZNVER1-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3914 ; ZNVER1-NEXT: retq # sched: [1:0.50]
3915 %1 = call <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32> %a0, <8 x i32> %a1)
3916 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
3917 %3 = call <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32> %1, <8 x i32> %2)
3920 declare <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32>, <8 x i32>) nounwind readnone
; Schedule test for vpminsw on ymm operands: the pmins.w intrinsic is called
; once on the two register arguments and once on a vector loaded from %a2, so
; every CPU prefix checks a register form and a (%rdi) memory form.
3922 define <16 x i16> @test_pminsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
3923 ; GENERIC-LABEL: test_pminsw:
3925 ; GENERIC-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3926 ; GENERIC-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3927 ; GENERIC-NEXT: retq # sched: [1:1.00]
3929 ; HASWELL-LABEL: test_pminsw:
3931 ; HASWELL-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3932 ; HASWELL-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
3933 ; HASWELL-NEXT: retq # sched: [2:1.00]
3935 ; BROADWELL-LABEL: test_pminsw:
3936 ; BROADWELL: # BB#0:
3937 ; BROADWELL-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3938 ; BROADWELL-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
3939 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3941 ; SKYLAKE-LABEL: test_pminsw:
3943 ; SKYLAKE-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3944 ; SKYLAKE-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3945 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3947 ; SKX-LABEL: test_pminsw:
3949 ; SKX-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3950 ; SKX-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3951 ; SKX-NEXT: retq # sched: [7:1.00]
3953 ; ZNVER1-LABEL: test_pminsw:
3955 ; ZNVER1-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3956 ; ZNVER1-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3957 ; ZNVER1-NEXT: retq # sched: [1:0.50]
3958 %1 = call <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16> %a0, <16 x i16> %a1)
3959 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
3960 %3 = call <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16> %1, <16 x i16> %2)
3963 declare <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16>, <16 x i16>) nounwind readnone
; Schedule test for vpminub on ymm operands: the pminu.b intrinsic is called
; once on the two register arguments and once on a vector loaded from %a2, so
; every CPU prefix checks a register form and a (%rdi) memory form.
3965 define <32 x i8> @test_pminub(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
3966 ; GENERIC-LABEL: test_pminub:
3968 ; GENERIC-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3969 ; GENERIC-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3970 ; GENERIC-NEXT: retq # sched: [1:1.00]
3972 ; HASWELL-LABEL: test_pminub:
3974 ; HASWELL-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3975 ; HASWELL-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
3976 ; HASWELL-NEXT: retq # sched: [2:1.00]
3978 ; BROADWELL-LABEL: test_pminub:
3979 ; BROADWELL: # BB#0:
3980 ; BROADWELL-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3981 ; BROADWELL-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
3982 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3984 ; SKYLAKE-LABEL: test_pminub:
3986 ; SKYLAKE-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3987 ; SKYLAKE-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3988 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3990 ; SKX-LABEL: test_pminub:
3992 ; SKX-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3993 ; SKX-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3994 ; SKX-NEXT: retq # sched: [7:1.00]
3996 ; ZNVER1-LABEL: test_pminub:
3998 ; ZNVER1-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3999 ; ZNVER1-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
4000 ; ZNVER1-NEXT: retq # sched: [1:0.50]
4001 %1 = call <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8> %a0, <32 x i8> %a1)
4002 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
4003 %3 = call <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8> %1, <32 x i8> %2)
4006 declare <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8>, <32 x i8>) nounwind readnone
; Scheduling checks for vpminud (llvm.x86.avx2.pminu.d) on <8 x i32> vectors.
; The intrinsic is applied to the two register arguments and then to that
; result and a vector loaded from %a2, so the expected output for every CPU
; contains both a register-register and a register-memory vpminud.
; NOTE(review): the Broadwell block-label check uses the "BB#0" spelling;
; confirm it still matches llc output or regenerate the assertions.
4008 define <8 x i32> @test_pminud(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
4009 ; GENERIC-LABEL: test_pminud:
4011 ; GENERIC-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4012 ; GENERIC-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
4013 ; GENERIC-NEXT: retq # sched: [1:1.00]
4015 ; HASWELL-LABEL: test_pminud:
4017 ; HASWELL-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4018 ; HASWELL-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
4019 ; HASWELL-NEXT: retq # sched: [2:1.00]
4021 ; BROADWELL-LABEL: test_pminud:
4022 ; BROADWELL: # BB#0:
4023 ; BROADWELL-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4024 ; BROADWELL-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
4025 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4027 ; SKYLAKE-LABEL: test_pminud:
4029 ; SKYLAKE-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4030 ; SKYLAKE-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
4031 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4033 ; SKX-LABEL: test_pminud:
4035 ; SKX-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4036 ; SKX-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
4037 ; SKX-NEXT: retq # sched: [7:1.00]
4039 ; ZNVER1-LABEL: test_pminud:
4041 ; ZNVER1-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4042 ; ZNVER1-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
4043 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register operands first, then the same intrinsic with a loaded operand.
4044 %1 = call <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32> %a0, <8 x i32> %a1)
4045 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
4046 %3 = call <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32> %1, <8 x i32> %2)
4049 declare <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32>, <8 x i32>) nounwind readnone
; Scheduling checks for vpminuw (llvm.x86.avx2.pminu.w) on <16 x i16> vectors.
; The intrinsic is applied to the two register arguments and then to that
; result and a vector loaded from %a2, so the expected output for every CPU
; contains both a register-register and a register-memory vpminuw.
; NOTE(review): the Broadwell block-label check uses the "BB#0" spelling;
; confirm it still matches llc output or regenerate the assertions.
4051 define <16 x i16> @test_pminuw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
4052 ; GENERIC-LABEL: test_pminuw:
4054 ; GENERIC-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4055 ; GENERIC-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
4056 ; GENERIC-NEXT: retq # sched: [1:1.00]
4058 ; HASWELL-LABEL: test_pminuw:
4060 ; HASWELL-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4061 ; HASWELL-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
4062 ; HASWELL-NEXT: retq # sched: [2:1.00]
4064 ; BROADWELL-LABEL: test_pminuw:
4065 ; BROADWELL: # BB#0:
4066 ; BROADWELL-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4067 ; BROADWELL-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
4068 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4070 ; SKYLAKE-LABEL: test_pminuw:
4072 ; SKYLAKE-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4073 ; SKYLAKE-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
4074 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4076 ; SKX-LABEL: test_pminuw:
4078 ; SKX-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4079 ; SKX-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
4080 ; SKX-NEXT: retq # sched: [7:1.00]
4082 ; ZNVER1-LABEL: test_pminuw:
4084 ; ZNVER1-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4085 ; ZNVER1-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
4086 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register operands first, then the same intrinsic with a loaded operand.
4087 %1 = call <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16> %a0, <16 x i16> %a1)
4088 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
4089 %3 = call <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16> %1, <16 x i16> %2)
4092 declare <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16>, <16 x i16>) nounwind readnone
; Scheduling checks for vpmovmskb (llvm.x86.avx2.pmovmskb) with a <32 x i8>
; source and an i32 result. Only the register form is exercised.
; The expected output for every CPU also contains a vzeroupper before the
; return; the generic model's vzeroupper line carries no sched annotation and
; the znver1 line is annotated [100:?].
; NOTE(review): the Broadwell block-label check uses the "BB#0" spelling;
; confirm it still matches llc output or regenerate the assertions.
4094 define i32 @test_pmovmskb(<32 x i8> %a0) {
4095 ; GENERIC-LABEL: test_pmovmskb:
4097 ; GENERIC-NEXT: vpmovmskb %ymm0, %eax # sched: [1:1.00]
4098 ; GENERIC-NEXT: vzeroupper
4099 ; GENERIC-NEXT: retq # sched: [1:1.00]
4101 ; HASWELL-LABEL: test_pmovmskb:
4103 ; HASWELL-NEXT: vpmovmskb %ymm0, %eax # sched: [3:1.00]
4104 ; HASWELL-NEXT: vzeroupper # sched: [4:1.00]
4105 ; HASWELL-NEXT: retq # sched: [2:1.00]
4107 ; BROADWELL-LABEL: test_pmovmskb:
4108 ; BROADWELL: # BB#0:
4109 ; BROADWELL-NEXT: vpmovmskb %ymm0, %eax # sched: [3:1.00]
4110 ; BROADWELL-NEXT: vzeroupper # sched: [4:1.00]
4111 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4113 ; SKYLAKE-LABEL: test_pmovmskb:
4115 ; SKYLAKE-NEXT: vpmovmskb %ymm0, %eax # sched: [2:1.00]
4116 ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00]
4117 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4119 ; SKX-LABEL: test_pmovmskb:
4121 ; SKX-NEXT: vpmovmskb %ymm0, %eax # sched: [2:1.00]
4122 ; SKX-NEXT: vzeroupper # sched: [4:1.00]
4123 ; SKX-NEXT: retq # sched: [7:1.00]
4125 ; ZNVER1-LABEL: test_pmovmskb:
4127 ; ZNVER1-NEXT: vpmovmskb %ymm0, %eax # sched: [2:1.00]
4128 ; ZNVER1-NEXT: vzeroupper # sched: [100:?]
4129 ; ZNVER1-NEXT: retq # sched: [1:0.50]
4130 %1 = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> %a0)
4133 declare i32 @llvm.x86.avx2.pmovmskb(<32 x i8>) nounwind readnone
; Scheduling checks for vpmovsxbd: the low eight bytes of a <16 x i8> vector
; are sign-extended to <8 x i32>. This is done once for the register argument
; and once for a vector loaded from %a1; the two results are then added, which
; produces the trailing vpaddd in the expected output.
; The znver1 checks expect the memory-operand form first, whereas the other
; CPUs expect the register form first.
; NOTE(review): the Broadwell block-label check uses the "BB#0" spelling;
; confirm it still matches llc output or regenerate the assertions.
4135 define <8 x i32> @test_pmovsxbd(<16 x i8> %a0, <16 x i8> *%a1) {
4136 ; GENERIC-LABEL: test_pmovsxbd:
4138 ; GENERIC-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [1:1.00]
4139 ; GENERIC-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [5:1.00]
4140 ; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4141 ; GENERIC-NEXT: retq # sched: [1:1.00]
4143 ; HASWELL-LABEL: test_pmovsxbd:
4145 ; HASWELL-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [3:1.00]
4146 ; HASWELL-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [3:1.00]
4147 ; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4148 ; HASWELL-NEXT: retq # sched: [2:1.00]
4150 ; BROADWELL-LABEL: test_pmovsxbd:
4151 ; BROADWELL: # BB#0:
4152 ; BROADWELL-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [3:1.00]
4153 ; BROADWELL-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:1.00]
4154 ; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4155 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4157 ; SKYLAKE-LABEL: test_pmovsxbd:
4159 ; SKYLAKE-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [3:1.00]
4160 ; SKYLAKE-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:1.00]
4161 ; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4162 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4164 ; SKX-LABEL: test_pmovsxbd:
4166 ; SKX-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [3:1.00]
4167 ; SKX-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:1.00]
4168 ; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4169 ; SKX-NEXT: retq # sched: [7:1.00]
4171 ; ZNVER1-LABEL: test_pmovsxbd:
4173 ; ZNVER1-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:0.50]
4174 ; ZNVER1-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [1:0.25]
4175 ; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4176 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Take elements 0-7, sign-extend, repeat for the loaded vector, then add.
4177 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
4178 %2 = sext <8 x i8> %1 to <8 x i32>
4179 %3 = load <16 x i8>, <16 x i8> *%a1, align 16
4180 %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
4181 %5 = sext <8 x i8> %4 to <8 x i32>
4182 %6 = add <8 x i32> %2, %5
; Scheduling checks for vpmovsxbq: the low four bytes of a <16 x i8> vector
; are sign-extended to <4 x i64>. This is done once for the register argument
; and once for a vector loaded from %a1; the two results are then added, which
; produces the trailing vpaddq in the expected output.
; The znver1 checks expect the memory-operand form first, whereas the other
; CPUs expect the register form first.
; NOTE(review): the Broadwell block-label check uses the "BB#0" spelling;
; confirm it still matches llc output or regenerate the assertions.
4186 define <4 x i64> @test_pmovsxbq(<16 x i8> %a0, <16 x i8> *%a1) {
4187 ; GENERIC-LABEL: test_pmovsxbq:
4189 ; GENERIC-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [1:1.00]
4190 ; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [5:1.00]
4191 ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4192 ; GENERIC-NEXT: retq # sched: [1:1.00]
4194 ; HASWELL-LABEL: test_pmovsxbq:
4196 ; HASWELL-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [3:1.00]
4197 ; HASWELL-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [3:1.00]
4198 ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4199 ; HASWELL-NEXT: retq # sched: [2:1.00]
4201 ; BROADWELL-LABEL: test_pmovsxbq:
4202 ; BROADWELL: # BB#0:
4203 ; BROADWELL-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [3:1.00]
4204 ; BROADWELL-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:1.00]
4205 ; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4206 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4208 ; SKYLAKE-LABEL: test_pmovsxbq:
4210 ; SKYLAKE-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [3:1.00]
4211 ; SKYLAKE-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:1.00]
4212 ; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4213 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4215 ; SKX-LABEL: test_pmovsxbq:
4217 ; SKX-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [3:1.00]
4218 ; SKX-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:1.00]
4219 ; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4220 ; SKX-NEXT: retq # sched: [7:1.00]
4222 ; ZNVER1-LABEL: test_pmovsxbq:
4224 ; ZNVER1-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:0.50]
4225 ; ZNVER1-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [1:0.50]
4226 ; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4227 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Take elements 0-3, sign-extend, repeat for the loaded vector, then add.
4228 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4229 %2 = sext <4 x i8> %1 to <4 x i64>
4230 %3 = load <16 x i8>, <16 x i8> *%a1, align 16
4231 %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4232 %5 = sext <4 x i8> %4 to <4 x i64>
4233 %6 = add <4 x i64> %2, %5
; Scheduling checks for vpmovsxbw: a whole <16 x i8> vector is sign-extended
; to <16 x i16>. This is done once for the register argument and once for a
; vector loaded from %a1; the two results are then added, which produces the
; trailing vpaddw in the expected output.
; The znver1 checks expect the memory-operand form first, whereas the other
; CPUs expect the register form first.
; NOTE(review): the Broadwell block-label check uses the "BB#0" spelling;
; confirm it still matches llc output or regenerate the assertions.
4237 define <16 x i16> @test_pmovsxbw(<16 x i8> %a0, <16 x i8> *%a1) {
4238 ; GENERIC-LABEL: test_pmovsxbw:
4240 ; GENERIC-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [1:1.00]
4241 ; GENERIC-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [5:1.00]
4242 ; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4243 ; GENERIC-NEXT: retq # sched: [1:1.00]
4245 ; HASWELL-LABEL: test_pmovsxbw:
4247 ; HASWELL-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00]
4248 ; HASWELL-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [3:1.00]
4249 ; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4250 ; HASWELL-NEXT: retq # sched: [2:1.00]
4252 ; BROADWELL-LABEL: test_pmovsxbw:
4253 ; BROADWELL: # BB#0:
4254 ; BROADWELL-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00]
4255 ; BROADWELL-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [8:1.00]
4256 ; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4257 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4259 ; SKYLAKE-LABEL: test_pmovsxbw:
4261 ; SKYLAKE-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00]
4262 ; SKYLAKE-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [9:1.00]
4263 ; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4264 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4266 ; SKX-LABEL: test_pmovsxbw:
4268 ; SKX-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00]
4269 ; SKX-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [9:1.00]
4270 ; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4271 ; SKX-NEXT: retq # sched: [7:1.00]
4273 ; ZNVER1-LABEL: test_pmovsxbw:
4275 ; ZNVER1-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [8:0.50]
4276 ; ZNVER1-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [1:0.50]
4277 ; ZNVER1-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4278 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Sign-extend the argument and the loaded vector, then add.
4279 %1 = sext <16 x i8> %a0 to <16 x i16>
4280 %2 = load <16 x i8>, <16 x i8> *%a1, align 16
4281 %3 = sext <16 x i8> %2 to <16 x i16>
4282 %4 = add <16 x i16> %1, %3
; Scheduling checks for vpmovsxdq: a whole <4 x i32> vector is sign-extended
; to <4 x i64>. This is done once for the register argument and once for a
; vector loaded from %a1; the two results are then added, which produces the
; trailing vpaddq in the expected output.
; The znver1 checks expect the memory-operand form first, whereas the other
; CPUs expect the register form first.
; NOTE(review): the Broadwell block-label check uses the "BB#0" spelling;
; confirm it still matches llc output or regenerate the assertions.
4286 define <4 x i64> @test_pmovsxdq(<4 x i32> %a0, <4 x i32> *%a1) {
4287 ; GENERIC-LABEL: test_pmovsxdq:
4289 ; GENERIC-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [1:1.00]
4290 ; GENERIC-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [5:1.00]
4291 ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4292 ; GENERIC-NEXT: retq # sched: [1:1.00]
4294 ; HASWELL-LABEL: test_pmovsxdq:
4296 ; HASWELL-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00]
4297 ; HASWELL-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [3:1.00]
4298 ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4299 ; HASWELL-NEXT: retq # sched: [2:1.00]
4301 ; BROADWELL-LABEL: test_pmovsxdq:
4302 ; BROADWELL: # BB#0:
4303 ; BROADWELL-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00]
4304 ; BROADWELL-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [8:1.00]
4305 ; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4306 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4308 ; SKYLAKE-LABEL: test_pmovsxdq:
4310 ; SKYLAKE-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00]
4311 ; SKYLAKE-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [9:1.00]
4312 ; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4313 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4315 ; SKX-LABEL: test_pmovsxdq:
4317 ; SKX-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00]
4318 ; SKX-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [9:1.00]
4319 ; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4320 ; SKX-NEXT: retq # sched: [7:1.00]
4322 ; ZNVER1-LABEL: test_pmovsxdq:
4324 ; ZNVER1-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [8:0.50]
4325 ; ZNVER1-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [1:0.50]
4326 ; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4327 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Sign-extend the argument and the loaded vector, then add.
4328 %1 = sext <4 x i32> %a0 to <4 x i64>
4329 %2 = load <4 x i32>, <4 x i32> *%a1, align 16
4330 %3 = sext <4 x i32> %2 to <4 x i64>
4331 %4 = add <4 x i64> %1, %3
; Scheduling checks for vpmovsxwd: a whole <8 x i16> vector is sign-extended
; to <8 x i32>. This is done once for the register argument and once for a
; vector loaded from %a1; the two results are then added, which produces the
; trailing vpaddd in the expected output.
; The znver1 checks expect the memory-operand form first, whereas the other
; CPUs expect the register form first.
; NOTE(review): the Broadwell block-label check uses the "BB#0" spelling;
; confirm it still matches llc output or regenerate the assertions.
4335 define <8 x i32> @test_pmovsxwd(<8 x i16> %a0, <8 x i16> *%a1) {
4336 ; GENERIC-LABEL: test_pmovsxwd:
4338 ; GENERIC-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [1:1.00]
4339 ; GENERIC-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [5:1.00]
4340 ; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4341 ; GENERIC-NEXT: retq # sched: [1:1.00]
4343 ; HASWELL-LABEL: test_pmovsxwd:
4345 ; HASWELL-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00]
4346 ; HASWELL-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [3:1.00]
4347 ; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4348 ; HASWELL-NEXT: retq # sched: [2:1.00]
4350 ; BROADWELL-LABEL: test_pmovsxwd:
4351 ; BROADWELL: # BB#0:
4352 ; BROADWELL-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00]
4353 ; BROADWELL-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [8:1.00]
4354 ; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4355 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4357 ; SKYLAKE-LABEL: test_pmovsxwd:
4359 ; SKYLAKE-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00]
4360 ; SKYLAKE-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [9:1.00]
4361 ; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4362 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4364 ; SKX-LABEL: test_pmovsxwd:
4366 ; SKX-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00]
4367 ; SKX-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [9:1.00]
4368 ; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4369 ; SKX-NEXT: retq # sched: [7:1.00]
4371 ; ZNVER1-LABEL: test_pmovsxwd:
4373 ; ZNVER1-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [8:0.50]
4374 ; ZNVER1-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [1:0.25]
4375 ; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4376 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Sign-extend the argument and the loaded vector, then add.
4377 %1 = sext <8 x i16> %a0 to <8 x i32>
4378 %2 = load <8 x i16>, <8 x i16> *%a1, align 16
4379 %3 = sext <8 x i16> %2 to <8 x i32>
4380 %4 = add <8 x i32> %1, %3
; Scheduling checks for vpmovsxwq: the low four words of an <8 x i16> vector
; are sign-extended to <4 x i64>. This is done once for the register argument
; and once for a vector loaded from %a1; the two results are then added, which
; produces the trailing vpaddq in the expected output.
; The znver1 checks expect the memory-operand form first, whereas the other
; CPUs expect the register form first.
; NOTE(review): the Broadwell block-label check uses the "BB#0" spelling;
; confirm it still matches llc output or regenerate the assertions.
4384 define <4 x i64> @test_pmovsxwq(<8 x i16> %a0, <8 x i16> *%a1) {
4385 ; GENERIC-LABEL: test_pmovsxwq:
4387 ; GENERIC-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [1:1.00]
4388 ; GENERIC-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [5:1.00]
4389 ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4390 ; GENERIC-NEXT: retq # sched: [1:1.00]
4392 ; HASWELL-LABEL: test_pmovsxwq:
4394 ; HASWELL-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [3:1.00]
4395 ; HASWELL-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [3:1.00]
4396 ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4397 ; HASWELL-NEXT: retq # sched: [2:1.00]
4399 ; BROADWELL-LABEL: test_pmovsxwq:
4400 ; BROADWELL: # BB#0:
4401 ; BROADWELL-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [3:1.00]
4402 ; BROADWELL-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:1.00]
4403 ; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4404 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4406 ; SKYLAKE-LABEL: test_pmovsxwq:
4408 ; SKYLAKE-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [3:1.00]
4409 ; SKYLAKE-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:1.00]
4410 ; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4411 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4413 ; SKX-LABEL: test_pmovsxwq:
4415 ; SKX-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [3:1.00]
4416 ; SKX-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:1.00]
4417 ; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4418 ; SKX-NEXT: retq # sched: [7:1.00]
4420 ; ZNVER1-LABEL: test_pmovsxwq:
4422 ; ZNVER1-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:0.50]
4423 ; ZNVER1-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [1:0.25]
4424 ; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4425 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Take elements 0-3, sign-extend, repeat for the loaded vector, then add.
4426 %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4427 %2 = sext <4 x i16> %1 to <4 x i64>
4428 %3 = load <8 x i16>, <8 x i16> *%a1, align 16
4429 %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4430 %5 = sext <4 x i16> %4 to <4 x i64>
4431 %6 = add <4 x i64> %2, %5
; Scheduling checks for vpmovzxbd: the low eight bytes of a <16 x i8> vector
; are zero-extended to <8 x i32>. This is done once for the register argument
; and once for a vector loaded from %a1; the two results are then added, which
; produces the trailing vpaddd in the expected output.
; The vpmovzxbd check lines match the printed element layout ("ymm0 = ...")
; and skip the operand text before it with a regex.
; The znver1 checks expect the memory-operand form first, whereas the other
; CPUs expect the register form first.
; NOTE(review): the Broadwell block-label check uses the "BB#0" spelling;
; confirm it still matches llc output or regenerate the assertions.
4435 define <8 x i32> @test_pmovzxbd(<16 x i8> %a0, <16 x i8> *%a1) {
4436 ; GENERIC-LABEL: test_pmovzxbd:
4438 ; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:1.00]
4439 ; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [5:1.00]
4440 ; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4441 ; GENERIC-NEXT: retq # sched: [1:1.00]
4443 ; HASWELL-LABEL: test_pmovzxbd:
4445 ; HASWELL-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00]
4446 ; HASWELL-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [3:1.00]
4447 ; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4448 ; HASWELL-NEXT: retq # sched: [2:1.00]
4450 ; BROADWELL-LABEL: test_pmovzxbd:
4451 ; BROADWELL: # BB#0:
4452 ; BROADWELL-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00]
4453 ; BROADWELL-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [9:1.00]
4454 ; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4455 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4457 ; SKYLAKE-LABEL: test_pmovzxbd:
4459 ; SKYLAKE-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00]
4460 ; SKYLAKE-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [10:1.00]
4461 ; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4462 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4464 ; SKX-LABEL: test_pmovzxbd:
4466 ; SKX-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00]
4467 ; SKX-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [10:1.00]
4468 ; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4469 ; SKX-NEXT: retq # sched: [7:1.00]
4471 ; ZNVER1-LABEL: test_pmovzxbd:
4473 ; ZNVER1-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [8:0.50]
4474 ; ZNVER1-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:0.25]
4475 ; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4476 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Take elements 0-7, zero-extend, repeat for the loaded vector, then add.
4477 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
4478 %2 = zext <8 x i8> %1 to <8 x i32>
4479 %3 = load <16 x i8>, <16 x i8> *%a1, align 16
4480 %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
4481 %5 = zext <8 x i8> %4 to <8 x i32>
4482 %6 = add <8 x i32> %2, %5
; Scheduling checks for vpmovzxbq: the low four bytes of a <16 x i8> vector
; are zero-extended to <4 x i64>. This is done once for the register argument
; and once for a vector loaded from %a1; the two results are then added, which
; produces the trailing vpaddq in the expected output.
; The vpmovzxbq check lines match the printed element layout ("ymm0 = ...")
; and skip the operand text before it with a regex.
; The znver1 checks expect the memory-operand form first, whereas the other
; CPUs expect the register form first.
; NOTE(review): the Broadwell block-label check uses the "BB#0" spelling;
; confirm it still matches llc output or regenerate the assertions.
4486 define <4 x i64> @test_pmovzxbq(<16 x i8> %a0, <16 x i8> *%a1) {
4487 ; GENERIC-LABEL: test_pmovzxbq:
4489 ; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
4490 ; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [5:1.00]
4491 ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4492 ; GENERIC-NEXT: retq # sched: [1:1.00]
4494 ; HASWELL-LABEL: test_pmovzxbq:
4496 ; HASWELL-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00]
4497 ; HASWELL-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00]
4498 ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4499 ; HASWELL-NEXT: retq # sched: [2:1.00]
4501 ; BROADWELL-LABEL: test_pmovzxbq:
4502 ; BROADWELL: # BB#0:
4503 ; BROADWELL-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00]
4504 ; BROADWELL-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [9:1.00]
4505 ; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4506 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4508 ; SKYLAKE-LABEL: test_pmovzxbq:
4510 ; SKYLAKE-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00]
4511 ; SKYLAKE-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [10:1.00]
4512 ; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4513 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4515 ; SKX-LABEL: test_pmovzxbq:
4517 ; SKX-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00]
4518 ; SKX-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [10:1.00]
4519 ; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4520 ; SKX-NEXT: retq # sched: [7:1.00]
4522 ; ZNVER1-LABEL: test_pmovzxbq:
4524 ; ZNVER1-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [8:0.50]
4525 ; ZNVER1-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50]
4526 ; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4527 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Take elements 0-3, zero-extend, repeat for the loaded vector, then add.
4528 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4529 %2 = zext <4 x i8> %1 to <4 x i64>
4530 %3 = load <16 x i8>, <16 x i8> *%a1, align 16
4531 %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4532 %5 = zext <4 x i8> %4 to <4 x i64>
4533 %6 = add <4 x i64> %2, %5
; Scheduling checks for vpmovzxbw: a whole <16 x i8> vector is zero-extended
; to <16 x i16>. This is done once for the register argument and once for a
; vector loaded from %a1; the two results are then added, which produces the
; trailing vpaddw in the expected output.
; The vpmovzxbw check lines match the printed element layout ("ymm0 = ...")
; and skip the operand text before it with a regex.
; The znver1 checks expect the memory-operand form first, whereas the other
; CPUs expect the register form first.
; NOTE(review): the Broadwell block-label check uses the "BB#0" spelling;
; confirm it still matches llc output or regenerate the assertions.
4537 define <16 x i16> @test_pmovzxbw(<16 x i8> %a0, <16 x i8> *%a1) {
4538 ; GENERIC-LABEL: test_pmovzxbw:
4540 ; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00]
4541 ; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [5:1.00]
4542 ; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4543 ; GENERIC-NEXT: retq # sched: [1:1.00]
4545 ; HASWELL-LABEL: test_pmovzxbw:
4547 ; HASWELL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00]
4548 ; HASWELL-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [3:1.00]
4549 ; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4550 ; HASWELL-NEXT: retq # sched: [2:1.00]
4552 ; BROADWELL-LABEL: test_pmovzxbw:
4553 ; BROADWELL: # BB#0:
4554 ; BROADWELL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00]
4555 ; BROADWELL-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [9:1.00]
4556 ; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4557 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4559 ; SKYLAKE-LABEL: test_pmovzxbw:
4561 ; SKYLAKE-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00]
4562 ; SKYLAKE-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [10:1.00]
4563 ; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4564 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4566 ; SKX-LABEL: test_pmovzxbw:
4568 ; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00]
4569 ; SKX-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [10:1.00]
4570 ; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4571 ; SKX-NEXT: retq # sched: [7:1.00]
4573 ; ZNVER1-LABEL: test_pmovzxbw:
4575 ; ZNVER1-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [8:0.50]
4576 ; ZNVER1-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:0.50]
4577 ; ZNVER1-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4578 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Zero-extend the argument and the loaded vector, then add.
4579 %1 = zext <16 x i8> %a0 to <16 x i16>
4580 %2 = load <16 x i8>, <16 x i8> *%a1, align 16
4581 %3 = zext <16 x i8> %2 to <16 x i16>
4582 %4 = add <16 x i16> %1, %3
; Scheduling checks for vpmovzxdq: a whole <4 x i32> vector is zero-extended
; to <4 x i64>. This is done once for the register argument and once for a
; vector loaded from %a1; the two results are then added, which produces the
; trailing vpaddq in the expected output.
; The vpmovzxdq check lines match the printed element layout ("ymm0 = ...")
; and skip the operand text before it with a regex.
; The znver1 checks expect the memory-operand form first, whereas the other
; CPUs expect the register form first.
; NOTE(review): the Broadwell block-label check uses the "BB#0" spelling;
; confirm it still matches llc output or regenerate the assertions.
4586 define <4 x i64> @test_pmovzxdq(<4 x i32> %a0, <4 x i32> *%a1) {
4587 ; GENERIC-LABEL: test_pmovzxdq:
4589 ; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
4590 ; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [5:1.00]
4591 ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4592 ; GENERIC-NEXT: retq # sched: [1:1.00]
4594 ; HASWELL-LABEL: test_pmovzxdq:
4596 ; HASWELL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00]
4597 ; HASWELL-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [3:1.00]
4598 ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4599 ; HASWELL-NEXT: retq # sched: [2:1.00]
4601 ; BROADWELL-LABEL: test_pmovzxdq:
4602 ; BROADWELL: # BB#0:
4603 ; BROADWELL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00]
4604 ; BROADWELL-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [9:1.00]
4605 ; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4606 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4608 ; SKYLAKE-LABEL: test_pmovzxdq:
4610 ; SKYLAKE-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00]
4611 ; SKYLAKE-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [10:1.00]
4612 ; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4613 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4615 ; SKX-LABEL: test_pmovzxdq:
4617 ; SKX-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00]
4618 ; SKX-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [10:1.00]
4619 ; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4620 ; SKX-NEXT: retq # sched: [7:1.00]
4622 ; ZNVER1-LABEL: test_pmovzxdq:
4624 ; ZNVER1-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:0.50]
4625 ; ZNVER1-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50]
4626 ; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4627 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Zero-extend the argument and the loaded vector, then add.
4628 %1 = zext <4 x i32> %a0 to <4 x i64>
4629 %2 = load <4 x i32>, <4 x i32> *%a1, align 16
4630 %3 = zext <4 x i32> %2 to <4 x i64>
4631 %4 = add <4 x i64> %1, %3
; Scheduling test for vpmovzxwd. Zero-extends the <8 x i16> argument %a0 and a
; <8 x i16> loaded from %a1 to <8 x i32>, then adds the two results, so the
; expected assembly has a register-source and a memory-source vpmovzxwd plus a
; vpaddd, each with the "sched" annotation printed under -print-schedule.
; For -mcpu=znver1 the memory-source form is expected first; the other CPUs
; expect the register form first. Assertions are autogenerated by
; utils/update_llc_test_checks.py; regenerate them rather than editing by hand.
4635 define <8 x i32> @test_pmovzxwd(<8 x i16> %a0, <8 x i16> *%a1) {
4636 ; GENERIC-LABEL: test_pmovzxwd:
4638 ; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
4639 ; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [5:1.00]
4640 ; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4641 ; GENERIC-NEXT: retq # sched: [1:1.00]
4643 ; HASWELL-LABEL: test_pmovzxwd:
4645 ; HASWELL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
4646 ; HASWELL-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [3:1.00]
4647 ; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4648 ; HASWELL-NEXT: retq # sched: [2:1.00]
4650 ; BROADWELL-LABEL: test_pmovzxwd:
4651 ; BROADWELL: # BB#0:
4652 ; BROADWELL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
4653 ; BROADWELL-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:1.00]
4654 ; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4655 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4657 ; SKYLAKE-LABEL: test_pmovzxwd:
4659 ; SKYLAKE-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
4660 ; SKYLAKE-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [9:1.00]
4661 ; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4662 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4664 ; SKX-LABEL: test_pmovzxwd:
4666 ; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
4667 ; SKX-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [9:1.00]
4668 ; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4669 ; SKX-NEXT: retq # sched: [7:1.00]
4671 ; ZNVER1-LABEL: test_pmovzxwd:
4673 ; ZNVER1-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:0.50]
4674 ; ZNVER1-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.25]
4675 ; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4676 ; ZNVER1-NEXT: retq # sched: [1:0.50]
4677 %1 = zext <8 x i16> %a0 to <8 x i32>
4678 %2 = load <8 x i16>, <8 x i16> *%a1, align 16
4679 %3 = zext <8 x i16> %2 to <8 x i32>
4680 %4 = add <8 x i32> %1, %3
; Scheduling test for vpmovzxwq. Takes elements 0..3 of the <8 x i16> argument
; %a0 and of an <8 x i16> loaded from %a1 (via shufflevector), zero-extends
; each <4 x i16> to <4 x i64>, and adds the two results. The expected assembly
; has a register-source and a memory-source vpmovzxwq plus a vpaddq, each with
; the "sched" annotation printed under -print-schedule. For -mcpu=znver1 the
; memory-source form is expected first; the other CPUs expect the register
; form first. Assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them rather than editing by hand.
4684 define <4 x i64> @test_pmovzxwq(<8 x i16> %a0, <8 x i16> *%a1) {
4685 ; GENERIC-LABEL: test_pmovzxwq:
4687 ; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
4688 ; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [5:1.00]
4689 ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4690 ; GENERIC-NEXT: retq # sched: [1:1.00]
4692 ; HASWELL-LABEL: test_pmovzxwq:
4694 ; HASWELL-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00]
4695 ; HASWELL-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [3:1.00]
4696 ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4697 ; HASWELL-NEXT: retq # sched: [2:1.00]
4699 ; BROADWELL-LABEL: test_pmovzxwq:
4700 ; BROADWELL: # BB#0:
4701 ; BROADWELL-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00]
4702 ; BROADWELL-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [9:1.00]
4703 ; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4704 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4706 ; SKYLAKE-LABEL: test_pmovzxwq:
4708 ; SKYLAKE-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00]
4709 ; SKYLAKE-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [10:1.00]
4710 ; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4711 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4713 ; SKX-LABEL: test_pmovzxwq:
4715 ; SKX-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00]
4716 ; SKX-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [10:1.00]
4717 ; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
4718 ; SKX-NEXT: retq # sched: [7:1.00]
4720 ; ZNVER1-LABEL: test_pmovzxwq:
4722 ; ZNVER1-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:0.50]
4723 ; ZNVER1-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.25]
4724 ; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
4725 ; ZNVER1-NEXT: retq # sched: [1:0.50]
4726 %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4727 %2 = zext <4 x i16> %1 to <4 x i64>
4728 %3 = load <8 x i16>, <8 x i16> *%a1, align 16
4729 %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4730 %5 = zext <4 x i16> %4 to <4 x i64>
4731 %6 = add <4 x i64> %2, %5
; Scheduling test for vpmuldq. Calls @llvm.x86.avx2.pmul.dq on %a0 and %a1,
; bitcasts the <4 x i64> result back to <8 x i32>, and calls the intrinsic
; again with that value and an <8 x i32> loaded from %a2. The expected assembly
; is a register-register vpmuldq followed by one with a (%rdi) memory operand,
; each with the "sched" annotation printed under -print-schedule.
; Assertions are autogenerated by utils/update_llc_test_checks.py; regenerate
; them rather than editing by hand.
4735 define <4 x i64> @test_pmuldq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
4736 ; GENERIC-LABEL: test_pmuldq:
4738 ; GENERIC-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4739 ; GENERIC-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
4740 ; GENERIC-NEXT: retq # sched: [1:1.00]
4742 ; HASWELL-LABEL: test_pmuldq:
4744 ; HASWELL-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4745 ; HASWELL-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
4746 ; HASWELL-NEXT: retq # sched: [2:1.00]
4748 ; BROADWELL-LABEL: test_pmuldq:
4749 ; BROADWELL: # BB#0:
4750 ; BROADWELL-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4751 ; BROADWELL-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
4752 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4754 ; SKYLAKE-LABEL: test_pmuldq:
4756 ; SKYLAKE-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
4757 ; SKYLAKE-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4758 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4760 ; SKX-LABEL: test_pmuldq:
4762 ; SKX-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
4763 ; SKX-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4764 ; SKX-NEXT: retq # sched: [7:1.00]
4766 ; ZNVER1-LABEL: test_pmuldq:
4768 ; ZNVER1-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
4769 ; ZNVER1-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
4770 ; ZNVER1-NEXT: retq # sched: [1:0.50]
4771 %1 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %a0, <8 x i32> %a1)
4772 %2 = bitcast <4 x i64> %1 to <8 x i32>
4773 %3 = load <8 x i32>, <8 x i32> *%a2, align 32
4774 %4 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %2, <8 x i32> %3)
4777 declare <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32>, <8 x i32>) nounwind readnone
; Scheduling test for vpmulhrsw. Calls @llvm.x86.avx2.pmul.hr.sw on %a0 and
; %a1, then again on that result and a <16 x i16> loaded from %a2. The expected
; assembly is a register-register vpmulhrsw followed by one with a (%rdi)
; memory operand, each with the "sched" annotation printed under
; -print-schedule. Assertions are autogenerated by
; utils/update_llc_test_checks.py; regenerate them rather than editing by hand.
4779 define <16 x i16> @test_pmulhrsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
4780 ; GENERIC-LABEL: test_pmulhrsw:
4782 ; GENERIC-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4783 ; GENERIC-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
4784 ; GENERIC-NEXT: retq # sched: [1:1.00]
4786 ; HASWELL-LABEL: test_pmulhrsw:
4788 ; HASWELL-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4789 ; HASWELL-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
4790 ; HASWELL-NEXT: retq # sched: [2:1.00]
4792 ; BROADWELL-LABEL: test_pmulhrsw:
4793 ; BROADWELL: # BB#0:
4794 ; BROADWELL-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4795 ; BROADWELL-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
4796 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4798 ; SKYLAKE-LABEL: test_pmulhrsw:
4800 ; SKYLAKE-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
4801 ; SKYLAKE-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4802 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4804 ; SKX-LABEL: test_pmulhrsw:
4806 ; SKX-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
4807 ; SKX-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4808 ; SKX-NEXT: retq # sched: [7:1.00]
4810 ; ZNVER1-LABEL: test_pmulhrsw:
4812 ; ZNVER1-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
4813 ; ZNVER1-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
4814 ; ZNVER1-NEXT: retq # sched: [1:0.50]
4815 %1 = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %a0, <16 x i16> %a1)
4816 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
4817 %3 = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %1, <16 x i16> %2)
4820 declare <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16>, <16 x i16>) nounwind readnone
; Scheduling test for vpmulhuw. Calls @llvm.x86.avx2.pmulhu.w on %a0 and %a1,
; then again on that result and a <16 x i16> loaded from %a2. The expected
; assembly is a register-register vpmulhuw followed by one with a (%rdi)
; memory operand, each with the "sched" annotation printed under
; -print-schedule. Assertions are autogenerated by
; utils/update_llc_test_checks.py; regenerate them rather than editing by hand.
4822 define <16 x i16> @test_pmulhuw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
4823 ; GENERIC-LABEL: test_pmulhuw:
4825 ; GENERIC-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4826 ; GENERIC-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
4827 ; GENERIC-NEXT: retq # sched: [1:1.00]
4829 ; HASWELL-LABEL: test_pmulhuw:
4831 ; HASWELL-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4832 ; HASWELL-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
4833 ; HASWELL-NEXT: retq # sched: [2:1.00]
4835 ; BROADWELL-LABEL: test_pmulhuw:
4836 ; BROADWELL: # BB#0:
4837 ; BROADWELL-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4838 ; BROADWELL-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
4839 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4841 ; SKYLAKE-LABEL: test_pmulhuw:
4843 ; SKYLAKE-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
4844 ; SKYLAKE-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4845 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4847 ; SKX-LABEL: test_pmulhuw:
4849 ; SKX-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
4850 ; SKX-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4851 ; SKX-NEXT: retq # sched: [7:1.00]
4853 ; ZNVER1-LABEL: test_pmulhuw:
4855 ; ZNVER1-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
4856 ; ZNVER1-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
4857 ; ZNVER1-NEXT: retq # sched: [1:0.50]
4858 %1 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %a0, <16 x i16> %a1)
4859 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
4860 %3 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %1, <16 x i16> %2)
4863 declare <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16>, <16 x i16>) nounwind readnone
; Scheduling test for vpmulhw. Calls @llvm.x86.avx2.pmulh.w on %a0 and %a1,
; then again on that result and a <16 x i16> loaded from %a2. The expected
; assembly is a register-register vpmulhw followed by one with a (%rdi)
; memory operand, each with the "sched" annotation printed under
; -print-schedule. Assertions are autogenerated by
; utils/update_llc_test_checks.py; regenerate them rather than editing by hand.
4865 define <16 x i16> @test_pmulhw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
4866 ; GENERIC-LABEL: test_pmulhw:
4868 ; GENERIC-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4869 ; GENERIC-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
4870 ; GENERIC-NEXT: retq # sched: [1:1.00]
4872 ; HASWELL-LABEL: test_pmulhw:
4874 ; HASWELL-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4875 ; HASWELL-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
4876 ; HASWELL-NEXT: retq # sched: [2:1.00]
4878 ; BROADWELL-LABEL: test_pmulhw:
4879 ; BROADWELL: # BB#0:
4880 ; BROADWELL-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4881 ; BROADWELL-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
4882 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4884 ; SKYLAKE-LABEL: test_pmulhw:
4886 ; SKYLAKE-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
4887 ; SKYLAKE-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4888 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4890 ; SKX-LABEL: test_pmulhw:
4892 ; SKX-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
4893 ; SKX-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4894 ; SKX-NEXT: retq # sched: [7:1.00]
4896 ; ZNVER1-LABEL: test_pmulhw:
4898 ; ZNVER1-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
4899 ; ZNVER1-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
4900 ; ZNVER1-NEXT: retq # sched: [1:0.50]
4901 %1 = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %a0, <16 x i16> %a1)
4902 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
4903 %3 = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %1, <16 x i16> %2)
4906 declare <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16>, <16 x i16>) nounwind readnone
; Scheduling test for vpmulld. Multiplies %a0 by %a1 with a plain IR mul on
; <8 x i32>, then multiplies that product by an <8 x i32> loaded from %a2. The
; expected assembly is a register-register vpmulld followed by one with a
; (%rdi) memory operand, each with the "sched" annotation printed under
; -print-schedule. Assertions are autogenerated by
; utils/update_llc_test_checks.py; regenerate them rather than editing by hand.
4908 define <8 x i32> @test_pmulld(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
4909 ; GENERIC-LABEL: test_pmulld:
4911 ; GENERIC-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4912 ; GENERIC-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
4913 ; GENERIC-NEXT: retq # sched: [1:1.00]
4915 ; HASWELL-LABEL: test_pmulld:
4917 ; HASWELL-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:2.00]
4918 ; HASWELL-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
4919 ; HASWELL-NEXT: retq # sched: [2:1.00]
4921 ; BROADWELL-LABEL: test_pmulld:
4922 ; BROADWELL: # BB#0:
4923 ; BROADWELL-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:2.00]
4924 ; BROADWELL-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [16:2.00]
4925 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4927 ; SKYLAKE-LABEL: test_pmulld:
4929 ; SKYLAKE-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [8:0.67]
4930 ; SKYLAKE-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [15:0.67]
4931 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4933 ; SKX-LABEL: test_pmulld:
4935 ; SKX-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [8:0.67]
4936 ; SKX-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [15:0.67]
4937 ; SKX-NEXT: retq # sched: [7:1.00]
4939 ; ZNVER1-LABEL: test_pmulld:
4941 ; ZNVER1-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
4942 ; ZNVER1-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
4943 ; ZNVER1-NEXT: retq # sched: [1:0.50]
4944 %1 = mul <8 x i32> %a0, %a1
4945 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
4946 %3 = mul <8 x i32> %1, %2
; Scheduling test for vpmullw. Multiplies %a0 by %a1 with a plain IR mul on
; <16 x i16>, then multiplies that product by a <16 x i16> loaded from %a2.
; The expected assembly is a register-register vpmullw followed by one with a
; (%rdi) memory operand, each with the "sched" annotation printed under
; -print-schedule. Assertions are autogenerated by
; utils/update_llc_test_checks.py; regenerate them rather than editing by hand.
4950 define <16 x i16> @test_pmullw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
4951 ; GENERIC-LABEL: test_pmullw:
4953 ; GENERIC-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4954 ; GENERIC-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
4955 ; GENERIC-NEXT: retq # sched: [1:1.00]
4957 ; HASWELL-LABEL: test_pmullw:
4959 ; HASWELL-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4960 ; HASWELL-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
4961 ; HASWELL-NEXT: retq # sched: [2:1.00]
4963 ; BROADWELL-LABEL: test_pmullw:
4964 ; BROADWELL: # BB#0:
4965 ; BROADWELL-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4966 ; BROADWELL-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
4967 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4969 ; SKYLAKE-LABEL: test_pmullw:
4971 ; SKYLAKE-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
4972 ; SKYLAKE-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4973 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4975 ; SKX-LABEL: test_pmullw:
4977 ; SKX-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
4978 ; SKX-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4979 ; SKX-NEXT: retq # sched: [7:1.00]
4981 ; ZNVER1-LABEL: test_pmullw:
4983 ; ZNVER1-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
4984 ; ZNVER1-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
4985 ; ZNVER1-NEXT: retq # sched: [1:0.50]
4986 %1 = mul <16 x i16> %a0, %a1
4987 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
4988 %3 = mul <16 x i16> %1, %2
; Scheduling test for vpmuludq. Calls @llvm.x86.avx2.pmulu.dq on %a0 and %a1,
; bitcasts the <4 x i64> result back to <8 x i32>, and calls the intrinsic
; again with that value and an <8 x i32> loaded from %a2. The expected assembly
; is a register-register vpmuludq followed by one with a (%rdi) memory operand,
; each with the "sched" annotation printed under -print-schedule.
; Assertions are autogenerated by utils/update_llc_test_checks.py; regenerate
; them rather than editing by hand.
4992 define <4 x i64> @test_pmuludq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
4993 ; GENERIC-LABEL: test_pmuludq:
4995 ; GENERIC-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
4996 ; GENERIC-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
4997 ; GENERIC-NEXT: retq # sched: [1:1.00]
4999 ; HASWELL-LABEL: test_pmuludq:
5001 ; HASWELL-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
5002 ; HASWELL-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
5003 ; HASWELL-NEXT: retq # sched: [2:1.00]
5005 ; BROADWELL-LABEL: test_pmuludq:
5006 ; BROADWELL: # BB#0:
5007 ; BROADWELL-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
5008 ; BROADWELL-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
5009 ; BROADWELL-NEXT: retq # sched: [7:1.00]
5011 ; SKYLAKE-LABEL: test_pmuludq:
5013 ; SKYLAKE-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
5014 ; SKYLAKE-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
5015 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
5017 ; SKX-LABEL: test_pmuludq:
5019 ; SKX-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
5020 ; SKX-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
5021 ; SKX-NEXT: retq # sched: [7:1.00]
5023 ; ZNVER1-LABEL: test_pmuludq:
5025 ; ZNVER1-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
5026 ; ZNVER1-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
5027 ; ZNVER1-NEXT: retq # sched: [1:0.50]
5028 %1 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> %a0, <8 x i32> %a1)
5029 %2 = bitcast <4 x i64> %1 to <8 x i32>
5030 %3 = load <8 x i32>, <8 x i32> *%a2, align 32
5031 %4 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> %2, <8 x i32> %3)
5034 declare <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32>, <8 x i32>) nounwind readnone
; Scheduling test for vpor. ORs %a0 with %a1, ORs that with a <4 x i64> loaded
; from %a2, then adds %a1 to the result. The expected assembly is a
; register-register vpor, a vpor with a (%rdi) memory operand, and a vpaddq,
; each with the "sched" annotation printed under -print-schedule.
; NOTE(review): the trailing add is presumably there to keep the OR in the
; integer domain so that vpor (not a floating-point-domain OR) is selected;
; confirm before removing it.
; Assertions are autogenerated by utils/update_llc_test_checks.py; regenerate
; them rather than editing by hand.
5036 define <4 x i64> @test_por(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
5037 ; GENERIC-LABEL: test_por:
5039 ; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
5040 ; GENERIC-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
5041 ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
5042 ; GENERIC-NEXT: retq # sched: [1:1.00]
5044 ; HASWELL-LABEL: test_por:
5046 ; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5047 ; HASWELL-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
5048 ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5049 ; HASWELL-NEXT: retq # sched: [2:1.00]
5051 ; BROADWELL-LABEL: test_por:
5052 ; BROADWELL: # BB#0:
5053 ; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5054 ; BROADWELL-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
5055 ; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5056 ; BROADWELL-NEXT: retq # sched: [7:1.00]
5058 ; SKYLAKE-LABEL: test_por:
5060 ; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5061 ; SKYLAKE-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5062 ; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5063 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
5065 ; SKX-LABEL: test_por:
5067 ; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5068 ; SKX-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5069 ; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5070 ; SKX-NEXT: retq # sched: [7:1.00]
5072 ; ZNVER1-LABEL: test_por:
5074 ; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
5075 ; ZNVER1-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5076 ; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
5077 ; ZNVER1-NEXT: retq # sched: [1:0.50]
5078 %1 = or <4 x i64> %a0, %a1
5079 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
5080 %3 = or <4 x i64> %1, %2
5081 %4 = add <4 x i64> %3, %a1
; Scheduling test for vpsadbw. Calls @llvm.x86.avx2.psad.bw on %a0 and %a1,
; bitcasts the <4 x i64> result to <32 x i8>, and calls the intrinsic again
; with that value and a <32 x i8> loaded from %a2. The expected assembly is a
; register-register vpsadbw followed by one with a (%rdi) memory operand, each
; with the "sched" annotation printed under -print-schedule.
; Assertions are autogenerated by utils/update_llc_test_checks.py; regenerate
; them rather than editing by hand.
5085 define <4 x i64> @test_psadbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
5086 ; GENERIC-LABEL: test_psadbw:
5088 ; GENERIC-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
5089 ; GENERIC-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
5090 ; GENERIC-NEXT: retq # sched: [1:1.00]
5092 ; HASWELL-LABEL: test_psadbw:
5094 ; HASWELL-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
5095 ; HASWELL-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
5096 ; HASWELL-NEXT: retq # sched: [2:1.00]
5098 ; BROADWELL-LABEL: test_psadbw:
5099 ; BROADWELL: # BB#0:
5100 ; BROADWELL-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
5101 ; BROADWELL-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
5102 ; BROADWELL-NEXT: retq # sched: [7:1.00]
5104 ; SKYLAKE-LABEL: test_psadbw:
5106 ; SKYLAKE-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
5107 ; SKYLAKE-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
5108 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
5110 ; SKX-LABEL: test_psadbw:
5112 ; SKX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
5113 ; SKX-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
5114 ; SKX-NEXT: retq # sched: [7:1.00]
5116 ; ZNVER1-LABEL: test_psadbw:
5118 ; ZNVER1-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
5119 ; ZNVER1-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
5120 ; ZNVER1-NEXT: retq # sched: [1:0.50]
5121 %1 = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> %a0, <32 x i8> %a1)
5122 %2 = bitcast <4 x i64> %1 to <32 x i8>
5123 %3 = load <32 x i8>, <32 x i8> *%a2, align 32
5124 %4 = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> %2, <32 x i8> %3)
5127 declare <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8>, <32 x i8>) nounwind readnone
; Scheduling test for vpshufb. Calls @llvm.x86.avx2.pshuf.b on %a0 and %a1,
; then again on that result and a <32 x i8> loaded from %a2. The expected
; assembly is a register-register vpshufb followed by one with a (%rdi)
; memory operand, each with the "sched" annotation printed under
; -print-schedule. Assertions are autogenerated by
; utils/update_llc_test_checks.py; regenerate them rather than editing by hand.
5129 define <32 x i8> @test_pshufb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
5130 ; GENERIC-LABEL: test_pshufb:
5132 ; GENERIC-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
5133 ; GENERIC-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
5134 ; GENERIC-NEXT: retq # sched: [1:1.00]
5136 ; HASWELL-LABEL: test_pshufb:
5138 ; HASWELL-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
5139 ; HASWELL-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
5140 ; HASWELL-NEXT: retq # sched: [2:1.00]
5142 ; BROADWELL-LABEL: test_pshufb:
5143 ; BROADWELL: # BB#0:
5144 ; BROADWELL-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
5145 ; BROADWELL-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
5146 ; BROADWELL-NEXT: retq # sched: [7:1.00]
5148 ; SKYLAKE-LABEL: test_pshufb:
5150 ; SKYLAKE-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
5151 ; SKYLAKE-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
5152 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
5154 ; SKX-LABEL: test_pshufb:
5156 ; SKX-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
5157 ; SKX-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
5158 ; SKX-NEXT: retq # sched: [7:1.00]
5160 ; ZNVER1-LABEL: test_pshufb:
5162 ; ZNVER1-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
5163 ; ZNVER1-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5164 ; ZNVER1-NEXT: retq # sched: [1:0.50]
5165 %1 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> %a1)
5166 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
5167 %3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %1, <32 x i8> %2)
5170 declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>) nounwind readnone
; Scheduling test for vpshufd. Shuffles the <8 x i32> argument %a0 with mask
; <3,2,1,0,7,6,5,4> and an <8 x i32> loaded from %a1 with mask
; <1,0,3,2,5,4,7,6>, then adds the two results. The expected assembly has a
; register-source and a memory-source vpshufd plus a vpaddd, each with the
; "sched" annotation printed under -print-schedule. For -mcpu=znver1 the
; memory-source form is expected first; the other CPUs expect the register
; form first. Assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them rather than editing by hand.
5172 define <8 x i32> @test_pshufd(<8 x i32> %a0, <8 x i32> *%a1) {
5173 ; GENERIC-LABEL: test_pshufd:
5175 ; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
5176 ; GENERIC-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [5:1.00]
5177 ; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
5178 ; GENERIC-NEXT: retq # sched: [1:1.00]
5180 ; HASWELL-LABEL: test_pshufd:
5182 ; HASWELL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
5183 ; HASWELL-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [1:1.00]
5184 ; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5185 ; HASWELL-NEXT: retq # sched: [2:1.00]
5187 ; BROADWELL-LABEL: test_pshufd:
5188 ; BROADWELL: # BB#0:
5189 ; BROADWELL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
5190 ; BROADWELL-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [7:1.00]
5191 ; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5192 ; BROADWELL-NEXT: retq # sched: [7:1.00]
5194 ; SKYLAKE-LABEL: test_pshufd:
5196 ; SKYLAKE-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
5197 ; SKYLAKE-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [8:1.00]
5198 ; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5199 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
5201 ; SKX-LABEL: test_pshufd:
5203 ; SKX-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
5204 ; SKX-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [8:1.00]
5205 ; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5206 ; SKX-NEXT: retq # sched: [7:1.00]
5208 ; ZNVER1-LABEL: test_pshufd:
5210 ; ZNVER1-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [8:0.50]
5211 ; ZNVER1-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:0.25]
5212 ; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
5213 ; ZNVER1-NEXT: retq # sched: [1:0.50]
5214 %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
5215 %2 = load <8 x i32>, <8 x i32> *%a1, align 32
5216 %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
5217 %4 = add <8 x i32> %1, %3
; Scheduling test for vpshufhw. Permutes only elements 4..7 and 12..15 of the
; <16 x i16> argument %a0 and of a <16 x i16> loaded from %a1 (the other mask
; entries are identity), then ORs the two results. The expected assembly has a
; register-source and a memory-source vpshufhw plus a vpor, each with the
; "sched" annotation printed under -print-schedule. For -mcpu=znver1 the
; memory-source form is expected first; the other CPUs expect the register
; form first. Assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them rather than editing by hand.
5221 define <16 x i16> @test_pshufhw(<16 x i16> %a0, <16 x i16> *%a1) {
5222 ; GENERIC-LABEL: test_pshufhw:
5224 ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00]
5225 ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [5:1.00]
5226 ; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
5227 ; GENERIC-NEXT: retq # sched: [1:1.00]
5229 ; HASWELL-LABEL: test_pshufhw:
5231 ; HASWELL-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00]
5232 ; HASWELL-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [1:1.00]
5233 ; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5234 ; HASWELL-NEXT: retq # sched: [2:1.00]
5236 ; BROADWELL-LABEL: test_pshufhw:
5237 ; BROADWELL: # BB#0:
5238 ; BROADWELL-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00]
5239 ; BROADWELL-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [7:1.00]
5240 ; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5241 ; BROADWELL-NEXT: retq # sched: [7:1.00]
5243 ; SKYLAKE-LABEL: test_pshufhw:
5245 ; SKYLAKE-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00]
5246 ; SKYLAKE-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [8:1.00]
5247 ; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5248 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
5250 ; SKX-LABEL: test_pshufhw:
5252 ; SKX-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00]
5253 ; SKX-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [8:1.00]
5254 ; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5255 ; SKX-NEXT: retq # sched: [7:1.00]
5257 ; ZNVER1-LABEL: test_pshufhw:
5259 ; ZNVER1-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [8:0.50]
5260 ; ZNVER1-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:0.25]
5261 ; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
5262 ; ZNVER1-NEXT: retq # sched: [1:0.50]
5263 %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 5, i32 4, i32 8, i32 9, i32 10, i32 11, i32 15, i32 14, i32 13, i32 12>
5264 %2 = load <16 x i16>, <16 x i16> *%a1, align 32
5265 %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 7, i32 6, i32 8, i32 9, i32 10, i32 11, i32 13, i32 12, i32 15, i32 14>
5266 %4 = or <16 x i16> %1, %3
; Scheduling test for vpshuflw. Permutes only elements 0..3 and 8..11 of the
; <16 x i16> argument %a0 and of a <16 x i16> loaded from %a1 (the other mask
; entries are identity), then ORs the two results. The expected assembly has a
; register-source and a memory-source vpshuflw plus a vpor, each with the
; "sched" annotation printed under -print-schedule. For -mcpu=znver1 the
; memory-source form is expected first; the other CPUs expect the register
; form first. Assertions are autogenerated by utils/update_llc_test_checks.py;
; regenerate them rather than editing by hand.
5270 define <16 x i16> @test_pshuflw(<16 x i16> %a0, <16 x i16> *%a1) {
5271 ; GENERIC-LABEL: test_pshuflw:
5273 ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00]
5274 ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [5:1.00]
5275 ; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
5276 ; GENERIC-NEXT: retq # sched: [1:1.00]
5278 ; HASWELL-LABEL: test_pshuflw:
5280 ; HASWELL-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00]
5281 ; HASWELL-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [1:1.00]
5282 ; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5283 ; HASWELL-NEXT: retq # sched: [2:1.00]
5285 ; BROADWELL-LABEL: test_pshuflw:
5286 ; BROADWELL: # BB#0:
5287 ; BROADWELL-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00]
5288 ; BROADWELL-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [7:1.00]
5289 ; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5290 ; BROADWELL-NEXT: retq # sched: [7:1.00]
5292 ; SKYLAKE-LABEL: test_pshuflw:
5294 ; SKYLAKE-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00]
5295 ; SKYLAKE-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [8:1.00]
5296 ; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5297 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
5299 ; SKX-LABEL: test_pshuflw:
5301 ; SKX-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00]
5302 ; SKX-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [8:1.00]
5303 ; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5304 ; SKX-NEXT: retq # sched: [7:1.00]
5306 ; ZNVER1-LABEL: test_pshuflw:
5308 ; ZNVER1-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [8:0.50]
5309 ; ZNVER1-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:0.25]
5310 ; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
5311 ; ZNVER1-NEXT: retq # sched: [1:0.50]
5312 %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 9, i32 8, i32 12, i32 13, i32 14, i32 15>
5313 %2 = load <16 x i16>, <16 x i16> *%a1, align 32
5314 %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 9, i32 8, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15>
5315 %4 = or <16 x i16> %1, %3
; Scheduling test for vpsignb. Calls @llvm.x86.avx2.psign.b on %a0 and %a1,
; then again on that result and a <32 x i8> loaded from %a2. The expected
; assembly is a register-register vpsignb followed by one with a (%rdi)
; memory operand, each with the "sched" annotation printed under
; -print-schedule. Assertions are autogenerated by
; utils/update_llc_test_checks.py; regenerate them rather than editing by hand.
5319 define <32 x i8> @test_psignb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
5320 ; GENERIC-LABEL: test_psignb:
5322 ; GENERIC-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
5323 ; GENERIC-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
5324 ; GENERIC-NEXT: retq # sched: [1:1.00]
5326 ; HASWELL-LABEL: test_psignb:
5328 ; HASWELL-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5329 ; HASWELL-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
5330 ; HASWELL-NEXT: retq # sched: [2:1.00]
5332 ; BROADWELL-LABEL: test_psignb:
5333 ; BROADWELL: # BB#0:
5334 ; BROADWELL-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5335 ; BROADWELL-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
5336 ; BROADWELL-NEXT: retq # sched: [7:1.00]
5338 ; SKYLAKE-LABEL: test_psignb:
5340 ; SKYLAKE-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5341 ; SKYLAKE-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5342 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
5344 ; SKX-LABEL: test_psignb:
5346 ; SKX-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5347 ; SKX-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5348 ; SKX-NEXT: retq # sched: [7:1.00]
5350 ; ZNVER1-LABEL: test_psignb:
5352 ; ZNVER1-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
5353 ; ZNVER1-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5354 ; ZNVER1-NEXT: retq # sched: [1:0.50]
5355 %1 = call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %a0, <32 x i8> %a1)
5356 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
5357 %3 = call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %1, <32 x i8> %2)
5360 declare <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8>, <32 x i8>) nounwind readnone
; test_psignd: calls @llvm.x86.avx2.psign.d twice -- first on the two vector
; arguments, then on that result and a vector loaded from %a2 -- so the expected
; assembly holds a register form and a memory form of vpsignd. Each CPU-specific
; prefix pins the `# sched:` annotation printed next to every instruction.
5362 define <8 x i32> @test_psignd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
5363 ; GENERIC-LABEL: test_psignd:
5365 ; GENERIC-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
5366 ; GENERIC-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
5367 ; GENERIC-NEXT: retq # sched: [1:1.00]
5369 ; HASWELL-LABEL: test_psignd:
5371 ; HASWELL-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5372 ; HASWELL-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
5373 ; HASWELL-NEXT: retq # sched: [2:1.00]
5375 ; BROADWELL-LABEL: test_psignd:
5376 ; BROADWELL: # BB#0:
5377 ; BROADWELL-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5378 ; BROADWELL-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
5379 ; BROADWELL-NEXT: retq # sched: [7:1.00]
5381 ; SKYLAKE-LABEL: test_psignd:
5383 ; SKYLAKE-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5384 ; SKYLAKE-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5385 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
5387 ; SKX-LABEL: test_psignd:
5389 ; SKX-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5390 ; SKX-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5391 ; SKX-NEXT: retq # sched: [7:1.00]
5393 ; ZNVER1-LABEL: test_psignd:
5395 ; ZNVER1-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
5396 ; ZNVER1-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5397 ; ZNVER1-NEXT: retq # sched: [1:0.50]
5398 %1 = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %a0, <8 x i32> %a1)
5399 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
5400 %3 = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %1, <8 x i32> %2)
; Declaration of the intrinsic called by test_psignd.
5403 declare <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32>, <8 x i32>) nounwind readnone
; test_psignw: calls @llvm.x86.avx2.psign.w twice -- first on the two vector
; arguments, then on that result and a vector loaded from %a2 -- so the expected
; assembly holds a register form and a memory form of vpsignw. Each CPU-specific
; prefix pins the `# sched:` annotation printed next to every instruction.
5405 define <16 x i16> @test_psignw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
5406 ; GENERIC-LABEL: test_psignw:
5408 ; GENERIC-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
5409 ; GENERIC-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
5410 ; GENERIC-NEXT: retq # sched: [1:1.00]
5412 ; HASWELL-LABEL: test_psignw:
5414 ; HASWELL-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5415 ; HASWELL-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
5416 ; HASWELL-NEXT: retq # sched: [2:1.00]
5418 ; BROADWELL-LABEL: test_psignw:
5419 ; BROADWELL: # BB#0:
5420 ; BROADWELL-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5421 ; BROADWELL-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
5422 ; BROADWELL-NEXT: retq # sched: [7:1.00]
5424 ; SKYLAKE-LABEL: test_psignw:
5426 ; SKYLAKE-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5427 ; SKYLAKE-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5428 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
5430 ; SKX-LABEL: test_psignw:
5432 ; SKX-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5433 ; SKX-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5434 ; SKX-NEXT: retq # sched: [7:1.00]
5436 ; ZNVER1-LABEL: test_psignw:
5438 ; ZNVER1-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
5439 ; ZNVER1-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5440 ; ZNVER1-NEXT: retq # sched: [1:0.50]
5441 %1 = call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> %a0, <16 x i16> %a1)
5442 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
5443 %3 = call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> %1, <16 x i16> %2)
; Declaration of the intrinsic called by test_psignw.
5446 declare <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16>, <16 x i16>) nounwind readnone
; test_pslld: shifts %a0 three times -- via @llvm.x86.avx2.psll.d with the count
; in %a1, via the same intrinsic with the count loaded from %a2, and via a plain
; `shl` by the constant 2 -- so the expected assembly holds the xmm-count,
; memory-count and immediate forms of vpslld. Each CPU-specific prefix pins the
; `# sched:` annotation printed next to every instruction.
5448 define <8 x i32> @test_pslld(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
5449 ; GENERIC-LABEL: test_pslld:
5451 ; GENERIC-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
5452 ; GENERIC-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
5453 ; GENERIC-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:1.00]
5454 ; GENERIC-NEXT: retq # sched: [1:1.00]
5456 ; HASWELL-LABEL: test_pslld:
5458 ; HASWELL-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
5459 ; HASWELL-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
5460 ; HASWELL-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:1.00]
5461 ; HASWELL-NEXT: retq # sched: [2:1.00]
5463 ; BROADWELL-LABEL: test_pslld:
5464 ; BROADWELL: # BB#0:
5465 ; BROADWELL-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
5466 ; BROADWELL-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
5467 ; BROADWELL-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:1.00]
5468 ; BROADWELL-NEXT: retq # sched: [7:1.00]
5470 ; SKYLAKE-LABEL: test_pslld:
5472 ; SKYLAKE-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
5473 ; SKYLAKE-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5474 ; SKYLAKE-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:0.50]
5475 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
5477 ; SKX-LABEL: test_pslld:
5479 ; SKX-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
5480 ; SKX-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5481 ; SKX-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:0.50]
5482 ; SKX-NEXT: retq # sched: [7:1.00]
5484 ; ZNVER1-LABEL: test_pslld:
5486 ; ZNVER1-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
5487 ; ZNVER1-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
5488 ; ZNVER1-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:0.25]
5489 ; ZNVER1-NEXT: retq # sched: [1:0.50]
5490 %1 = call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %a0, <4 x i32> %a1)
5491 %2 = load <4 x i32>, <4 x i32> *%a2, align 16
5492 %3 = call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %1, <4 x i32> %2)
5493 %4 = shl <8 x i32> %3, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
; Declaration of the intrinsic called by test_pslld.
5496 declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) nounwind readnone
; test_pslldq: a shufflevector of zeroinitializer and %a0 whose mask places three
; zero bytes ahead of bytes 0-12 of each 16-byte half of %a0. The expected
; assembly is a single vpslldq with that shuffle comment; each CPU-specific
; prefix pins the `sched:` annotation printed for it.
5498 define <32 x i8> @test_pslldq(<32 x i8> %a0) {
5499 ; GENERIC-LABEL: test_pslldq:
5501 ; GENERIC-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00]
5502 ; GENERIC-NEXT: retq # sched: [1:1.00]
5504 ; HASWELL-LABEL: test_pslldq:
5506 ; HASWELL-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00]
5507 ; HASWELL-NEXT: retq # sched: [2:1.00]
5509 ; BROADWELL-LABEL: test_pslldq:
5510 ; BROADWELL: # BB#0:
5511 ; BROADWELL-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00]
5512 ; BROADWELL-NEXT: retq # sched: [7:1.00]
5514 ; SKYLAKE-LABEL: test_pslldq:
5516 ; SKYLAKE-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00]
5517 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
5519 ; SKX-LABEL: test_pslldq:
5521 ; SKX-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00]
5522 ; SKX-NEXT: retq # sched: [7:1.00]
5524 ; ZNVER1-LABEL: test_pslldq:
5526 ; ZNVER1-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [2:1.00]
5527 ; ZNVER1-NEXT: retq # sched: [1:0.50]
5528 %1 = shufflevector <32 x i8> zeroinitializer, <32 x i8> %a0, <32 x i32> <i32 13, i32 14, i32 15, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 29, i32 30, i32 31, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60>
; test_psllq: shifts %a0 three times -- via @llvm.x86.avx2.psll.q with the count
; in %a1, via the same intrinsic with the count loaded from %a2, and via a plain
; `shl` by the constant 2 -- so the expected assembly holds the xmm-count,
; memory-count and immediate forms of vpsllq. Each CPU-specific prefix pins the
; `# sched:` annotation printed next to every instruction.
5532 define <4 x i64> @test_psllq(<4 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
5533 ; GENERIC-LABEL: test_psllq:
5535 ; GENERIC-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
5536 ; GENERIC-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
5537 ; GENERIC-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:1.00]
5538 ; GENERIC-NEXT: retq # sched: [1:1.00]
5540 ; HASWELL-LABEL: test_psllq:
5542 ; HASWELL-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
5543 ; HASWELL-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
5544 ; HASWELL-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:1.00]
5545 ; HASWELL-NEXT: retq # sched: [2:1.00]
5547 ; BROADWELL-LABEL: test_psllq:
5548 ; BROADWELL: # BB#0:
5549 ; BROADWELL-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
5550 ; BROADWELL-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
5551 ; BROADWELL-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:1.00]
5552 ; BROADWELL-NEXT: retq # sched: [7:1.00]
5554 ; SKYLAKE-LABEL: test_psllq:
5556 ; SKYLAKE-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
5557 ; SKYLAKE-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5558 ; SKYLAKE-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:0.50]
5559 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
5561 ; SKX-LABEL: test_psllq:
5563 ; SKX-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
5564 ; SKX-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5565 ; SKX-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:0.50]
5566 ; SKX-NEXT: retq # sched: [7:1.00]
5568 ; ZNVER1-LABEL: test_psllq:
5570 ; ZNVER1-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
5571 ; ZNVER1-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
5572 ; ZNVER1-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:0.25]
5573 ; ZNVER1-NEXT: retq # sched: [1:0.50]
5574 %1 = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %a0, <2 x i64> %a1)
5575 %2 = load <2 x i64>, <2 x i64> *%a2, align 16
5576 %3 = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %1, <2 x i64> %2)
5577 %4 = shl <4 x i64> %3, <i64 2, i64 2, i64 2, i64 2>
; Declaration of the intrinsic called by test_psllq.
5580 declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) nounwind readnone
; test_psllvd: calls the 128-bit @llvm.x86.avx2.psllv.d twice -- first on the two
; vector arguments, then on that result and a vector loaded from %a2 -- so the
; expected assembly holds a register form and a memory form of vpsllvd on xmm
; registers. Each CPU-specific prefix pins the `# sched:` annotation printed
; next to every instruction.
5582 define <4 x i32> @test_psllvd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
5583 ; GENERIC-LABEL: test_psllvd:
5585 ; GENERIC-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
5586 ; GENERIC-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
5587 ; GENERIC-NEXT: retq # sched: [1:1.00]
5589 ; HASWELL-LABEL: test_psllvd:
5591 ; HASWELL-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
5592 ; HASWELL-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [3:2.00]
5593 ; HASWELL-NEXT: retq # sched: [2:1.00]
5595 ; BROADWELL-LABEL: test_psllvd:
5596 ; BROADWELL: # BB#0:
5597 ; BROADWELL-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
5598 ; BROADWELL-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
5599 ; BROADWELL-NEXT: retq # sched: [7:1.00]
5601 ; SKYLAKE-LABEL: test_psllvd:
5603 ; SKYLAKE-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
5604 ; SKYLAKE-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
5605 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
5607 ; SKX-LABEL: test_psllvd:
5609 ; SKX-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
5610 ; SKX-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
5611 ; SKX-NEXT: retq # sched: [7:1.00]
5613 ; ZNVER1-LABEL: test_psllvd:
5615 ; ZNVER1-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
5616 ; ZNVER1-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
5617 ; ZNVER1-NEXT: retq # sched: [1:0.50]
5618 %1 = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %a0, <4 x i32> %a1)
5619 %2 = load <4 x i32>, <4 x i32> *%a2, align 16
5620 %3 = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %1, <4 x i32> %2)
; Declaration of the intrinsic called by test_psllvd.
5623 declare <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32>, <4 x i32>) nounwind readnone
; test_psllvd_ymm: calls the 256-bit @llvm.x86.avx2.psllv.d.256 twice -- first on
; the two vector arguments, then on that result and a vector loaded from %a2 --
; so the expected assembly holds a register form and a memory form of vpsllvd on
; ymm registers. Each CPU-specific prefix pins the `# sched:` annotation printed
; next to every instruction.
5625 define <8 x i32> @test_psllvd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
5626 ; GENERIC-LABEL: test_psllvd_ymm:
5628 ; GENERIC-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
5629 ; GENERIC-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
5630 ; GENERIC-NEXT: retq # sched: [1:1.00]
5632 ; HASWELL-LABEL: test_psllvd_ymm:
5634 ; HASWELL-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
5635 ; HASWELL-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [3:2.00]
5636 ; HASWELL-NEXT: retq # sched: [2:1.00]
5638 ; BROADWELL-LABEL: test_psllvd_ymm:
5639 ; BROADWELL: # BB#0:
5640 ; BROADWELL-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
5641 ; BROADWELL-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
5642 ; BROADWELL-NEXT: retq # sched: [7:1.00]
5644 ; SKYLAKE-LABEL: test_psllvd_ymm:
5646 ; SKYLAKE-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5647 ; SKYLAKE-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5648 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
5650 ; SKX-LABEL: test_psllvd_ymm:
5652 ; SKX-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5653 ; SKX-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5654 ; SKX-NEXT: retq # sched: [7:1.00]
5656 ; ZNVER1-LABEL: test_psllvd_ymm:
5658 ; ZNVER1-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5659 ; ZNVER1-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5660 ; ZNVER1-NEXT: retq # sched: [1:0.50]
5661 %1 = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %a0, <8 x i32> %a1)
5662 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
5663 %3 = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %1, <8 x i32> %2)
; Declaration of the intrinsic called by test_psllvd_ymm.
5666 declare <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32>, <8 x i32>) nounwind readnone
; test_psllvq: calls the 128-bit @llvm.x86.avx2.psllv.q twice -- first on the two
; vector arguments, then on that result and a vector loaded from %a2 -- so the
; expected assembly holds a register form and a memory form of vpsllvq on xmm
; registers. Each CPU-specific prefix pins the `# sched:` annotation printed
; next to every instruction.
5668 define <2 x i64> @test_psllvq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
5669 ; GENERIC-LABEL: test_psllvq:
5671 ; GENERIC-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
5672 ; GENERIC-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
5673 ; GENERIC-NEXT: retq # sched: [1:1.00]
5675 ; HASWELL-LABEL: test_psllvq:
5677 ; HASWELL-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
5678 ; HASWELL-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
5679 ; HASWELL-NEXT: retq # sched: [2:1.00]
5681 ; BROADWELL-LABEL: test_psllvq:
5682 ; BROADWELL: # BB#0:
5683 ; BROADWELL-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
5684 ; BROADWELL-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
5685 ; BROADWELL-NEXT: retq # sched: [7:1.00]
5687 ; SKYLAKE-LABEL: test_psllvq:
5689 ; SKYLAKE-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
5690 ; SKYLAKE-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
5691 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
5693 ; SKX-LABEL: test_psllvq:
5695 ; SKX-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
5696 ; SKX-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
5697 ; SKX-NEXT: retq # sched: [7:1.00]
5699 ; ZNVER1-LABEL: test_psllvq:
5701 ; ZNVER1-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
5702 ; ZNVER1-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
5703 ; ZNVER1-NEXT: retq # sched: [1:0.50]
5704 %1 = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %a0, <2 x i64> %a1)
5705 %2 = load <2 x i64>, <2 x i64> *%a2, align 16
5706 %3 = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %1, <2 x i64> %2)
; Declaration of the intrinsic called by test_psllvq.
5709 declare <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64>, <2 x i64>) nounwind readnone
; test_psllvq_ymm: calls the 256-bit @llvm.x86.avx2.psllv.q.256 twice -- first on
; the two vector arguments, then on that result and a vector loaded from %a2 --
; so the expected assembly holds a register form and a memory form of vpsllvq on
; ymm registers. Each CPU-specific prefix pins the `# sched:` annotation printed
; next to every instruction.
5711 define <4 x i64> @test_psllvq_ymm(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
5712 ; GENERIC-LABEL: test_psllvq_ymm:
5714 ; GENERIC-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
5715 ; GENERIC-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
5716 ; GENERIC-NEXT: retq # sched: [1:1.00]
5718 ; HASWELL-LABEL: test_psllvq_ymm:
5720 ; HASWELL-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
5721 ; HASWELL-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
5722 ; HASWELL-NEXT: retq # sched: [2:1.00]
5724 ; BROADWELL-LABEL: test_psllvq_ymm:
5725 ; BROADWELL: # BB#0:
5726 ; BROADWELL-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
5727 ; BROADWELL-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
5728 ; BROADWELL-NEXT: retq # sched: [7:1.00]
5730 ; SKYLAKE-LABEL: test_psllvq_ymm:
5732 ; SKYLAKE-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5733 ; SKYLAKE-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5734 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
5736 ; SKX-LABEL: test_psllvq_ymm:
5738 ; SKX-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5739 ; SKX-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5740 ; SKX-NEXT: retq # sched: [7:1.00]
5742 ; ZNVER1-LABEL: test_psllvq_ymm:
5744 ; ZNVER1-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5745 ; ZNVER1-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5746 ; ZNVER1-NEXT: retq # sched: [1:0.50]
5747 %1 = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %a0, <4 x i64> %a1)
5748 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
5749 %3 = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %1, <4 x i64> %2)
; Declaration of the intrinsic called by test_psllvq_ymm.
5752 declare <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64>, <4 x i64>) nounwind readnone
; test_psllw: shifts %a0 three times -- via @llvm.x86.avx2.psll.w with the count
; in %a1, via the same intrinsic with the count loaded from %a2, and via a plain
; `shl` by the constant 2 -- so the expected assembly holds the xmm-count,
; memory-count and immediate forms of vpsllw. Each CPU-specific prefix pins the
; `# sched:` annotation printed next to every instruction.
5754 define <16 x i16> @test_psllw(<16 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
5755 ; GENERIC-LABEL: test_psllw:
5757 ; GENERIC-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
5758 ; GENERIC-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
5759 ; GENERIC-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:1.00]
5760 ; GENERIC-NEXT: retq # sched: [1:1.00]
5762 ; HASWELL-LABEL: test_psllw:
5764 ; HASWELL-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
5765 ; HASWELL-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
5766 ; HASWELL-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:1.00]
5767 ; HASWELL-NEXT: retq # sched: [2:1.00]
5769 ; BROADWELL-LABEL: test_psllw:
5770 ; BROADWELL: # BB#0:
5771 ; BROADWELL-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
5772 ; BROADWELL-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
5773 ; BROADWELL-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:1.00]
5774 ; BROADWELL-NEXT: retq # sched: [7:1.00]
5776 ; SKYLAKE-LABEL: test_psllw:
5778 ; SKYLAKE-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
5779 ; SKYLAKE-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5780 ; SKYLAKE-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:0.50]
5781 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
5783 ; SKX-LABEL: test_psllw:
5785 ; SKX-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
5786 ; SKX-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5787 ; SKX-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:0.50]
5788 ; SKX-NEXT: retq # sched: [7:1.00]
5790 ; ZNVER1-LABEL: test_psllw:
5792 ; ZNVER1-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
5793 ; ZNVER1-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
5794 ; ZNVER1-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:0.25]
5795 ; ZNVER1-NEXT: retq # sched: [1:0.50]
5796 %1 = call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %a0, <8 x i16> %a1)
5797 %2 = load <8 x i16>, <8 x i16> *%a2, align 16
5798 %3 = call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %1, <8 x i16> %2)
5799 %4 = shl <16 x i16> %3, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
; Declaration of the intrinsic called by test_psllw.
5802 declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) nounwind readnone
; test_psrad: shifts %a0 three times -- via @llvm.x86.avx2.psra.d with the count
; in %a1, via the same intrinsic with the count loaded from %a2, and via a plain
; `ashr` by the constant 2 -- so the expected assembly holds the xmm-count,
; memory-count and immediate forms of vpsrad. Each CPU-specific prefix pins the
; `# sched:` annotation printed next to every instruction.
5804 define <8 x i32> @test_psrad(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
5805 ; GENERIC-LABEL: test_psrad:
5807 ; GENERIC-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
5808 ; GENERIC-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
5809 ; GENERIC-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:1.00]
5810 ; GENERIC-NEXT: retq # sched: [1:1.00]
5812 ; HASWELL-LABEL: test_psrad:
5814 ; HASWELL-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
5815 ; HASWELL-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
5816 ; HASWELL-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:1.00]
5817 ; HASWELL-NEXT: retq # sched: [2:1.00]
5819 ; BROADWELL-LABEL: test_psrad:
5820 ; BROADWELL: # BB#0:
5821 ; BROADWELL-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
5822 ; BROADWELL-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
5823 ; BROADWELL-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:1.00]
5824 ; BROADWELL-NEXT: retq # sched: [7:1.00]
5826 ; SKYLAKE-LABEL: test_psrad:
5828 ; SKYLAKE-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
5829 ; SKYLAKE-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5830 ; SKYLAKE-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:0.50]
5831 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
5833 ; SKX-LABEL: test_psrad:
5835 ; SKX-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
5836 ; SKX-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5837 ; SKX-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:0.50]
5838 ; SKX-NEXT: retq # sched: [7:1.00]
5840 ; ZNVER1-LABEL: test_psrad:
5842 ; ZNVER1-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
5843 ; ZNVER1-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
5844 ; ZNVER1-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:0.25]
5845 ; ZNVER1-NEXT: retq # sched: [1:0.50]
5846 %1 = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %a0, <4 x i32> %a1)
5847 %2 = load <4 x i32>, <4 x i32> *%a2, align 16
5848 %3 = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %1, <4 x i32> %2)
5849 %4 = ashr <8 x i32> %3, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
; Declaration of the intrinsic called by test_psrad.
5852 declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) nounwind readnone
; test_psravd: calls the 128-bit @llvm.x86.avx2.psrav.d twice -- first on the two
; vector arguments, then on that result and a vector loaded from %a2 -- so the
; expected assembly holds a register form and a memory form of vpsravd on xmm
; registers. Each CPU-specific prefix pins the `# sched:` annotation printed
; next to every instruction.
5854 define <4 x i32> @test_psravd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
5855 ; GENERIC-LABEL: test_psravd:
5857 ; GENERIC-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
5858 ; GENERIC-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
5859 ; GENERIC-NEXT: retq # sched: [1:1.00]
5861 ; HASWELL-LABEL: test_psravd:
5863 ; HASWELL-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
5864 ; HASWELL-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [3:2.00]
5865 ; HASWELL-NEXT: retq # sched: [2:1.00]
5867 ; BROADWELL-LABEL: test_psravd:
5868 ; BROADWELL: # BB#0:
5869 ; BROADWELL-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
5870 ; BROADWELL-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
5871 ; BROADWELL-NEXT: retq # sched: [7:1.00]
5873 ; SKYLAKE-LABEL: test_psravd:
5875 ; SKYLAKE-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
5876 ; SKYLAKE-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
5877 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
5879 ; SKX-LABEL: test_psravd:
5881 ; SKX-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
5882 ; SKX-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
5883 ; SKX-NEXT: retq # sched: [7:1.00]
5885 ; ZNVER1-LABEL: test_psravd:
5887 ; ZNVER1-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
5888 ; ZNVER1-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
5889 ; ZNVER1-NEXT: retq # sched: [1:0.50]
5890 %1 = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %a0, <4 x i32> %a1)
5891 %2 = load <4 x i32>, <4 x i32> *%a2, align 16
5892 %3 = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %1, <4 x i32> %2)
; Declaration of the intrinsic called by test_psravd.
5895 declare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) nounwind readnone
; test_psravd_ymm: calls the 256-bit @llvm.x86.avx2.psrav.d.256 twice -- first on
; the two vector arguments, then on that result and a vector loaded from %a2 --
; so the expected assembly holds a register form and a memory form of vpsravd on
; ymm registers. Each CPU-specific prefix pins the `# sched:` annotation printed
; next to every instruction.
5897 define <8 x i32> @test_psravd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
5898 ; GENERIC-LABEL: test_psravd_ymm:
5900 ; GENERIC-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
5901 ; GENERIC-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
5902 ; GENERIC-NEXT: retq # sched: [1:1.00]
5904 ; HASWELL-LABEL: test_psravd_ymm:
5906 ; HASWELL-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
5907 ; HASWELL-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [3:2.00]
5908 ; HASWELL-NEXT: retq # sched: [2:1.00]
5910 ; BROADWELL-LABEL: test_psravd_ymm:
5911 ; BROADWELL: # BB#0:
5912 ; BROADWELL-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
5913 ; BROADWELL-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
5914 ; BROADWELL-NEXT: retq # sched: [7:1.00]
5916 ; SKYLAKE-LABEL: test_psravd_ymm:
5918 ; SKYLAKE-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5919 ; SKYLAKE-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5920 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
5922 ; SKX-LABEL: test_psravd_ymm:
5924 ; SKX-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5925 ; SKX-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5926 ; SKX-NEXT: retq # sched: [7:1.00]
5928 ; ZNVER1-LABEL: test_psravd_ymm:
5930 ; ZNVER1-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5931 ; ZNVER1-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5932 ; ZNVER1-NEXT: retq # sched: [1:0.50]
5933 %1 = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %a0, <8 x i32> %a1)
5934 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
5935 %3 = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %1, <8 x i32> %2)
; Declaration of the intrinsic called by test_psravd_ymm.
5938 declare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) nounwind readnone
; test_psraw: shifts %a0 three times -- via @llvm.x86.avx2.psra.w with the count
; in %a1, via the same intrinsic with the count loaded from %a2, and via a plain
; `ashr` by the constant 2 -- so the expected assembly holds the xmm-count,
; memory-count and immediate forms of vpsraw. Each CPU-specific prefix pins the
; `# sched:` annotation printed next to every instruction.
5940 define <16 x i16> @test_psraw(<16 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
5941 ; GENERIC-LABEL: test_psraw:
5943 ; GENERIC-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
5944 ; GENERIC-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
5945 ; GENERIC-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:1.00]
5946 ; GENERIC-NEXT: retq # sched: [1:1.00]
5948 ; HASWELL-LABEL: test_psraw:
5950 ; HASWELL-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
5951 ; HASWELL-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
5952 ; HASWELL-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:1.00]
5953 ; HASWELL-NEXT: retq # sched: [2:1.00]
5955 ; BROADWELL-LABEL: test_psraw:
5956 ; BROADWELL: # BB#0:
5957 ; BROADWELL-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
5958 ; BROADWELL-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
5959 ; BROADWELL-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:1.00]
5960 ; BROADWELL-NEXT: retq # sched: [7:1.00]
5962 ; SKYLAKE-LABEL: test_psraw:
5964 ; SKYLAKE-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
5965 ; SKYLAKE-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5966 ; SKYLAKE-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:0.50]
5967 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
5969 ; SKX-LABEL: test_psraw:
5971 ; SKX-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
5972 ; SKX-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5973 ; SKX-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:0.50]
5974 ; SKX-NEXT: retq # sched: [7:1.00]
5976 ; ZNVER1-LABEL: test_psraw:
5978 ; ZNVER1-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
5979 ; ZNVER1-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
5980 ; ZNVER1-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:0.25]
5981 ; ZNVER1-NEXT: retq # sched: [1:0.50]
5982 %1 = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %a0, <8 x i16> %a1)
5983 %2 = load <8 x i16>, <8 x i16> *%a2, align 16
5984 %3 = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %1, <8 x i16> %2)
5985 %4 = ashr <16 x i16> %3, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
; Declaration of the intrinsic called by test_psraw.
5988 declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) nounwind readnone
; test_psrld: shifts %a0 three times -- via @llvm.x86.avx2.psrl.d with the count
; in %a1, via the same intrinsic with the count loaded from %a2, and via a plain
; `lshr` by the constant 2 -- so the expected assembly holds the xmm-count,
; memory-count and immediate forms of vpsrld. Each CPU-specific prefix pins the
; `# sched:` annotation printed next to every instruction.
5990 define <8 x i32> @test_psrld(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
5991 ; GENERIC-LABEL: test_psrld:
5993 ; GENERIC-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
5994 ; GENERIC-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
5995 ; GENERIC-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:1.00]
5996 ; GENERIC-NEXT: retq # sched: [1:1.00]
5998 ; HASWELL-LABEL: test_psrld:
6000 ; HASWELL-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
6001 ; HASWELL-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
6002 ; HASWELL-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:1.00]
6003 ; HASWELL-NEXT: retq # sched: [2:1.00]
6005 ; BROADWELL-LABEL: test_psrld:
6006 ; BROADWELL: # BB#0:
6007 ; BROADWELL-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
6008 ; BROADWELL-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
6009 ; BROADWELL-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:1.00]
6010 ; BROADWELL-NEXT: retq # sched: [7:1.00]
6012 ; SKYLAKE-LABEL: test_psrld:
6014 ; SKYLAKE-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
6015 ; SKYLAKE-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6016 ; SKYLAKE-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:0.50]
6017 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
6019 ; SKX-LABEL: test_psrld:
6021 ; SKX-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
6022 ; SKX-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6023 ; SKX-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:0.50]
6024 ; SKX-NEXT: retq # sched: [7:1.00]
6026 ; ZNVER1-LABEL: test_psrld:
6028 ; ZNVER1-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
6029 ; ZNVER1-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
6030 ; ZNVER1-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:0.25]
6031 ; ZNVER1-NEXT: retq # sched: [1:0.50]
6032 %1 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %a0, <4 x i32> %a1)
6033 %2 = load <4 x i32>, <4 x i32> *%a2, align 16
6034 %3 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %1, <4 x i32> %2)
6035 %4 = lshr <8 x i32> %3, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
; Declaration of the intrinsic called by test_psrld.
6038 declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) nounwind readnone
; test_psrldq: a shufflevector of %a0 and zeroinitializer whose mask keeps bytes
; 3-15 of each 16-byte half of %a0 and follows them with three zero bytes. The
; expected assembly is a single vpsrldq with that shuffle comment; each
; CPU-specific prefix pins the `sched:` annotation printed for it.
6040 define <32 x i8> @test_psrldq(<32 x i8> %a0) {
6041 ; GENERIC-LABEL: test_psrldq:
6043 ; GENERIC-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00]
6044 ; GENERIC-NEXT: retq # sched: [1:1.00]
6046 ; HASWELL-LABEL: test_psrldq:
6048 ; HASWELL-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00]
6049 ; HASWELL-NEXT: retq # sched: [2:1.00]
6051 ; BROADWELL-LABEL: test_psrldq:
6052 ; BROADWELL: # BB#0:
6053 ; BROADWELL-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00]
6054 ; BROADWELL-NEXT: retq # sched: [7:1.00]
6056 ; SKYLAKE-LABEL: test_psrldq:
6058 ; SKYLAKE-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00]
6059 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
6061 ; SKX-LABEL: test_psrldq:
6063 ; SKX-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00]
6064 ; SKX-NEXT: retq # sched: [7:1.00]
6066 ; ZNVER1-LABEL: test_psrldq:
6068 ; ZNVER1-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [2:1.00]
6069 ; ZNVER1-NEXT: retq # sched: [1:0.50]
6070 %1 = shufflevector <32 x i8> %a0, <32 x i8> zeroinitializer, <32 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 33, i32 34, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48, i32 49, i32 50>
; test_psrlq: shifts %a0 three times -- via @llvm.x86.avx2.psrl.q with the count
; in %a1, via the same intrinsic with the count loaded from %a2, and via a plain
; `lshr` by the constant 2 -- so the expected assembly holds the xmm-count,
; memory-count and immediate forms of vpsrlq. Each CPU-specific prefix pins the
; `# sched:` annotation printed next to every instruction.
6074 define <4 x i64> @test_psrlq(<4 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
6075 ; GENERIC-LABEL: test_psrlq:
6077 ; GENERIC-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
6078 ; GENERIC-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
6079 ; GENERIC-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:1.00]
6080 ; GENERIC-NEXT: retq # sched: [1:1.00]
6082 ; HASWELL-LABEL: test_psrlq:
6084 ; HASWELL-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
6085 ; HASWELL-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
6086 ; HASWELL-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:1.00]
6087 ; HASWELL-NEXT: retq # sched: [2:1.00]
6089 ; BROADWELL-LABEL: test_psrlq:
6090 ; BROADWELL: # BB#0:
6091 ; BROADWELL-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
6092 ; BROADWELL-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
6093 ; BROADWELL-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:1.00]
6094 ; BROADWELL-NEXT: retq # sched: [7:1.00]
6096 ; SKYLAKE-LABEL: test_psrlq:
6098 ; SKYLAKE-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
6099 ; SKYLAKE-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6100 ; SKYLAKE-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:0.50]
6101 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
6103 ; SKX-LABEL: test_psrlq:
6105 ; SKX-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
6106 ; SKX-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6107 ; SKX-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:0.50]
6108 ; SKX-NEXT: retq # sched: [7:1.00]
6110 ; ZNVER1-LABEL: test_psrlq:
6112 ; ZNVER1-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
6113 ; ZNVER1-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
6114 ; ZNVER1-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:0.25]
6115 ; ZNVER1-NEXT: retq # sched: [1:0.50]
6116 %1 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %a0, <2 x i64> %a1)
6117 %2 = load <2 x i64>, <2 x i64> *%a2, align 16
6118 %3 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %1, <2 x i64> %2)
6119 %4 = lshr <4 x i64> %3, <i64 2, i64 2, i64 2, i64 2>
; Declaration of the intrinsic called by test_psrlq.
6122 declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) nounwind readnone
; test_psrlvd - scheduling annotations for the 128-bit VPSRLVD forms.
; The body calls llvm.x86.avx2.psrlv.d once with the register operand %a1 and
; once with an operand loaded from %a2; the expectations below show the
; register and (%rdi) forms of vpsrlvd for each -mcpu RUN line.
; NOTE(review): the HASWELL expectations list the same latency for the (%rdi)
; form as for the register form, unlike every other model here - confirm.
6124 define <4 x i32> @test_psrlvd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
6125 ; GENERIC-LABEL: test_psrlvd:
6127 ; GENERIC-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
6128 ; GENERIC-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
6129 ; GENERIC-NEXT: retq # sched: [1:1.00]
6131 ; HASWELL-LABEL: test_psrlvd:
6133 ; HASWELL-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
6134 ; HASWELL-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [3:2.00]
6135 ; HASWELL-NEXT: retq # sched: [2:1.00]
6137 ; BROADWELL-LABEL: test_psrlvd:
6138 ; BROADWELL: # BB#0:
6139 ; BROADWELL-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
6140 ; BROADWELL-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
6141 ; BROADWELL-NEXT: retq # sched: [7:1.00]
6143 ; SKYLAKE-LABEL: test_psrlvd:
6145 ; SKYLAKE-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
6146 ; SKYLAKE-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
6147 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
6149 ; SKX-LABEL: test_psrlvd:
6151 ; SKX-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
6152 ; SKX-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
6153 ; SKX-NEXT: retq # sched: [7:1.00]
6155 ; ZNVER1-LABEL: test_psrlvd:
6157 ; ZNVER1-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
6158 ; ZNVER1-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
6159 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-operand form.
6160 %1 = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %a0, <4 x i32> %a1)
; Operand loaded from memory; the expectations show it folded into the (%rdi) form.
6161 %2 = load <4 x i32>, <4 x i32> *%a2, align 16
6162 %3 = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %1, <4 x i32> %2)
6165 declare <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32>, <4 x i32>) nounwind readnone
; test_psrlvd_ymm - scheduling annotations for the 256-bit VPSRLVD forms.
; The body calls llvm.x86.avx2.psrlv.d.256 once with the register operand %a1
; and once with an operand loaded from %a2; the expectations below show the
; ymm register and (%rdi) forms of vpsrlvd for each -mcpu RUN line.
; NOTE(review): the HASWELL expectations list the same latency for the (%rdi)
; form as for the register form, unlike every other model here - confirm.
6167 define <8 x i32> @test_psrlvd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
6168 ; GENERIC-LABEL: test_psrlvd_ymm:
6170 ; GENERIC-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
6171 ; GENERIC-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
6172 ; GENERIC-NEXT: retq # sched: [1:1.00]
6174 ; HASWELL-LABEL: test_psrlvd_ymm:
6176 ; HASWELL-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
6177 ; HASWELL-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [3:2.00]
6178 ; HASWELL-NEXT: retq # sched: [2:1.00]
6180 ; BROADWELL-LABEL: test_psrlvd_ymm:
6181 ; BROADWELL: # BB#0:
6182 ; BROADWELL-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
6183 ; BROADWELL-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
6184 ; BROADWELL-NEXT: retq # sched: [7:1.00]
6186 ; SKYLAKE-LABEL: test_psrlvd_ymm:
6188 ; SKYLAKE-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6189 ; SKYLAKE-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6190 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
6192 ; SKX-LABEL: test_psrlvd_ymm:
6194 ; SKX-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6195 ; SKX-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6196 ; SKX-NEXT: retq # sched: [7:1.00]
6198 ; ZNVER1-LABEL: test_psrlvd_ymm:
6200 ; ZNVER1-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6201 ; ZNVER1-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6202 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-operand form.
6203 %1 = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %a0, <8 x i32> %a1)
; Operand loaded from memory; the expectations show it folded into the (%rdi) form.
6204 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
6205 %3 = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %1, <8 x i32> %2)
6208 declare <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32>, <8 x i32>) nounwind readnone
; test_psrlvq - scheduling annotations for the 128-bit VPSRLVQ forms.
; The body calls llvm.x86.avx2.psrlv.q once with the register operand %a1 and
; once with an operand loaded from %a2; the expectations below show the
; register and (%rdi) forms of vpsrlvq for each -mcpu RUN line.
; NOTE(review): the HASWELL expectations list the same latency for the (%rdi)
; form as for the register form, unlike every other model here - confirm.
6210 define <2 x i64> @test_psrlvq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
6211 ; GENERIC-LABEL: test_psrlvq:
6213 ; GENERIC-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
6214 ; GENERIC-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
6215 ; GENERIC-NEXT: retq # sched: [1:1.00]
6217 ; HASWELL-LABEL: test_psrlvq:
6219 ; HASWELL-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
6220 ; HASWELL-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
6221 ; HASWELL-NEXT: retq # sched: [2:1.00]
6223 ; BROADWELL-LABEL: test_psrlvq:
6224 ; BROADWELL: # BB#0:
6225 ; BROADWELL-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
6226 ; BROADWELL-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
6227 ; BROADWELL-NEXT: retq # sched: [7:1.00]
6229 ; SKYLAKE-LABEL: test_psrlvq:
6231 ; SKYLAKE-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
6232 ; SKYLAKE-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
6233 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
6235 ; SKX-LABEL: test_psrlvq:
6237 ; SKX-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
6238 ; SKX-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
6239 ; SKX-NEXT: retq # sched: [7:1.00]
6241 ; ZNVER1-LABEL: test_psrlvq:
6243 ; ZNVER1-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
6244 ; ZNVER1-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
6245 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-operand form.
6246 %1 = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %a0, <2 x i64> %a1)
; Operand loaded from memory; the expectations show it folded into the (%rdi) form.
6247 %2 = load <2 x i64>, <2 x i64> *%a2, align 16
6248 %3 = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %1, <2 x i64> %2)
6251 declare <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64>, <2 x i64>) nounwind readnone
; test_psrlvq_ymm - scheduling annotations for the 256-bit VPSRLVQ forms.
; The body calls llvm.x86.avx2.psrlv.q.256 once with the register operand %a1
; and once with an operand loaded from %a2; the expectations below show the
; ymm register and (%rdi) forms of vpsrlvq for each -mcpu RUN line.
; NOTE(review): the HASWELL expectations list the same latency for the (%rdi)
; form as for the register form, unlike every other model here - confirm.
6253 define <4 x i64> @test_psrlvq_ymm(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
6254 ; GENERIC-LABEL: test_psrlvq_ymm:
6256 ; GENERIC-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
6257 ; GENERIC-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
6258 ; GENERIC-NEXT: retq # sched: [1:1.00]
6260 ; HASWELL-LABEL: test_psrlvq_ymm:
6262 ; HASWELL-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
6263 ; HASWELL-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
6264 ; HASWELL-NEXT: retq # sched: [2:1.00]
6266 ; BROADWELL-LABEL: test_psrlvq_ymm:
6267 ; BROADWELL: # BB#0:
6268 ; BROADWELL-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
6269 ; BROADWELL-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
6270 ; BROADWELL-NEXT: retq # sched: [7:1.00]
6272 ; SKYLAKE-LABEL: test_psrlvq_ymm:
6274 ; SKYLAKE-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6275 ; SKYLAKE-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6276 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
6278 ; SKX-LABEL: test_psrlvq_ymm:
6280 ; SKX-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6281 ; SKX-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6282 ; SKX-NEXT: retq # sched: [7:1.00]
6284 ; ZNVER1-LABEL: test_psrlvq_ymm:
6286 ; ZNVER1-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6287 ; ZNVER1-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6288 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-operand form.
6289 %1 = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %a0, <4 x i64> %a1)
; Operand loaded from memory; the expectations show it folded into the (%rdi) form.
6290 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
6291 %3 = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %1, <4 x i64> %2)
6294 declare <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64>, <4 x i64>) nounwind readnone
; test_psrlw - scheduling annotations for the 256-bit VPSRLW forms.
; The body calls llvm.x86.avx2.psrl.w with the register count %a1, calls it
; again with a count loaded from %a2, then does a plain lshr by 2 in every
; lane. The expectations below show these selected as the xmm-count, (%rdi)
; and $2 forms of vpsrlw, one per -mcpu RUN line.
; NOTE(review): the HASWELL expectations give the (%rdi) form a lower latency
; than the register form, unlike every other model here - confirm against the
; scheduler model before relying on those numbers.
6296 define <16 x i16> @test_psrlw(<16 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
6297 ; GENERIC-LABEL: test_psrlw:
6299 ; GENERIC-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
6300 ; GENERIC-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
6301 ; GENERIC-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:1.00]
6302 ; GENERIC-NEXT: retq # sched: [1:1.00]
6304 ; HASWELL-LABEL: test_psrlw:
6306 ; HASWELL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
6307 ; HASWELL-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
6308 ; HASWELL-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:1.00]
6309 ; HASWELL-NEXT: retq # sched: [2:1.00]
6311 ; BROADWELL-LABEL: test_psrlw:
6312 ; BROADWELL: # BB#0:
6313 ; BROADWELL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
6314 ; BROADWELL-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
6315 ; BROADWELL-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:1.00]
6316 ; BROADWELL-NEXT: retq # sched: [7:1.00]
6318 ; SKYLAKE-LABEL: test_psrlw:
6320 ; SKYLAKE-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
6321 ; SKYLAKE-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6322 ; SKYLAKE-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:0.50]
6323 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
6325 ; SKX-LABEL: test_psrlw:
6327 ; SKX-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
6328 ; SKX-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6329 ; SKX-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:0.50]
6330 ; SKX-NEXT: retq # sched: [7:1.00]
6332 ; ZNVER1-LABEL: test_psrlw:
6334 ; ZNVER1-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
6335 ; ZNVER1-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
6336 ; ZNVER1-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:0.25]
6337 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-count form.
6338 %1 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %a0, <8 x i16> %a1)
; Count loaded from memory; the expectations show it folded into the (%rdi) form.
6339 %2 = load <8 x i16>, <8 x i16> *%a2, align 16
6340 %3 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %1, <8 x i16> %2)
; Constant splat shift; the expectations show the $2 immediate form.
6341 %4 = lshr <16 x i16> %3, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
6344 declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) nounwind readnone
; test_psubb - scheduling annotations for the 256-bit VPSUBB forms.
; The body is two generic IR `sub <32 x i8>` operations, the second taking an
; operand loaded from %a2; the expectations below show them selected as the
; register and (%rdi) forms of vpsubb for each -mcpu RUN line.
; NOTE(review): the HASWELL expectations list the same latency for the (%rdi)
; form as for the register form, unlike every other model here - confirm.
6346 define <32 x i8> @test_psubb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
6347 ; GENERIC-LABEL: test_psubb:
6349 ; GENERIC-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
6350 ; GENERIC-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
6351 ; GENERIC-NEXT: retq # sched: [1:1.00]
6353 ; HASWELL-LABEL: test_psubb:
6355 ; HASWELL-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6356 ; HASWELL-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
6357 ; HASWELL-NEXT: retq # sched: [2:1.00]
6359 ; BROADWELL-LABEL: test_psubb:
6360 ; BROADWELL: # BB#0:
6361 ; BROADWELL-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6362 ; BROADWELL-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
6363 ; BROADWELL-NEXT: retq # sched: [7:1.00]
6365 ; SKYLAKE-LABEL: test_psubb:
6367 ; SKYLAKE-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6368 ; SKYLAKE-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6369 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
6371 ; SKX-LABEL: test_psubb:
6373 ; SKX-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6374 ; SKX-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6375 ; SKX-NEXT: retq # sched: [7:1.00]
6377 ; ZNVER1-LABEL: test_psubb:
6379 ; ZNVER1-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
6380 ; ZNVER1-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6381 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-register subtract.
6382 %1 = sub <32 x i8> %a0, %a1
; Subtrahend loaded from memory; the expectations show it folded into the (%rdi) form.
6383 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
6384 %3 = sub <32 x i8> %1, %2
; test_psubd - scheduling annotations for the 256-bit VPSUBD forms.
; The body is two generic IR `sub <8 x i32>` operations, the second taking an
; operand loaded from %a2; the expectations below show them selected as the
; register and (%rdi) forms of vpsubd for each -mcpu RUN line.
; NOTE(review): the HASWELL expectations list the same latency for the (%rdi)
; form as for the register form, unlike every other model here - confirm.
6388 define <8 x i32> @test_psubd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
6389 ; GENERIC-LABEL: test_psubd:
6391 ; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
6392 ; GENERIC-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
6393 ; GENERIC-NEXT: retq # sched: [1:1.00]
6395 ; HASWELL-LABEL: test_psubd:
6397 ; HASWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6398 ; HASWELL-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
6399 ; HASWELL-NEXT: retq # sched: [2:1.00]
6401 ; BROADWELL-LABEL: test_psubd:
6402 ; BROADWELL: # BB#0:
6403 ; BROADWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6404 ; BROADWELL-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
6405 ; BROADWELL-NEXT: retq # sched: [7:1.00]
6407 ; SKYLAKE-LABEL: test_psubd:
6409 ; SKYLAKE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6410 ; SKYLAKE-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6411 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
6413 ; SKX-LABEL: test_psubd:
6415 ; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6416 ; SKX-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6417 ; SKX-NEXT: retq # sched: [7:1.00]
6419 ; ZNVER1-LABEL: test_psubd:
6421 ; ZNVER1-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
6422 ; ZNVER1-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6423 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-register subtract.
6424 %1 = sub <8 x i32> %a0, %a1
; Subtrahend loaded from memory; the expectations show it folded into the (%rdi) form.
6425 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
6426 %3 = sub <8 x i32> %1, %2
; test_psubq - scheduling annotations for the 256-bit VPSUBQ forms.
; The body is two generic IR `sub <4 x i64>` operations, the second taking an
; operand loaded from %a2; the expectations below show them selected as the
; register and (%rdi) forms of vpsubq for each -mcpu RUN line.
; NOTE(review): the HASWELL expectations list the same latency for the (%rdi)
; form as for the register form, unlike every other model here - confirm.
6430 define <4 x i64> @test_psubq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
6431 ; GENERIC-LABEL: test_psubq:
6433 ; GENERIC-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
6434 ; GENERIC-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
6435 ; GENERIC-NEXT: retq # sched: [1:1.00]
6437 ; HASWELL-LABEL: test_psubq:
6439 ; HASWELL-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6440 ; HASWELL-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
6441 ; HASWELL-NEXT: retq # sched: [2:1.00]
6443 ; BROADWELL-LABEL: test_psubq:
6444 ; BROADWELL: # BB#0:
6445 ; BROADWELL-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6446 ; BROADWELL-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
6447 ; BROADWELL-NEXT: retq # sched: [7:1.00]
6449 ; SKYLAKE-LABEL: test_psubq:
6451 ; SKYLAKE-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6452 ; SKYLAKE-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6453 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
6455 ; SKX-LABEL: test_psubq:
6457 ; SKX-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6458 ; SKX-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6459 ; SKX-NEXT: retq # sched: [7:1.00]
6461 ; ZNVER1-LABEL: test_psubq:
6463 ; ZNVER1-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
6464 ; ZNVER1-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6465 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-register subtract.
6466 %1 = sub <4 x i64> %a0, %a1
; Subtrahend loaded from memory; the expectations show it folded into the (%rdi) form.
6467 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
6468 %3 = sub <4 x i64> %1, %2
; test_psubsb - scheduling annotations for the 256-bit VPSUBSB forms.
; The body calls llvm.x86.avx2.psubs.b once with the register operand %a1 and
; once with an operand loaded from %a2; the expectations below show the
; register and (%rdi) forms of vpsubsb for each -mcpu RUN line.
; NOTE(review): the HASWELL expectations list the same latency for the (%rdi)
; form as for the register form, unlike every other model here - confirm.
6472 define <32 x i8> @test_psubsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
6473 ; GENERIC-LABEL: test_psubsb:
6475 ; GENERIC-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
6476 ; GENERIC-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
6477 ; GENERIC-NEXT: retq # sched: [1:1.00]
6479 ; HASWELL-LABEL: test_psubsb:
6481 ; HASWELL-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6482 ; HASWELL-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
6483 ; HASWELL-NEXT: retq # sched: [2:1.00]
6485 ; BROADWELL-LABEL: test_psubsb:
6486 ; BROADWELL: # BB#0:
6487 ; BROADWELL-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6488 ; BROADWELL-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
6489 ; BROADWELL-NEXT: retq # sched: [7:1.00]
6491 ; SKYLAKE-LABEL: test_psubsb:
6493 ; SKYLAKE-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6494 ; SKYLAKE-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6495 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
6497 ; SKX-LABEL: test_psubsb:
6499 ; SKX-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6500 ; SKX-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6501 ; SKX-NEXT: retq # sched: [7:1.00]
6503 ; ZNVER1-LABEL: test_psubsb:
6505 ; ZNVER1-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
6506 ; ZNVER1-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6507 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-operand form.
6508 %1 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a0, <32 x i8> %a1)
; Operand loaded from memory; the expectations show it folded into the (%rdi) form.
6509 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
6510 %3 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %1, <32 x i8> %2)
6513 declare <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8>, <32 x i8>) nounwind readnone
; test_psubsw - scheduling annotations for the 256-bit VPSUBSW forms.
; The body calls llvm.x86.avx2.psubs.w once with the register operand %a1 and
; once with an operand loaded from %a2; the expectations below show the
; register and (%rdi) forms of vpsubsw for each -mcpu RUN line.
; NOTE(review): the HASWELL expectations list the same latency for the (%rdi)
; form as for the register form, unlike every other model here - confirm.
6515 define <16 x i16> @test_psubsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
6516 ; GENERIC-LABEL: test_psubsw:
6518 ; GENERIC-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
6519 ; GENERIC-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
6520 ; GENERIC-NEXT: retq # sched: [1:1.00]
6522 ; HASWELL-LABEL: test_psubsw:
6524 ; HASWELL-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6525 ; HASWELL-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
6526 ; HASWELL-NEXT: retq # sched: [2:1.00]
6528 ; BROADWELL-LABEL: test_psubsw:
6529 ; BROADWELL: # BB#0:
6530 ; BROADWELL-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6531 ; BROADWELL-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
6532 ; BROADWELL-NEXT: retq # sched: [7:1.00]
6534 ; SKYLAKE-LABEL: test_psubsw:
6536 ; SKYLAKE-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6537 ; SKYLAKE-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6538 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
6540 ; SKX-LABEL: test_psubsw:
6542 ; SKX-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6543 ; SKX-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6544 ; SKX-NEXT: retq # sched: [7:1.00]
6546 ; ZNVER1-LABEL: test_psubsw:
6548 ; ZNVER1-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
6549 ; ZNVER1-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6550 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-operand form.
6551 %1 = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %a0, <16 x i16> %a1)
; Operand loaded from memory; the expectations show it folded into the (%rdi) form.
6552 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
6553 %3 = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %1, <16 x i16> %2)
6556 declare <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16>, <16 x i16>) nounwind readnone
; test_psubusb - scheduling annotations for the 256-bit VPSUBUSB forms.
; The body calls llvm.x86.avx2.psubus.b once with the register operand %a1 and
; once with an operand loaded from %a2; the expectations below show the
; register and (%rdi) forms of vpsubusb for each -mcpu RUN line.
; NOTE(review): the HASWELL expectations list the same latency for the (%rdi)
; form as for the register form, unlike every other model here - confirm.
6558 define <32 x i8> @test_psubusb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
6559 ; GENERIC-LABEL: test_psubusb:
6561 ; GENERIC-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
6562 ; GENERIC-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
6563 ; GENERIC-NEXT: retq # sched: [1:1.00]
6565 ; HASWELL-LABEL: test_psubusb:
6567 ; HASWELL-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6568 ; HASWELL-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
6569 ; HASWELL-NEXT: retq # sched: [2:1.00]
6571 ; BROADWELL-LABEL: test_psubusb:
6572 ; BROADWELL: # BB#0:
6573 ; BROADWELL-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6574 ; BROADWELL-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
6575 ; BROADWELL-NEXT: retq # sched: [7:1.00]
6577 ; SKYLAKE-LABEL: test_psubusb:
6579 ; SKYLAKE-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6580 ; SKYLAKE-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6581 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
6583 ; SKX-LABEL: test_psubusb:
6585 ; SKX-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6586 ; SKX-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6587 ; SKX-NEXT: retq # sched: [7:1.00]
6589 ; ZNVER1-LABEL: test_psubusb:
6591 ; ZNVER1-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
6592 ; ZNVER1-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6593 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-operand form.
6594 %1 = call <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8> %a0, <32 x i8> %a1)
; Operand loaded from memory; the expectations show it folded into the (%rdi) form.
6595 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
6596 %3 = call <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8> %1, <32 x i8> %2)
6599 declare <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8>, <32 x i8>) nounwind readnone
; test_psubusw - scheduling annotations for the 256-bit VPSUBUSW forms.
; The body calls llvm.x86.avx2.psubus.w once with the register operand %a1 and
; once with an operand loaded from %a2; the expectations below show the
; register and (%rdi) forms of vpsubusw for each -mcpu RUN line.
; NOTE(review): the HASWELL expectations list the same latency for the (%rdi)
; form as for the register form, unlike every other model here - confirm.
6601 define <16 x i16> @test_psubusw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
6602 ; GENERIC-LABEL: test_psubusw:
6604 ; GENERIC-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
6605 ; GENERIC-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
6606 ; GENERIC-NEXT: retq # sched: [1:1.00]
6608 ; HASWELL-LABEL: test_psubusw:
6610 ; HASWELL-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6611 ; HASWELL-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
6612 ; HASWELL-NEXT: retq # sched: [2:1.00]
6614 ; BROADWELL-LABEL: test_psubusw:
6615 ; BROADWELL: # BB#0:
6616 ; BROADWELL-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6617 ; BROADWELL-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
6618 ; BROADWELL-NEXT: retq # sched: [7:1.00]
6620 ; SKYLAKE-LABEL: test_psubusw:
6622 ; SKYLAKE-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6623 ; SKYLAKE-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6624 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
6626 ; SKX-LABEL: test_psubusw:
6628 ; SKX-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6629 ; SKX-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6630 ; SKX-NEXT: retq # sched: [7:1.00]
6632 ; ZNVER1-LABEL: test_psubusw:
6634 ; ZNVER1-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
6635 ; ZNVER1-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6636 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-operand form.
6637 %1 = call <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16> %a0, <16 x i16> %a1)
; Operand loaded from memory; the expectations show it folded into the (%rdi) form.
6638 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
6639 %3 = call <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16> %1, <16 x i16> %2)
6642 declare <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16>, <16 x i16>) nounwind readnone
; test_psubw - scheduling annotations for the 256-bit VPSUBW forms.
; The body is two generic IR `sub <16 x i16>` operations, the second taking an
; operand loaded from %a2; the expectations below show them selected as the
; register and (%rdi) forms of vpsubw for each -mcpu RUN line.
; NOTE(review): the HASWELL expectations list the same latency for the (%rdi)
; form as for the register form, unlike every other model here - confirm.
6644 define <16 x i16> @test_psubw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
6645 ; GENERIC-LABEL: test_psubw:
6647 ; GENERIC-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
6648 ; GENERIC-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
6649 ; GENERIC-NEXT: retq # sched: [1:1.00]
6651 ; HASWELL-LABEL: test_psubw:
6653 ; HASWELL-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6654 ; HASWELL-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
6655 ; HASWELL-NEXT: retq # sched: [2:1.00]
6657 ; BROADWELL-LABEL: test_psubw:
6658 ; BROADWELL: # BB#0:
6659 ; BROADWELL-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6660 ; BROADWELL-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
6661 ; BROADWELL-NEXT: retq # sched: [7:1.00]
6663 ; SKYLAKE-LABEL: test_psubw:
6665 ; SKYLAKE-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6666 ; SKYLAKE-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6667 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
6669 ; SKX-LABEL: test_psubw:
6671 ; SKX-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6672 ; SKX-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6673 ; SKX-NEXT: retq # sched: [7:1.00]
6675 ; ZNVER1-LABEL: test_psubw:
6677 ; ZNVER1-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
6678 ; ZNVER1-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
6679 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-register subtract.
6680 %1 = sub <16 x i16> %a0, %a1
; Subtrahend loaded from memory; the expectations show it folded into the (%rdi) form.
6681 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
6682 %3 = sub <16 x i16> %1, %2
; test_punpckhbw - scheduling annotations for the 256-bit VPUNPCKHBW forms.
; The body is two shufflevectors with the same mask, which interleaves
; elements 8-15 and 24-31 of the first source with the same elements of the
; second (mask indices 40-47 and 56-63). The second shuffle takes its second
; source from a load of %a2; the expectations below show the register and
; mem-operand forms of vpunpckhbw for each -mcpu RUN line.
; NOTE(review): the HASWELL expectations list the same latency for the memory
; form as for the register form, unlike every other model here - confirm.
6686 define <32 x i8> @test_punpckhbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
6687 ; GENERIC-LABEL: test_punpckhbw:
6689 ; GENERIC-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00]
6690 ; GENERIC-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [5:1.00]
6691 ; GENERIC-NEXT: retq # sched: [1:1.00]
6693 ; HASWELL-LABEL: test_punpckhbw:
6695 ; HASWELL-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00]
6696 ; HASWELL-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [1:1.00]
6697 ; HASWELL-NEXT: retq # sched: [2:1.00]
6699 ; BROADWELL-LABEL: test_punpckhbw:
6700 ; BROADWELL: # BB#0:
6701 ; BROADWELL-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00]
6702 ; BROADWELL-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [7:1.00]
6703 ; BROADWELL-NEXT: retq # sched: [7:1.00]
6705 ; SKYLAKE-LABEL: test_punpckhbw:
6707 ; SKYLAKE-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00]
6708 ; SKYLAKE-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [8:1.00]
6709 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
6711 ; SKX-LABEL: test_punpckhbw:
6713 ; SKX-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00]
6714 ; SKX-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [8:1.00]
6715 ; SKX-NEXT: retq # sched: [7:1.00]
6717 ; ZNVER1-LABEL: test_punpckhbw:
6719 ; ZNVER1-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:0.25]
6720 ; ZNVER1-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [8:0.50]
6721 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-register interleave of %a0 and %a1.
6722 %1 = shufflevector <32 x i8> %a0, <32 x i8> %a1, <32 x i32> <i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
; Second source loaded from memory; the expectations show it folded as the mem operand.
6723 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
6724 %3 = shufflevector <32 x i8> %1, <32 x i8> %2, <32 x i32> <i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
; test_punpckhdq - scheduling annotations for the 256-bit VPUNPCKHDQ forms.
; The body is two shufflevectors with the same mask, which interleaves
; elements 2, 3, 6 and 7 of the first source with the same elements of the
; second (mask indices 10, 11, 14, 15). The second shuffle takes its second
; source from a load of %a2. A trailing `add` of splat 1 follows; the
; expectations show it emitted as vpcmpeqd (all-ones) plus vpsubd.
; NOTE(review): the trailing add presumably exists to keep the shuffles in the
; integer domain rather than being selected as an FP unpack - confirm.
; NOTE(review): the HASWELL expectations list the same latency for the memory
; form as for the register form, unlike every other model here - confirm.
6728 define <8 x i32> @test_punpckhdq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
6729 ; GENERIC-LABEL: test_punpckhdq:
6731 ; GENERIC-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
6732 ; GENERIC-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [5:1.00]
6733 ; GENERIC-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [3:1.00]
6734 ; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
6735 ; GENERIC-NEXT: retq # sched: [1:1.00]
6737 ; HASWELL-LABEL: test_punpckhdq:
6739 ; HASWELL-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
6740 ; HASWELL-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [1:1.00]
6741 ; HASWELL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
6742 ; HASWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6743 ; HASWELL-NEXT: retq # sched: [2:1.00]
6745 ; BROADWELL-LABEL: test_punpckhdq:
6746 ; BROADWELL: # BB#0:
6747 ; BROADWELL-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
6748 ; BROADWELL-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [7:1.00]
6749 ; BROADWELL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
6750 ; BROADWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6751 ; BROADWELL-NEXT: retq # sched: [7:1.00]
6753 ; SKYLAKE-LABEL: test_punpckhdq:
6755 ; SKYLAKE-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
6756 ; SKYLAKE-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
6757 ; SKYLAKE-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
6758 ; SKYLAKE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6759 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
6761 ; SKX-LABEL: test_punpckhdq:
6763 ; SKX-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
6764 ; SKX-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
6765 ; SKX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
6766 ; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6767 ; SKX-NEXT: retq # sched: [7:1.00]
6769 ; ZNVER1-LABEL: test_punpckhdq:
6771 ; ZNVER1-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:0.25]
6772 ; ZNVER1-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:0.50]
6773 ; ZNVER1-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.25]
6774 ; ZNVER1-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
6775 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-register interleave of %a0 and %a1.
6776 %1 = shufflevector <8 x i32> %a0, <8 x i32> %a1, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
; Second source loaded from memory; the expectations show it folded as the mem operand.
6777 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
6778 %3 = shufflevector <8 x i32> %1, <8 x i32> %2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
; Integer add of splat 1; emitted as a subtract of an all-ones register.
6779 %4 = add <8 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
; Scheduling-annotation test for 256-bit vpunpckhqdq, register and memory forms.
; Each per-CPU expectation below is one register-form unpack, one memory-form
; unpack, and a vpaddq that combines the two results.
; NOTE(review): the add presumably also keeps the shuffles in the integer
; domain - confirm. The assertions are autogenerated (see the note at the top
; of this file); regenerate them instead of editing the sched values by hand.
6783 define <4 x i64> @test_punpckhqdq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
6784 ; GENERIC-LABEL: test_punpckhqdq:
6786 ; GENERIC-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
6787 ; GENERIC-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [5:1.00]
6788 ; GENERIC-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
6789 ; GENERIC-NEXT: retq # sched: [1:1.00]
6791 ; HASWELL-LABEL: test_punpckhqdq:
6793 ; HASWELL-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
6794 ; HASWELL-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [1:1.00]
6795 ; HASWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
6796 ; HASWELL-NEXT: retq # sched: [2:1.00]
6798 ; BROADWELL-LABEL: test_punpckhqdq:
6799 ; BROADWELL: # BB#0:
6800 ; BROADWELL-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
6801 ; BROADWELL-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [7:1.00]
6802 ; BROADWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
6803 ; BROADWELL-NEXT: retq # sched: [7:1.00]
6805 ; SKYLAKE-LABEL: test_punpckhqdq:
6807 ; SKYLAKE-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
6808 ; SKYLAKE-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
6809 ; SKYLAKE-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
6810 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
6812 ; SKX-LABEL: test_punpckhqdq:
6814 ; SKX-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
6815 ; SKX-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
6816 ; SKX-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
6817 ; SKX-NEXT: retq # sched: [7:1.00]
6819 ; ZNVER1-LABEL: test_punpckhqdq:
6821 ; ZNVER1-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:0.25]
6822 ; ZNVER1-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:0.50]
6823 ; ZNVER1-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
6824 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Both shuffles take %a0 as the first operand (they are not chained); the
; second one pairs it with the vector loaded from %a2.
6825 %1 = shufflevector <4 x i64> %a0, <4 x i64> %a1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
6826 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
6827 %3 = shufflevector <4 x i64> %a0, <4 x i64> %2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
6828 %4 = add <4 x i64> %1, %3
; Scheduling-annotation test for 256-bit vpunpckhwd, register and memory forms.
; Each per-CPU expectation below is a register-form unpack followed by a
; memory-form unpack that reuses ymm0 as its first source.
; The assertions are autogenerated (see the note at the top of this file);
; regenerate them instead of editing the sched values by hand.
6832 define <16 x i16> @test_punpckhwd(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
6833 ; GENERIC-LABEL: test_punpckhwd:
6835 ; GENERIC-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00]
6836 ; GENERIC-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [5:1.00]
6837 ; GENERIC-NEXT: retq # sched: [1:1.00]
6839 ; HASWELL-LABEL: test_punpckhwd:
6841 ; HASWELL-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00]
6842 ; HASWELL-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [1:1.00]
6843 ; HASWELL-NEXT: retq # sched: [2:1.00]
6845 ; BROADWELL-LABEL: test_punpckhwd:
6846 ; BROADWELL: # BB#0:
6847 ; BROADWELL-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00]
6848 ; BROADWELL-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [7:1.00]
6849 ; BROADWELL-NEXT: retq # sched: [7:1.00]
6851 ; SKYLAKE-LABEL: test_punpckhwd:
6853 ; SKYLAKE-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00]
6854 ; SKYLAKE-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [8:1.00]
6855 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
6857 ; SKX-LABEL: test_punpckhwd:
6859 ; SKX-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00]
6860 ; SKX-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [8:1.00]
6861 ; SKX-NEXT: retq # sched: [7:1.00]
6863 ; ZNVER1-LABEL: test_punpckhwd:
6865 ; ZNVER1-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:0.25]
6866 ; ZNVER1-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [8:0.50]
6867 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; The shuffles are chained: %3 takes %1 as its first operand and the vector
; loaded from %a2 as its second, using the same mask as %1.
6868 %1 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
6869 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
6870 %3 = shufflevector <16 x i16> %1, <16 x i16> %2, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
; Scheduling-annotation test for 256-bit vpunpcklbw, register and memory forms.
; Each per-CPU expectation below is a register-form unpack followed by a
; memory-form unpack that reuses ymm0 as its first source.
; The assertions are autogenerated (see the note at the top of this file);
; regenerate them instead of editing the sched values by hand.
6874 define <32 x i8> @test_punpcklbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
6875 ; GENERIC-LABEL: test_punpcklbw:
6877 ; GENERIC-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00]
6878 ; GENERIC-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [5:1.00]
6879 ; GENERIC-NEXT: retq # sched: [1:1.00]
6881 ; HASWELL-LABEL: test_punpcklbw:
6883 ; HASWELL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00]
6884 ; HASWELL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [1:1.00]
6885 ; HASWELL-NEXT: retq # sched: [2:1.00]
6887 ; BROADWELL-LABEL: test_punpcklbw:
6888 ; BROADWELL: # BB#0:
6889 ; BROADWELL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00]
6890 ; BROADWELL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [7:1.00]
6891 ; BROADWELL-NEXT: retq # sched: [7:1.00]
6893 ; SKYLAKE-LABEL: test_punpcklbw:
6895 ; SKYLAKE-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00]
6896 ; SKYLAKE-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [8:1.00]
6897 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
6899 ; SKX-LABEL: test_punpcklbw:
6901 ; SKX-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00]
6902 ; SKX-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [8:1.00]
6903 ; SKX-NEXT: retq # sched: [7:1.00]
6905 ; ZNVER1-LABEL: test_punpcklbw:
6907 ; ZNVER1-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:0.25]
6908 ; ZNVER1-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [8:0.50]
6909 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; The shuffles are chained: %3 takes %1 as its first operand and the vector
; loaded from %a2 as its second, using the same mask as %1.
6910 %1 = shufflevector <32 x i8> %a0, <32 x i8> %a1, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
6911 %2 = load <32 x i8>, <32 x i8> *%a2, align 32
6912 %3 = shufflevector <32 x i8> %1, <32 x i8> %2, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
; Scheduling-annotation test for 256-bit vpunpckldq, register and memory forms.
; Each per-CPU expectation below is a register-form unpack, a memory-form
; unpack, and then a vpcmpeqd/vpsubd pair, which is how the trailing add of a
; splat of 1 shows up in the expected assembly.
; NOTE(review): the add presumably exists to keep the shuffles in the integer
; domain - confirm. The assertions are autogenerated (see the note at the top
; of this file); regenerate them instead of editing the sched values by hand.
6916 define <8 x i32> @test_punpckldq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
6917 ; GENERIC-LABEL: test_punpckldq:
6919 ; GENERIC-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
6920 ; GENERIC-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [5:1.00]
6921 ; GENERIC-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [3:1.00]
6922 ; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
6923 ; GENERIC-NEXT: retq # sched: [1:1.00]
6925 ; HASWELL-LABEL: test_punpckldq:
6927 ; HASWELL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
6928 ; HASWELL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [1:1.00]
6929 ; HASWELL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
6930 ; HASWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6931 ; HASWELL-NEXT: retq # sched: [2:1.00]
6933 ; BROADWELL-LABEL: test_punpckldq:
6934 ; BROADWELL: # BB#0:
6935 ; BROADWELL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
6936 ; BROADWELL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [7:1.00]
6937 ; BROADWELL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
6938 ; BROADWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
6939 ; BROADWELL-NEXT: retq # sched: [7:1.00]
6941 ; SKYLAKE-LABEL: test_punpckldq:
6943 ; SKYLAKE-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
6944 ; SKYLAKE-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
6945 ; SKYLAKE-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
6946 ; SKYLAKE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6947 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
6949 ; SKX-LABEL: test_punpckldq:
6951 ; SKX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
6952 ; SKX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
6953 ; SKX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
6954 ; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
6955 ; SKX-NEXT: retq # sched: [7:1.00]
6957 ; ZNVER1-LABEL: test_punpckldq:
6959 ; ZNVER1-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:0.25]
6960 ; ZNVER1-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:0.50]
6961 ; ZNVER1-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.25]
6962 ; ZNVER1-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
6963 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; The shuffles are chained: %3 takes %1 as its first operand and the vector
; loaded from %a2 as its second; %4 then adds 1 to every lane of %3.
6964 %1 = shufflevector <8 x i32> %a0, <8 x i32> %a1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
6965 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
6966 %3 = shufflevector <8 x i32> %1, <8 x i32> %2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
6967 %4 = add <8 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
; Scheduling-annotation test for 256-bit vpunpcklqdq, register and memory forms.
; Each per-CPU expectation below is one register-form unpack, one memory-form
; unpack, and a vpaddq that combines the two results.
; NOTE(review): the add presumably also keeps the shuffles in the integer
; domain - confirm. The assertions are autogenerated (see the note at the top
; of this file); regenerate them instead of editing the sched values by hand.
6971 define <4 x i64> @test_punpcklqdq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
6972 ; GENERIC-LABEL: test_punpcklqdq:
6974 ; GENERIC-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
6975 ; GENERIC-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [5:1.00]
6976 ; GENERIC-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
6977 ; GENERIC-NEXT: retq # sched: [1:1.00]
6979 ; HASWELL-LABEL: test_punpcklqdq:
6981 ; HASWELL-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
6982 ; HASWELL-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [1:1.00]
6983 ; HASWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
6984 ; HASWELL-NEXT: retq # sched: [2:1.00]
6986 ; BROADWELL-LABEL: test_punpcklqdq:
6987 ; BROADWELL: # BB#0:
6988 ; BROADWELL-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
6989 ; BROADWELL-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [7:1.00]
6990 ; BROADWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
6991 ; BROADWELL-NEXT: retq # sched: [7:1.00]
6993 ; SKYLAKE-LABEL: test_punpcklqdq:
6995 ; SKYLAKE-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
6996 ; SKYLAKE-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
6997 ; SKYLAKE-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
6998 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
7000 ; SKX-LABEL: test_punpcklqdq:
7002 ; SKX-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
7003 ; SKX-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
7004 ; SKX-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
7005 ; SKX-NEXT: retq # sched: [7:1.00]
7007 ; ZNVER1-LABEL: test_punpcklqdq:
7009 ; ZNVER1-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:0.25]
7010 ; ZNVER1-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:0.50]
7011 ; ZNVER1-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
7012 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Both shuffles take %a0 as the first operand (they are not chained); the
; second one pairs it with the vector loaded from %a2.
7013 %1 = shufflevector <4 x i64> %a0, <4 x i64> %a1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
7014 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
7015 %3 = shufflevector <4 x i64> %a0, <4 x i64> %2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
7016 %4 = add <4 x i64> %1, %3
; Scheduling-annotation test for 256-bit vpunpcklwd, register and memory forms.
; Each per-CPU expectation below is a register-form unpack followed by a
; memory-form unpack that reuses ymm0 as its first source.
; The assertions are autogenerated (see the note at the top of this file);
; regenerate them instead of editing the sched values by hand.
7020 define <16 x i16> @test_punpcklwd(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
7021 ; GENERIC-LABEL: test_punpcklwd:
7023 ; GENERIC-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00]
7024 ; GENERIC-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [5:1.00]
7025 ; GENERIC-NEXT: retq # sched: [1:1.00]
7027 ; HASWELL-LABEL: test_punpcklwd:
7029 ; HASWELL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00]
7030 ; HASWELL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [1:1.00]
7031 ; HASWELL-NEXT: retq # sched: [2:1.00]
7033 ; BROADWELL-LABEL: test_punpcklwd:
7034 ; BROADWELL: # BB#0:
7035 ; BROADWELL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00]
7036 ; BROADWELL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [7:1.00]
7037 ; BROADWELL-NEXT: retq # sched: [7:1.00]
7039 ; SKYLAKE-LABEL: test_punpcklwd:
7041 ; SKYLAKE-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00]
7042 ; SKYLAKE-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [8:1.00]
7043 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
7045 ; SKX-LABEL: test_punpcklwd:
7047 ; SKX-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00]
7048 ; SKX-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [8:1.00]
7049 ; SKX-NEXT: retq # sched: [7:1.00]
7051 ; ZNVER1-LABEL: test_punpcklwd:
7053 ; ZNVER1-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:0.25]
7054 ; ZNVER1-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [8:0.50]
7055 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; The shuffles are chained: %3 takes %1 as its first operand and the vector
; loaded from %a2 as its second, using the same mask as %1.
7056 %1 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
7057 %2 = load <16 x i16>, <16 x i16> *%a2, align 32
7058 %3 = shufflevector <16 x i16> %1, <16 x i16> %2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
; Scheduling-annotation test for 256-bit vpxor, register and memory forms.
; Each per-CPU expectation below is a register-form vpxor, a vpxor with a
; (%rdi) memory operand, and a vpaddq.
; NOTE(review): the trailing add presumably keeps the xor in the integer
; domain - confirm. The assertions are autogenerated (see the note at the top
; of this file); regenerate them instead of editing the sched values by hand.
7062 define <4 x i64> @test_pxor(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
7063 ; GENERIC-LABEL: test_pxor:
7065 ; GENERIC-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
7066 ; GENERIC-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [5:1.00]
7067 ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
7068 ; GENERIC-NEXT: retq # sched: [1:1.00]
7070 ; HASWELL-LABEL: test_pxor:
7072 ; HASWELL-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
7073 ; HASWELL-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
7074 ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
7075 ; HASWELL-NEXT: retq # sched: [2:1.00]
7077 ; BROADWELL-LABEL: test_pxor:
7078 ; BROADWELL: # BB#0:
7079 ; BROADWELL-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
7080 ; BROADWELL-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
7081 ; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
7082 ; BROADWELL-NEXT: retq # sched: [7:1.00]
7084 ; SKYLAKE-LABEL: test_pxor:
7086 ; SKYLAKE-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
7087 ; SKYLAKE-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
7088 ; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
7089 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
7091 ; SKX-LABEL: test_pxor:
7093 ; SKX-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
7094 ; SKX-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
7095 ; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
7096 ; SKX-NEXT: retq # sched: [7:1.00]
7098 ; ZNVER1-LABEL: test_pxor:
7100 ; ZNVER1-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
7101 ; ZNVER1-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
7102 ; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
7103 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; %1 xors the two register arguments, %3 xors that result with the vector
; loaded from %a2, and %4 adds %a1 back in.
7104 %1 = xor <4 x i64> %a0, %a1
7105 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
7106 %3 = xor <4 x i64> %1, %2
7107 %4 = add <4 x i64> %3, %a1