1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
10 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
; test_addpd: fadd of %a0 and %a1, followed by a second fadd whose other
; operand is the <4 x double> loaded from %a2. The checks below expect one
; register-operand vaddpd and one memory-operand vaddpd on ymm registers, each
; carrying the "sched" annotation that llc prints under -print-schedule for the
; CPU selected by the matching RUN line.
12 define <4 x double> @test_addpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
13 ; GENERIC-LABEL: test_addpd:
15 ; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
16 ; GENERIC-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
17 ; GENERIC-NEXT: retq # sched: [1:1.00]
19 ; SANDY-LABEL: test_addpd:
21 ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
22 ; SANDY-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
23 ; SANDY-NEXT: retq # sched: [1:1.00]
25 ; HASWELL-LABEL: test_addpd:
27 ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
28 ; HASWELL-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [3:1.00]
29 ; HASWELL-NEXT: retq # sched: [2:1.00]
31 ; BROADWELL-LABEL: test_addpd:
33 ; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
34 ; BROADWELL-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
35 ; BROADWELL-NEXT: retq # sched: [7:1.00]
37 ; SKYLAKE-LABEL: test_addpd:
39 ; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
40 ; SKYLAKE-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
41 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
43 ; SKX-LABEL: test_addpd:
45 ; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
46 ; SKX-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
47 ; SKX-NEXT: retq # sched: [7:1.00]
49 ; BTVER2-LABEL: test_addpd:
51 ; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
52 ; BTVER2-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
53 ; BTVER2-NEXT: retq # sched: [4:1.00]
55 ; ZNVER1-LABEL: test_addpd:
57 ; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
58 ; ZNVER1-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
59 ; ZNVER1-NEXT: retq # sched: [1:0.50]
60 %1 = fadd <4 x double> %a0, %a1
61 %2 = load <4 x double>, <4 x double> *%a2, align 32
62 %3 = fadd <4 x double> %1, %2
; test_addps: single-precision counterpart of the packed add test. It performs
; an fadd of %a0 and %a1, then an fadd with the <8 x float> loaded from %a2.
; The checks below expect a register-operand vaddps followed by a
; memory-operand vaddps, each with the per-CPU "sched" annotation printed
; under -print-schedule.
66 define <8 x float> @test_addps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
67 ; GENERIC-LABEL: test_addps:
69 ; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
70 ; GENERIC-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
71 ; GENERIC-NEXT: retq # sched: [1:1.00]
73 ; SANDY-LABEL: test_addps:
75 ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
76 ; SANDY-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
77 ; SANDY-NEXT: retq # sched: [1:1.00]
79 ; HASWELL-LABEL: test_addps:
81 ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
82 ; HASWELL-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [3:1.00]
83 ; HASWELL-NEXT: retq # sched: [2:1.00]
85 ; BROADWELL-LABEL: test_addps:
87 ; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
88 ; BROADWELL-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
89 ; BROADWELL-NEXT: retq # sched: [7:1.00]
91 ; SKYLAKE-LABEL: test_addps:
93 ; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
94 ; SKYLAKE-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
95 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
97 ; SKX-LABEL: test_addps:
99 ; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
100 ; SKX-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
101 ; SKX-NEXT: retq # sched: [7:1.00]
103 ; BTVER2-LABEL: test_addps:
105 ; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
106 ; BTVER2-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
107 ; BTVER2-NEXT: retq # sched: [4:1.00]
109 ; ZNVER1-LABEL: test_addps:
111 ; ZNVER1-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
112 ; ZNVER1-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
113 ; ZNVER1-NEXT: retq # sched: [1:0.50]
114 %1 = fadd <8 x float> %a0, %a1
115 %2 = load <8 x float>, <8 x float> *%a2, align 32
116 %3 = fadd <8 x float> %1, %2
; test_addsubpd: calls the @llvm.x86.avx.addsub.pd.256 intrinsic twice, first
; on (%a0, %a1) and then on (first result, <4 x double> loaded from %a2).
; The checks below expect a register-operand vaddsubpd followed by a
; memory-operand vaddsubpd, each with the per-CPU "sched" annotation printed
; under -print-schedule.
120 define <4 x double> @test_addsubpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
121 ; GENERIC-LABEL: test_addsubpd:
123 ; GENERIC-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
124 ; GENERIC-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
125 ; GENERIC-NEXT: retq # sched: [1:1.00]
127 ; SANDY-LABEL: test_addsubpd:
129 ; SANDY-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
130 ; SANDY-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
131 ; SANDY-NEXT: retq # sched: [1:1.00]
133 ; HASWELL-LABEL: test_addsubpd:
135 ; HASWELL-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
136 ; HASWELL-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [3:1.00]
137 ; HASWELL-NEXT: retq # sched: [2:1.00]
139 ; BROADWELL-LABEL: test_addsubpd:
141 ; BROADWELL-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
142 ; BROADWELL-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
143 ; BROADWELL-NEXT: retq # sched: [7:1.00]
145 ; SKYLAKE-LABEL: test_addsubpd:
147 ; SKYLAKE-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
148 ; SKYLAKE-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
149 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
151 ; SKX-LABEL: test_addsubpd:
153 ; SKX-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
154 ; SKX-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
155 ; SKX-NEXT: retq # sched: [7:1.00]
157 ; BTVER2-LABEL: test_addsubpd:
159 ; BTVER2-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
160 ; BTVER2-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
161 ; BTVER2-NEXT: retq # sched: [4:1.00]
163 ; ZNVER1-LABEL: test_addsubpd:
165 ; ZNVER1-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
166 ; ZNVER1-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
167 ; ZNVER1-NEXT: retq # sched: [1:0.50]
168 %1 = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %a0, <4 x double> %a1)
169 %2 = load <4 x double>, <4 x double> *%a2, align 32
170 %3 = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %1, <4 x double> %2)
173 declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) nounwind readnone
; test_addsubps: calls the @llvm.x86.avx.addsub.ps.256 intrinsic twice, first
; on (%a0, %a1) and then on (first result, <8 x float> loaded from %a2).
; The checks below expect a register-operand vaddsubps followed by a
; memory-operand vaddsubps, each with the per-CPU "sched" annotation printed
; under -print-schedule.
175 define <8 x float> @test_addsubps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
176 ; GENERIC-LABEL: test_addsubps:
178 ; GENERIC-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
179 ; GENERIC-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
180 ; GENERIC-NEXT: retq # sched: [1:1.00]
182 ; SANDY-LABEL: test_addsubps:
184 ; SANDY-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
185 ; SANDY-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
186 ; SANDY-NEXT: retq # sched: [1:1.00]
188 ; HASWELL-LABEL: test_addsubps:
190 ; HASWELL-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
191 ; HASWELL-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [3:1.00]
192 ; HASWELL-NEXT: retq # sched: [2:1.00]
194 ; BROADWELL-LABEL: test_addsubps:
196 ; BROADWELL-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
197 ; BROADWELL-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
198 ; BROADWELL-NEXT: retq # sched: [7:1.00]
200 ; SKYLAKE-LABEL: test_addsubps:
202 ; SKYLAKE-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
203 ; SKYLAKE-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
204 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
206 ; SKX-LABEL: test_addsubps:
208 ; SKX-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
209 ; SKX-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
210 ; SKX-NEXT: retq # sched: [7:1.00]
212 ; BTVER2-LABEL: test_addsubps:
214 ; BTVER2-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
215 ; BTVER2-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
216 ; BTVER2-NEXT: retq # sched: [4:1.00]
218 ; ZNVER1-LABEL: test_addsubps:
220 ; ZNVER1-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
221 ; ZNVER1-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
222 ; ZNVER1-NEXT: retq # sched: [1:0.50]
223 %1 = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %a0, <8 x float> %a1)
224 %2 = load <8 x float>, <8 x float> *%a2, align 32
225 %3 = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %1, <8 x float> %2)
228 declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwind readnone
; test_andnotpd: bitcasts %a0 and %a1 to <4 x i64>, computes (~%a0) & %a1
; (the xor with all-ones is the bitwise NOT), then inverts that result and
; ANDs it with the value loaded from %a2. The result is bitcast back to
; <4 x double> and added to %a1 with fadd. The checks below expect a
; register-operand vandnpd, a memory-operand vandnpd and a vaddpd, each with
; the per-CPU "sched" annotation printed under -print-schedule.
; NOTE(review): the trailing fadd presumably keeps the logic ops selected as
; floating-point-domain instructions; that intent is not stated in this file.
230 define <4 x double> @test_andnotpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
231 ; GENERIC-LABEL: test_andnotpd:
233 ; GENERIC-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
234 ; GENERIC-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
235 ; GENERIC-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
236 ; GENERIC-NEXT: retq # sched: [1:1.00]
238 ; SANDY-LABEL: test_andnotpd:
240 ; SANDY-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
241 ; SANDY-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
242 ; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
243 ; SANDY-NEXT: retq # sched: [1:1.00]
245 ; HASWELL-LABEL: test_andnotpd:
247 ; HASWELL-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
248 ; HASWELL-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
249 ; HASWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
250 ; HASWELL-NEXT: retq # sched: [2:1.00]
252 ; BROADWELL-LABEL: test_andnotpd:
254 ; BROADWELL-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
255 ; BROADWELL-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
256 ; BROADWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
257 ; BROADWELL-NEXT: retq # sched: [7:1.00]
259 ; SKYLAKE-LABEL: test_andnotpd:
261 ; SKYLAKE-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
262 ; SKYLAKE-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
263 ; SKYLAKE-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
264 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
266 ; SKX-LABEL: test_andnotpd:
268 ; SKX-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
269 ; SKX-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
270 ; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.33]
271 ; SKX-NEXT: retq # sched: [7:1.00]
273 ; BTVER2-LABEL: test_andnotpd:
275 ; BTVER2-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
276 ; BTVER2-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
277 ; BTVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
278 ; BTVER2-NEXT: retq # sched: [4:1.00]
280 ; ZNVER1-LABEL: test_andnotpd:
282 ; ZNVER1-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
283 ; ZNVER1-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
284 ; ZNVER1-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
285 ; ZNVER1-NEXT: retq # sched: [1:0.50]
286 %1 = bitcast <4 x double> %a0 to <4 x i64>
287 %2 = bitcast <4 x double> %a1 to <4 x i64>
288 %3 = xor <4 x i64> %1, <i64 -1, i64 -1, i64 -1, i64 -1>
289 %4 = and <4 x i64> %3, %2
290 %5 = load <4 x double>, <4 x double> *%a2, align 32
291 %6 = bitcast <4 x double> %5 to <4 x i64>
292 %7 = xor <4 x i64> %4, <i64 -1, i64 -1, i64 -1, i64 -1>
293 %8 = and <4 x i64> %6, %7
294 %9 = bitcast <4 x i64> %8 to <4 x double>
295 %10 = fadd <4 x double> %a1, %9
; test_andnotps: bitcasts %a0 and %a1 to <4 x i64>, computes (~%a0) & %a1
; (the xor with all-ones is the bitwise NOT), then inverts that result and
; ANDs it with the value loaded from %a2. The result is bitcast back to
; <8 x float> and added to %a1 with fadd. The checks below expect a
; register-operand vandnps, a memory-operand vandnps and a vaddps, each with
; the per-CPU "sched" annotation printed under -print-schedule.
; NOTE(review): the trailing fadd presumably keeps the logic ops selected as
; floating-point-domain instructions; that intent is not stated in this file.
299 define <8 x float> @test_andnotps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
300 ; GENERIC-LABEL: test_andnotps:
302 ; GENERIC-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
303 ; GENERIC-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
304 ; GENERIC-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
305 ; GENERIC-NEXT: retq # sched: [1:1.00]
307 ; SANDY-LABEL: test_andnotps:
309 ; SANDY-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
310 ; SANDY-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
311 ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
312 ; SANDY-NEXT: retq # sched: [1:1.00]
314 ; HASWELL-LABEL: test_andnotps:
316 ; HASWELL-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
317 ; HASWELL-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
318 ; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
319 ; HASWELL-NEXT: retq # sched: [2:1.00]
321 ; BROADWELL-LABEL: test_andnotps:
323 ; BROADWELL-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
324 ; BROADWELL-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
325 ; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
326 ; BROADWELL-NEXT: retq # sched: [7:1.00]
328 ; SKYLAKE-LABEL: test_andnotps:
330 ; SKYLAKE-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
331 ; SKYLAKE-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
332 ; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
333 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
335 ; SKX-LABEL: test_andnotps:
337 ; SKX-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
338 ; SKX-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
339 ; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.33]
340 ; SKX-NEXT: retq # sched: [7:1.00]
342 ; BTVER2-LABEL: test_andnotps:
344 ; BTVER2-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
345 ; BTVER2-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
346 ; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
347 ; BTVER2-NEXT: retq # sched: [4:1.00]
349 ; ZNVER1-LABEL: test_andnotps:
351 ; ZNVER1-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
352 ; ZNVER1-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
353 ; ZNVER1-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
354 ; ZNVER1-NEXT: retq # sched: [1:0.50]
355 %1 = bitcast <8 x float> %a0 to <4 x i64>
356 %2 = bitcast <8 x float> %a1 to <4 x i64>
357 %3 = xor <4 x i64> %1, <i64 -1, i64 -1, i64 -1, i64 -1>
358 %4 = and <4 x i64> %3, %2
359 %5 = load <8 x float>, <8 x float> *%a2, align 32
360 %6 = bitcast <8 x float> %5 to <4 x i64>
361 %7 = xor <4 x i64> %4, <i64 -1, i64 -1, i64 -1, i64 -1>
362 %8 = and <4 x i64> %6, %7
363 %9 = bitcast <4 x i64> %8 to <8 x float>
364 %10 = fadd <8 x float> %a1, %9
; test_andpd: bitcasts %a0 and %a1 to <4 x i64>, ANDs them, ANDs the result
; with the value loaded from %a2, bitcasts back to <4 x double> and adds %a1
; with fadd. The checks below expect a register-operand vandpd, a
; memory-operand vandpd and a vaddpd, each with the per-CPU "sched"
; annotation printed under -print-schedule.
; NOTE(review): the trailing fadd presumably keeps the logic ops selected as
; floating-point-domain instructions; that intent is not stated in this file.
368 define <4 x double> @test_andpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
369 ; GENERIC-LABEL: test_andpd:
371 ; GENERIC-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
372 ; GENERIC-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
373 ; GENERIC-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
374 ; GENERIC-NEXT: retq # sched: [1:1.00]
376 ; SANDY-LABEL: test_andpd:
378 ; SANDY-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
379 ; SANDY-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
380 ; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
381 ; SANDY-NEXT: retq # sched: [1:1.00]
383 ; HASWELL-LABEL: test_andpd:
385 ; HASWELL-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
386 ; HASWELL-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
387 ; HASWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
388 ; HASWELL-NEXT: retq # sched: [2:1.00]
390 ; BROADWELL-LABEL: test_andpd:
392 ; BROADWELL-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
393 ; BROADWELL-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
394 ; BROADWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
395 ; BROADWELL-NEXT: retq # sched: [7:1.00]
397 ; SKYLAKE-LABEL: test_andpd:
399 ; SKYLAKE-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
400 ; SKYLAKE-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
401 ; SKYLAKE-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
402 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
404 ; SKX-LABEL: test_andpd:
406 ; SKX-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
407 ; SKX-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
408 ; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.33]
409 ; SKX-NEXT: retq # sched: [7:1.00]
411 ; BTVER2-LABEL: test_andpd:
413 ; BTVER2-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
414 ; BTVER2-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
415 ; BTVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
416 ; BTVER2-NEXT: retq # sched: [4:1.00]
418 ; ZNVER1-LABEL: test_andpd:
420 ; ZNVER1-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
421 ; ZNVER1-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
422 ; ZNVER1-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
423 ; ZNVER1-NEXT: retq # sched: [1:0.50]
424 %1 = bitcast <4 x double> %a0 to <4 x i64>
425 %2 = bitcast <4 x double> %a1 to <4 x i64>
426 %3 = and <4 x i64> %1, %2
427 %4 = load <4 x double>, <4 x double> *%a2, align 32
428 %5 = bitcast <4 x double> %4 to <4 x i64>
429 %6 = and <4 x i64> %3, %5
430 %7 = bitcast <4 x i64> %6 to <4 x double>
431 %8 = fadd <4 x double> %a1, %7
; test_andps: bitcasts %a0 and %a1 to <4 x i64>, ANDs them, ANDs the result
; with the value loaded from %a2, bitcasts back to <8 x float> and adds %a1
; with fadd. The checks below expect a register-operand vandps, a
; memory-operand vandps and a vaddps, each with the per-CPU "sched"
; annotation printed under -print-schedule.
; NOTE(review): the trailing fadd presumably keeps the logic ops selected as
; floating-point-domain instructions; that intent is not stated in this file.
435 define <8 x float> @test_andps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
436 ; GENERIC-LABEL: test_andps:
438 ; GENERIC-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
439 ; GENERIC-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
440 ; GENERIC-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
441 ; GENERIC-NEXT: retq # sched: [1:1.00]
443 ; SANDY-LABEL: test_andps:
445 ; SANDY-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
446 ; SANDY-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
447 ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
448 ; SANDY-NEXT: retq # sched: [1:1.00]
450 ; HASWELL-LABEL: test_andps:
452 ; HASWELL-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
453 ; HASWELL-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
454 ; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
455 ; HASWELL-NEXT: retq # sched: [2:1.00]
457 ; BROADWELL-LABEL: test_andps:
459 ; BROADWELL-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
460 ; BROADWELL-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
461 ; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
462 ; BROADWELL-NEXT: retq # sched: [7:1.00]
464 ; SKYLAKE-LABEL: test_andps:
466 ; SKYLAKE-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
467 ; SKYLAKE-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
468 ; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
469 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
471 ; SKX-LABEL: test_andps:
473 ; SKX-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
474 ; SKX-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
475 ; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.33]
476 ; SKX-NEXT: retq # sched: [7:1.00]
478 ; BTVER2-LABEL: test_andps:
480 ; BTVER2-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
481 ; BTVER2-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
482 ; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
483 ; BTVER2-NEXT: retq # sched: [4:1.00]
485 ; ZNVER1-LABEL: test_andps:
487 ; ZNVER1-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
488 ; ZNVER1-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
489 ; ZNVER1-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
490 ; ZNVER1-NEXT: retq # sched: [1:0.50]
491 %1 = bitcast <8 x float> %a0 to <4 x i64>
492 %2 = bitcast <8 x float> %a1 to <4 x i64>
493 %3 = and <4 x i64> %1, %2
494 %4 = load <8 x float>, <8 x float> *%a2, align 32
495 %5 = bitcast <8 x float> %4 to <4 x i64>
496 %6 = and <4 x i64> %3, %5
497 %7 = bitcast <4 x i64> %6 to <8 x float>
498 %8 = fadd <8 x float> %a1, %7
; test_blendpd: shuffles %a0 and %a1 with mask <0, 5, 6, 3> (lanes 1 and 2
; come from the second operand), adds %a1 to that with fadd, then applies the
; same mask to the sum and the <4 x double> loaded from %a2. The checks below
; expect a register-operand vblendpd, a vaddpd and a memory-operand vblendpd,
; each with the per-CPU "sched" annotation printed under -print-schedule.
502 define <4 x double> @test_blendpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
503 ; GENERIC-LABEL: test_blendpd:
505 ; GENERIC-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.50]
506 ; GENERIC-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
507 ; GENERIC-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [8:0.50]
508 ; GENERIC-NEXT: retq # sched: [1:1.00]
510 ; SANDY-LABEL: test_blendpd:
512 ; SANDY-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.50]
513 ; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
514 ; SANDY-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [8:0.50]
515 ; SANDY-NEXT: retq # sched: [1:1.00]
517 ; HASWELL-LABEL: test_blendpd:
519 ; HASWELL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.33]
520 ; HASWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
521 ; HASWELL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [1:0.50]
522 ; HASWELL-NEXT: retq # sched: [2:1.00]
524 ; BROADWELL-LABEL: test_blendpd:
526 ; BROADWELL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.33]
527 ; BROADWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
528 ; BROADWELL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [7:0.50]
529 ; BROADWELL-NEXT: retq # sched: [7:1.00]
531 ; SKYLAKE-LABEL: test_blendpd:
533 ; SKYLAKE-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.33]
534 ; SKYLAKE-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
535 ; SKYLAKE-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [8:0.50]
536 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
538 ; SKX-LABEL: test_blendpd:
540 ; SKX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.33]
541 ; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.33]
542 ; SKX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [8:0.50]
543 ; SKX-NEXT: retq # sched: [7:1.00]
545 ; BTVER2-LABEL: test_blendpd:
547 ; BTVER2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.50]
548 ; BTVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
549 ; BTVER2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [6:1.00]
550 ; BTVER2-NEXT: retq # sched: [4:1.00]
552 ; ZNVER1-LABEL: test_blendpd:
554 ; ZNVER1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.50]
555 ; ZNVER1-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
556 ; ZNVER1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [8:0.50]
557 ; ZNVER1-NEXT: retq # sched: [1:0.50]
558 %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
559 %2 = load <4 x double>, <4 x double> *%a2, align 32
560 %3 = fadd <4 x double> %a1, %1
561 %4 = shufflevector <4 x double> %3, <4 x double> %2, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
; test_blendps: shuffles %a0 and %a1 with mask <0, 9, 10, 3, 4, 5, 6, 7>
; (lanes 1 and 2 come from %a1), then shuffles that result with the
; <8 x float> loaded from %a2 using mask <0, 1, 10, 3, 12, 13, 14, 7> (lanes
; 2, 4, 5 and 6 come from the loaded vector). The checks below expect a
; register-operand vblendps followed by a memory-operand vblendps, each with
; the per-CPU "sched" annotation printed under -print-schedule.
565 define <8 x float> @test_blendps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
566 ; GENERIC-LABEL: test_blendps:
568 ; GENERIC-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.50]
569 ; GENERIC-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3],mem[4,5,6],ymm0[7] sched: [8:0.50]
570 ; GENERIC-NEXT: retq # sched: [1:1.00]
572 ; SANDY-LABEL: test_blendps:
574 ; SANDY-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.50]
575 ; SANDY-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3],mem[4,5,6],ymm0[7] sched: [8:0.50]
576 ; SANDY-NEXT: retq # sched: [1:1.00]
578 ; HASWELL-LABEL: test_blendps:
580 ; HASWELL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.33]
581 ; HASWELL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3],mem[4,5,6],ymm0[7] sched: [1:0.50]
582 ; HASWELL-NEXT: retq # sched: [2:1.00]
584 ; BROADWELL-LABEL: test_blendps:
586 ; BROADWELL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.33]
587 ; BROADWELL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3],mem[4,5,6],ymm0[7] sched: [7:0.50]
588 ; BROADWELL-NEXT: retq # sched: [7:1.00]
590 ; SKYLAKE-LABEL: test_blendps:
592 ; SKYLAKE-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.33]
593 ; SKYLAKE-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3],mem[4,5,6],ymm0[7] sched: [8:0.50]
594 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
596 ; SKX-LABEL: test_blendps:
598 ; SKX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.33]
599 ; SKX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3],mem[4,5,6],ymm0[7] sched: [8:0.50]
600 ; SKX-NEXT: retq # sched: [7:1.00]
602 ; BTVER2-LABEL: test_blendps:
604 ; BTVER2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.50]
605 ; BTVER2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3],mem[4,5,6],ymm0[7] sched: [6:1.00]
606 ; BTVER2-NEXT: retq # sched: [4:1.00]
608 ; ZNVER1-LABEL: test_blendps:
610 ; ZNVER1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.50]
611 ; ZNVER1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3],mem[4,5,6],ymm0[7] sched: [8:0.50]
612 ; ZNVER1-NEXT: retq # sched: [1:0.50]
613 %1 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 5, i32 6, i32 7>
614 %2 = load <8 x float>, <8 x float> *%a2, align 32
615 %3 = shufflevector <8 x float> %1, <8 x float> %2, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 13, i32 14, i32 7>
; test_blendvpd: calls the @llvm.x86.avx.blendv.pd.256 intrinsic twice, first
; on (%a0, %a1, %a2) and then on (first result, <4 x double> loaded from %a3,
; %a2), so %a2 is the third argument of both calls. The checks below expect a
; register-operand vblendvpd followed by a memory-operand vblendvpd, each with
; the per-CPU "sched" annotation printed under -print-schedule.
619 define <4 x double> @test_blendvpd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) {
620 ; GENERIC-LABEL: test_blendvpd:
622 ; GENERIC-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
623 ; GENERIC-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
624 ; GENERIC-NEXT: retq # sched: [1:1.00]
626 ; SANDY-LABEL: test_blendvpd:
628 ; SANDY-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
629 ; SANDY-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
630 ; SANDY-NEXT: retq # sched: [1:1.00]
632 ; HASWELL-LABEL: test_blendvpd:
634 ; HASWELL-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
635 ; HASWELL-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [2:2.00]
636 ; HASWELL-NEXT: retq # sched: [2:1.00]
638 ; BROADWELL-LABEL: test_blendvpd:
640 ; BROADWELL-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
641 ; BROADWELL-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
642 ; BROADWELL-NEXT: retq # sched: [7:1.00]
644 ; SKYLAKE-LABEL: test_blendvpd:
646 ; SKYLAKE-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67]
647 ; SKYLAKE-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:0.67]
648 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
650 ; SKX-LABEL: test_blendvpd:
652 ; SKX-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67]
653 ; SKX-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:0.67]
654 ; SKX-NEXT: retq # sched: [7:1.00]
656 ; BTVER2-LABEL: test_blendvpd:
658 ; BTVER2-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [3:3.00]
659 ; BTVER2-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:3.00]
660 ; BTVER2-NEXT: retq # sched: [4:1.00]
662 ; ZNVER1-LABEL: test_blendvpd:
664 ; ZNVER1-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
665 ; ZNVER1-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
666 ; ZNVER1-NEXT: retq # sched: [1:0.50]
667 %1 = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
668 %2 = load <4 x double>, <4 x double> *%a3, align 32
669 %3 = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %1, <4 x double> %2, <4 x double> %a2)
672 declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
; test_blendvps: calls the @llvm.x86.avx.blendv.ps.256 intrinsic twice, first
; on (%a0, %a1, %a2) and then on (first result, <8 x float> loaded from %a3,
; %a2), so %a2 is the third argument of both calls. The checks below expect a
; register-operand vblendvps followed by a memory-operand vblendvps, each with
; the per-CPU "sched" annotation printed under -print-schedule.
674 define <8 x float> @test_blendvps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) {
675 ; GENERIC-LABEL: test_blendvps:
677 ; GENERIC-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
678 ; GENERIC-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
679 ; GENERIC-NEXT: retq # sched: [1:1.00]
681 ; SANDY-LABEL: test_blendvps:
683 ; SANDY-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
684 ; SANDY-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
685 ; SANDY-NEXT: retq # sched: [1:1.00]
687 ; HASWELL-LABEL: test_blendvps:
689 ; HASWELL-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
690 ; HASWELL-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [2:2.00]
691 ; HASWELL-NEXT: retq # sched: [2:1.00]
693 ; BROADWELL-LABEL: test_blendvps:
695 ; BROADWELL-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
696 ; BROADWELL-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
697 ; BROADWELL-NEXT: retq # sched: [7:1.00]
699 ; SKYLAKE-LABEL: test_blendvps:
701 ; SKYLAKE-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67]
702 ; SKYLAKE-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:0.67]
703 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
705 ; SKX-LABEL: test_blendvps:
707 ; SKX-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67]
708 ; SKX-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:0.67]
709 ; SKX-NEXT: retq # sched: [7:1.00]
711 ; BTVER2-LABEL: test_blendvps:
713 ; BTVER2-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [3:3.00]
714 ; BTVER2-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:3.00]
715 ; BTVER2-NEXT: retq # sched: [4:1.00]
717 ; ZNVER1-LABEL: test_blendvps:
719 ; ZNVER1-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
720 ; ZNVER1-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
721 ; ZNVER1-NEXT: retq # sched: [1:0.50]
722 %1 = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
723 %2 = load <8 x float>, <8 x float> *%a3, align 32
724 %3 = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %1, <8 x float> %2, <8 x float> %a2)
727 declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
; test_broadcastf128: loads a <4 x float> from %a0 and widens it to eight
; lanes with a shufflevector whose mask <0, 1, 2, 3, 0, 1, 2, 3> repeats the
; four loaded elements in both halves. The checks below expect a single
; vbroadcastf128 from memory into ymm0, with the per-CPU "sched" annotation
; printed under -print-schedule.
729 define <8 x float> @test_broadcastf128(<4 x float> *%a0) {
730 ; GENERIC-LABEL: test_broadcastf128:
732 ; GENERIC-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [7:1.00]
733 ; GENERIC-NEXT: retq # sched: [1:1.00]
735 ; SANDY-LABEL: test_broadcastf128:
737 ; SANDY-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [7:1.00]
738 ; SANDY-NEXT: retq # sched: [1:1.00]
740 ; HASWELL-LABEL: test_broadcastf128:
742 ; HASWELL-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [1:0.50]
743 ; HASWELL-NEXT: retq # sched: [2:1.00]
745 ; BROADWELL-LABEL: test_broadcastf128:
747 ; BROADWELL-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [6:0.50]
748 ; BROADWELL-NEXT: retq # sched: [7:1.00]
750 ; SKYLAKE-LABEL: test_broadcastf128:
752 ; SKYLAKE-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [7:0.50]
753 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
755 ; SKX-LABEL: test_broadcastf128:
757 ; SKX-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [7:0.50]
758 ; SKX-NEXT: retq # sched: [7:1.00]
760 ; BTVER2-LABEL: test_broadcastf128:
762 ; BTVER2-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [6:1.00]
763 ; BTVER2-NEXT: retq # sched: [4:1.00]
765 ; ZNVER1-LABEL: test_broadcastf128:
767 ; ZNVER1-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [8:0.50]
768 ; ZNVER1-NEXT: retq # sched: [1:0.50]
769 %1 = load <4 x float>, <4 x float> *%a0, align 32
770 %2 = shufflevector <4 x float> %1, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
; test_broadcastsd_ymm: loads a double from %a0, inserts it into lane 0 of an
; undef <4 x double>, and splats lane 0 to every lane with a zeroinitializer
; shuffle mask. The checks below expect a single memory-operand vbroadcastsd
; into ymm0, with the per-CPU "sched" annotation printed under
; -print-schedule.
774 define <4 x double> @test_broadcastsd_ymm(double *%a0) {
775 ; GENERIC-LABEL: test_broadcastsd_ymm:
777 ; GENERIC-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [7:0.50]
778 ; GENERIC-NEXT: retq # sched: [1:1.00]
780 ; SANDY-LABEL: test_broadcastsd_ymm:
782 ; SANDY-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [7:0.50]
783 ; SANDY-NEXT: retq # sched: [1:1.00]
785 ; HASWELL-LABEL: test_broadcastsd_ymm:
787 ; HASWELL-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [1:0.50]
788 ; HASWELL-NEXT: retq # sched: [2:1.00]
790 ; BROADWELL-LABEL: test_broadcastsd_ymm:
792 ; BROADWELL-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [6:0.50]
793 ; BROADWELL-NEXT: retq # sched: [7:1.00]
795 ; SKYLAKE-LABEL: test_broadcastsd_ymm:
797 ; SKYLAKE-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [7:0.50]
798 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
800 ; SKX-LABEL: test_broadcastsd_ymm:
802 ; SKX-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [7:0.50]
803 ; SKX-NEXT: retq # sched: [7:1.00]
805 ; BTVER2-LABEL: test_broadcastsd_ymm:
807 ; BTVER2-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [6:2.00]
808 ; BTVER2-NEXT: retq # sched: [4:1.00]
810 ; ZNVER1-LABEL: test_broadcastsd_ymm:
812 ; ZNVER1-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [8:0.50]
813 ; ZNVER1-NEXT: retq # sched: [1:0.50]
814 %1 = load double, double *%a0, align 8
815 %2 = insertelement <4 x double> undef, double %1, i32 0
816 %3 = shufflevector <4 x double> %2, <4 x double> undef, <4 x i32> zeroinitializer
; test_broadcastss: loads one float from %a0 and splats it across a
; <4 x float> (insertelement into lane 0, then shufflevector with an all-zero
; mask). Each CPU prefix below expects a single vbroadcastss from memory into
; %xmm0, followed by the `# sched:` annotation printed under -print-schedule.
820 define <4 x float> @test_broadcastss(float *%a0) {
821 ; GENERIC-LABEL: test_broadcastss:
823 ; GENERIC-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [6:0.50]
824 ; GENERIC-NEXT: retq # sched: [1:1.00]
826 ; SANDY-LABEL: test_broadcastss:
828 ; SANDY-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [6:0.50]
829 ; SANDY-NEXT: retq # sched: [1:1.00]
831 ; HASWELL-LABEL: test_broadcastss:
833 ; HASWELL-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [1:0.50]
834 ; HASWELL-NEXT: retq # sched: [2:1.00]
836 ; BROADWELL-LABEL: test_broadcastss:
838 ; BROADWELL-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [5:0.50]
839 ; BROADWELL-NEXT: retq # sched: [7:1.00]
841 ; SKYLAKE-LABEL: test_broadcastss:
843 ; SKYLAKE-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [6:0.50]
844 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
846 ; SKX-LABEL: test_broadcastss:
848 ; SKX-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [6:0.50]
849 ; SKX-NEXT: retq # sched: [7:1.00]
851 ; BTVER2-LABEL: test_broadcastss:
853 ; BTVER2-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [5:1.00]
854 ; BTVER2-NEXT: retq # sched: [4:1.00]
856 ; ZNVER1-LABEL: test_broadcastss:
858 ; ZNVER1-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [8:0.50]
859 ; ZNVER1-NEXT: retq # sched: [1:0.50]
860 %1 = load float, float *%a0, align 4
861 %2 = insertelement <4 x float> undef, float %1, i32 0
862 %3 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> zeroinitializer
; test_broadcastss_ymm: loads one float from %a0 and splats it across an
; <8 x float> (insertelement into lane 0, then shufflevector with an all-zero
; mask). Each CPU prefix below expects a single vbroadcastss from memory into
; %ymm0, followed by the `# sched:` annotation printed under -print-schedule.
866 define <8 x float> @test_broadcastss_ymm(float *%a0) {
867 ; GENERIC-LABEL: test_broadcastss_ymm:
869 ; GENERIC-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [7:0.50]
870 ; GENERIC-NEXT: retq # sched: [1:1.00]
872 ; SANDY-LABEL: test_broadcastss_ymm:
874 ; SANDY-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [7:0.50]
875 ; SANDY-NEXT: retq # sched: [1:1.00]
877 ; HASWELL-LABEL: test_broadcastss_ymm:
879 ; HASWELL-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [1:0.50]
880 ; HASWELL-NEXT: retq # sched: [2:1.00]
882 ; BROADWELL-LABEL: test_broadcastss_ymm:
884 ; BROADWELL-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [6:0.50]
885 ; BROADWELL-NEXT: retq # sched: [7:1.00]
887 ; SKYLAKE-LABEL: test_broadcastss_ymm:
889 ; SKYLAKE-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [7:0.50]
890 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
892 ; SKX-LABEL: test_broadcastss_ymm:
894 ; SKX-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [7:0.50]
895 ; SKX-NEXT: retq # sched: [7:1.00]
897 ; BTVER2-LABEL: test_broadcastss_ymm:
899 ; BTVER2-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [6:2.00]
900 ; BTVER2-NEXT: retq # sched: [4:1.00]
902 ; ZNVER1-LABEL: test_broadcastss_ymm:
904 ; ZNVER1-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [8:0.50]
905 ; ZNVER1-NEXT: retq # sched: [1:0.50]
906 %1 = load float, float *%a0, align 4
907 %2 = insertelement <8 x float> undef, float %1, i32 0
908 %3 = shufflevector <8 x float> %2, <8 x float> undef, <8 x i32> zeroinitializer
; test_cmppd: compares %a0 for ordered-equal against %a1 (register operand) and
; against a vector loaded from %a2 (memory operand), sign-extends both <4 x i1>
; results to <4 x i64>, ORs them and bitcasts the result to <4 x double>.
; Most prefixes expect two vcmpeqpd (reg and mem forms) plus a vorpd. The SKX
; expectations instead compare into mask registers %k0/%k1, expand them with
; vpmovm2q (those two lines carry no `# sched:` annotation) and combine with vpor.
912 define <4 x double> @test_cmppd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
913 ; GENERIC-LABEL: test_cmppd:
915 ; GENERIC-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
916 ; GENERIC-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
917 ; GENERIC-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
918 ; GENERIC-NEXT: retq # sched: [1:1.00]
920 ; SANDY-LABEL: test_cmppd:
922 ; SANDY-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
923 ; SANDY-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
924 ; SANDY-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
925 ; SANDY-NEXT: retq # sched: [1:1.00]
927 ; HASWELL-LABEL: test_cmppd:
929 ; HASWELL-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
930 ; HASWELL-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [3:1.00]
931 ; HASWELL-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
932 ; HASWELL-NEXT: retq # sched: [2:1.00]
934 ; BROADWELL-LABEL: test_cmppd:
936 ; BROADWELL-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
937 ; BROADWELL-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
938 ; BROADWELL-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
939 ; BROADWELL-NEXT: retq # sched: [7:1.00]
941 ; SKYLAKE-LABEL: test_cmppd:
943 ; SKYLAKE-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [4:0.33]
944 ; SKYLAKE-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
945 ; SKYLAKE-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
946 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
948 ; SKX-LABEL: test_cmppd:
950 ; SKX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 # sched: [3:1.00]
951 ; SKX-NEXT: vcmpeqpd (%rdi), %ymm0, %k1 # sched: [10:1.00]
952 ; SKX-NEXT: vpmovm2q %k0, %ymm0
953 ; SKX-NEXT: vpmovm2q %k1, %ymm1
954 ; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
955 ; SKX-NEXT: retq # sched: [7:1.00]
957 ; BTVER2-LABEL: test_cmppd:
959 ; BTVER2-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [2:2.00]
960 ; BTVER2-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [7:2.00]
961 ; BTVER2-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
962 ; BTVER2-NEXT: retq # sched: [4:1.00]
964 ; ZNVER1-LABEL: test_cmppd:
966 ; ZNVER1-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
967 ; ZNVER1-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
968 ; ZNVER1-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
969 ; ZNVER1-NEXT: retq # sched: [1:0.50]
970 %1 = fcmp oeq <4 x double> %a0, %a1
971 %2 = load <4 x double>, <4 x double> *%a2, align 32
972 %3 = fcmp oeq <4 x double> %a0, %2
973 %4 = sext <4 x i1> %1 to <4 x i64>
974 %5 = sext <4 x i1> %3 to <4 x i64>
975 %6 = or <4 x i64> %4, %5
976 %7 = bitcast <4 x i64> %6 to <4 x double>
; test_cmpps: compares %a0 for ordered-equal against %a1 (register operand) and
; against a vector loaded from %a2 (memory operand), sign-extends both <8 x i1>
; results to <8 x i32>, ORs them and bitcasts the result to <8 x float>.
; Most prefixes expect two vcmpeqps (reg and mem forms) plus a vorps. The SKX
; expectations instead compare into mask registers %k0/%k1, expand them with
; vpmovm2d (those two lines carry no `# sched:` annotation) and combine with vpor.
980 define <8 x float> @test_cmpps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
981 ; GENERIC-LABEL: test_cmpps:
983 ; GENERIC-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
984 ; GENERIC-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
985 ; GENERIC-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
986 ; GENERIC-NEXT: retq # sched: [1:1.00]
988 ; SANDY-LABEL: test_cmpps:
990 ; SANDY-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
991 ; SANDY-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
992 ; SANDY-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
993 ; SANDY-NEXT: retq # sched: [1:1.00]
995 ; HASWELL-LABEL: test_cmpps:
997 ; HASWELL-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
998 ; HASWELL-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [3:1.00]
999 ; HASWELL-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
1000 ; HASWELL-NEXT: retq # sched: [2:1.00]
1002 ; BROADWELL-LABEL: test_cmpps:
1003 ; BROADWELL: # BB#0:
1004 ; BROADWELL-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
1005 ; BROADWELL-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
1006 ; BROADWELL-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
1007 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1009 ; SKYLAKE-LABEL: test_cmpps:
1011 ; SKYLAKE-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [4:0.33]
1012 ; SKYLAKE-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
1013 ; SKYLAKE-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
1014 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1016 ; SKX-LABEL: test_cmpps:
1018 ; SKX-NEXT: vcmpeqps %ymm1, %ymm0, %k0 # sched: [3:1.00]
1019 ; SKX-NEXT: vcmpeqps (%rdi), %ymm0, %k1 # sched: [10:1.00]
1020 ; SKX-NEXT: vpmovm2d %k0, %ymm0
1021 ; SKX-NEXT: vpmovm2d %k1, %ymm1
1022 ; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1023 ; SKX-NEXT: retq # sched: [7:1.00]
1025 ; BTVER2-LABEL: test_cmpps:
1027 ; BTVER2-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [2:2.00]
1028 ; BTVER2-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [7:2.00]
1029 ; BTVER2-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
1030 ; BTVER2-NEXT: retq # sched: [4:1.00]
1032 ; ZNVER1-LABEL: test_cmpps:
1034 ; ZNVER1-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
1035 ; ZNVER1-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
1036 ; ZNVER1-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
1037 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1038 %1 = fcmp oeq <8 x float> %a0, %a1
1039 %2 = load <8 x float>, <8 x float> *%a2, align 32
1040 %3 = fcmp oeq <8 x float> %a0, %2
1041 %4 = sext <8 x i1> %1 to <8 x i32>
1042 %5 = sext <8 x i1> %3 to <8 x i32>
1043 %6 = or <8 x i32> %4, %5
1044 %7 = bitcast <8 x i32> %6 to <8 x float>
; test_cvtdq2pd: converts <4 x i32> to <4 x double> with sitofp, once from the
; register argument %a0 and once from a vector loaded from %a1, then adds the
; two results. Each prefix expects the register and memory forms of vcvtdq2pd
; plus a vaddpd; the BTVER2 and ZNVER1 expectations list the memory form first.
1048 define <4 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) {
1049 ; GENERIC-LABEL: test_cvtdq2pd:
1051 ; GENERIC-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [4:1.00]
1052 ; GENERIC-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [10:1.00]
1053 ; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1054 ; GENERIC-NEXT: retq # sched: [1:1.00]
1056 ; SANDY-LABEL: test_cvtdq2pd:
1058 ; SANDY-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [4:1.00]
1059 ; SANDY-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [10:1.00]
1060 ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1061 ; SANDY-NEXT: retq # sched: [1:1.00]
1063 ; HASWELL-LABEL: test_cvtdq2pd:
1065 ; HASWELL-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [6:1.00]
1066 ; HASWELL-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [6:1.00]
1067 ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1068 ; HASWELL-NEXT: retq # sched: [2:1.00]
1070 ; BROADWELL-LABEL: test_cvtdq2pd:
1071 ; BROADWELL: # BB#0:
1072 ; BROADWELL-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [6:1.00]
1073 ; BROADWELL-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [11:1.00]
1074 ; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1075 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1077 ; SKYLAKE-LABEL: test_cvtdq2pd:
1079 ; SKYLAKE-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [7:1.00]
1080 ; SKYLAKE-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [13:1.00]
1081 ; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
1082 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1084 ; SKX-LABEL: test_cvtdq2pd:
1086 ; SKX-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [7:1.00]
1087 ; SKX-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [13:1.00]
1088 ; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
1089 ; SKX-NEXT: retq # sched: [7:1.00]
1091 ; BTVER2-LABEL: test_cvtdq2pd:
1093 ; BTVER2-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [8:2.00]
1094 ; BTVER2-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [3:2.00]
1095 ; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
1096 ; BTVER2-NEXT: retq # sched: [4:1.00]
1098 ; ZNVER1-LABEL: test_cvtdq2pd:
1100 ; ZNVER1-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [12:1.00]
1101 ; ZNVER1-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [5:1.00]
1102 ; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1103 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1104 %1 = sitofp <4 x i32> %a0 to <4 x double>
1105 %2 = load <4 x i32>, <4 x i32> *%a1, align 16
1106 %3 = sitofp <4 x i32> %2 to <4 x double>
1107 %4 = fadd <4 x double> %1, %3
; test_cvtdq2ps: converts <8 x i32> to <8 x float> with sitofp, once from the
; register argument %a0 and once from a vector loaded from %a1, then adds the
; two results. Most prefixes expect the register and memory forms of vcvtdq2ps
; plus a vaddps. The SANDY expectations differ: the memory operand is assembled
; with vmovaps + vinsertf128 and then converted with the register form.
1111 define <8 x float> @test_cvtdq2ps(<8 x i32> %a0, <8 x i32> *%a1) {
1112 ; GENERIC-LABEL: test_cvtdq2ps:
1114 ; GENERIC-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00]
1115 ; GENERIC-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [10:1.00]
1116 ; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1117 ; GENERIC-NEXT: retq # sched: [1:1.00]
1119 ; SANDY-LABEL: test_cvtdq2ps:
1121 ; SANDY-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00]
1122 ; SANDY-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50]
1123 ; SANDY-NEXT: vinsertf128 $1, 16(%rdi), %ymm1, %ymm1 # sched: [7:0.50]
1124 ; SANDY-NEXT: vcvtdq2ps %ymm1, %ymm1 # sched: [3:1.00]
1125 ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1126 ; SANDY-NEXT: retq # sched: [1:1.00]
1128 ; HASWELL-LABEL: test_cvtdq2ps:
1130 ; HASWELL-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00]
1131 ; HASWELL-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [3:1.00]
1132 ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1133 ; HASWELL-NEXT: retq # sched: [2:1.00]
1135 ; BROADWELL-LABEL: test_cvtdq2ps:
1136 ; BROADWELL: # BB#0:
1137 ; BROADWELL-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00]
1138 ; BROADWELL-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [9:1.00]
1139 ; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1140 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1142 ; SKYLAKE-LABEL: test_cvtdq2ps:
1144 ; SKYLAKE-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:0.33]
1145 ; SKYLAKE-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [11:0.50]
1146 ; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
1147 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1149 ; SKX-LABEL: test_cvtdq2ps:
1151 ; SKX-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:0.33]
1152 ; SKX-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [11:0.50]
1153 ; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
1154 ; SKX-NEXT: retq # sched: [7:1.00]
1156 ; BTVER2-LABEL: test_cvtdq2ps:
1158 ; BTVER2-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [8:2.00]
1159 ; BTVER2-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:2.00]
1160 ; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
1161 ; BTVER2-NEXT: retq # sched: [4:1.00]
1163 ; ZNVER1-LABEL: test_cvtdq2ps:
1165 ; ZNVER1-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [12:1.00]
1166 ; ZNVER1-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [5:1.00]
1167 ; ZNVER1-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
1168 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1169 %1 = sitofp <8 x i32> %a0 to <8 x float>
; NOTE(review): this 32-byte vector load is only `align 16`; presumably that is
; why the SANDY expectations split it into vmovaps + vinsertf128 -- confirm
; before changing the alignment, since it would alter the generated checks.
1170 %2 = load <8 x i32>, <8 x i32> *%a1, align 16
1171 %3 = sitofp <8 x i32> %2 to <8 x float>
1172 %4 = fadd <8 x float> %1, %3
; test_cvtpd2dq: calls @llvm.x86.avx.cvt.pd2dq.256 on the register argument %a0
; and on a vector loaded from %a1, then concatenates the two <4 x i32> results
; into an <8 x i32> with a shufflevector (mask 0..7). Each prefix expects
; vcvtpd2dq (register form), vcvtpd2dqy (memory form) and a vinsertf128; the
; BTVER2 and ZNVER1 expectations list the memory form first.
1176 define <8 x i32> @test_cvtpd2dq(<4 x double> %a0, <4 x double> *%a1) {
1177 ; GENERIC-LABEL: test_cvtpd2dq:
1179 ; GENERIC-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [4:1.00]
1180 ; GENERIC-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [11:1.00]
1181 ; GENERIC-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
1182 ; GENERIC-NEXT: retq # sched: [1:1.00]
1184 ; SANDY-LABEL: test_cvtpd2dq:
1186 ; SANDY-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [4:1.00]
1187 ; SANDY-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [11:1.00]
1188 ; SANDY-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
1189 ; SANDY-NEXT: retq # sched: [1:1.00]
1191 ; HASWELL-LABEL: test_cvtpd2dq:
1193 ; HASWELL-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [6:1.00]
1194 ; HASWELL-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [7:1.00]
1195 ; HASWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
1196 ; HASWELL-NEXT: retq # sched: [2:1.00]
1198 ; BROADWELL-LABEL: test_cvtpd2dq:
1199 ; BROADWELL: # BB#0:
1200 ; BROADWELL-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [6:1.00]
1201 ; BROADWELL-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [8:1.00]
1202 ; BROADWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
1203 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1205 ; SKYLAKE-LABEL: test_cvtpd2dq:
1207 ; SKYLAKE-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [7:1.00]
1208 ; SKYLAKE-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [8:1.00]
1209 ; SKYLAKE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
1210 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1212 ; SKX-LABEL: test_cvtpd2dq:
1214 ; SKX-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [7:1.00]
1215 ; SKX-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [8:1.00]
1216 ; SKX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
1217 ; SKX-NEXT: retq # sched: [7:1.00]
1219 ; BTVER2-LABEL: test_cvtpd2dq:
1221 ; BTVER2-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [11:2.00]
1222 ; BTVER2-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [6:2.00]
1223 ; BTVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:0.50]
1224 ; BTVER2-NEXT: retq # sched: [4:1.00]
1226 ; ZNVER1-LABEL: test_cvtpd2dq:
1228 ; ZNVER1-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [12:1.00]
1229 ; ZNVER1-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [5:1.00]
1230 ; ZNVER1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [2:0.67]
1231 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1232 %1 = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0)
1233 %2 = load <4 x double>, <4 x double> *%a1, align 32
1234 %3 = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %2)
1235 %4 = shufflevector <4 x i32> %1, <4 x i32> %3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; Declaration of the intrinsic called by test_cvtpd2dq: takes a <4 x double>
; and yields a <4 x i32>.
1238 declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone
; test_cvttpd2dq: converts <4 x double> to <4 x i32> with fptosi, once from the
; register argument %a0 and once from a vector loaded from %a1, then
; concatenates the two results into an <8 x i32> with a shufflevector (mask
; 0..7). Each prefix expects vcvttpd2dq (register form), vcvttpd2dqy (memory
; form) and a vinsertf128; BTVER2 and ZNVER1 list the memory form first.
1240 define <8 x i32> @test_cvttpd2dq(<4 x double> %a0, <4 x double> *%a1) {
1241 ; GENERIC-LABEL: test_cvttpd2dq:
1243 ; GENERIC-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [4:1.00]
1244 ; GENERIC-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [11:1.00]
1245 ; GENERIC-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
1246 ; GENERIC-NEXT: retq # sched: [1:1.00]
1248 ; SANDY-LABEL: test_cvttpd2dq:
1250 ; SANDY-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [4:1.00]
1251 ; SANDY-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [11:1.00]
1252 ; SANDY-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
1253 ; SANDY-NEXT: retq # sched: [1:1.00]
1255 ; HASWELL-LABEL: test_cvttpd2dq:
1257 ; HASWELL-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [6:1.00]
1258 ; HASWELL-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [7:1.00]
1259 ; HASWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
1260 ; HASWELL-NEXT: retq # sched: [2:1.00]
1262 ; BROADWELL-LABEL: test_cvttpd2dq:
1263 ; BROADWELL: # BB#0:
1264 ; BROADWELL-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [6:1.00]
1265 ; BROADWELL-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [8:1.00]
1266 ; BROADWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
1267 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1269 ; SKYLAKE-LABEL: test_cvttpd2dq:
1271 ; SKYLAKE-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [7:1.00]
1272 ; SKYLAKE-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [8:1.00]
1273 ; SKYLAKE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
1274 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1276 ; SKX-LABEL: test_cvttpd2dq:
1278 ; SKX-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [7:1.00]
1279 ; SKX-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [8:1.00]
1280 ; SKX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
1281 ; SKX-NEXT: retq # sched: [7:1.00]
1283 ; BTVER2-LABEL: test_cvttpd2dq:
1285 ; BTVER2-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [11:2.00]
1286 ; BTVER2-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [6:2.00]
1287 ; BTVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:0.50]
1288 ; BTVER2-NEXT: retq # sched: [4:1.00]
1290 ; ZNVER1-LABEL: test_cvttpd2dq:
1292 ; ZNVER1-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [12:1.00]
1293 ; ZNVER1-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [5:1.00]
1294 ; ZNVER1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [2:0.67]
1295 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1296 %1 = fptosi <4 x double> %a0 to <4 x i32>
1297 %2 = load <4 x double>, <4 x double> *%a1, align 32
1298 %3 = fptosi <4 x double> %2 to <4 x i32>
1299 %4 = shufflevector <4 x i32> %1, <4 x i32> %3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; test_cvtpd2ps: truncates <4 x double> to <4 x float> with fptrunc, once from
; the register argument %a0 and once from a vector loaded from %a1, then
; concatenates the two results into an <8 x float> with a shufflevector (mask
; 0..7). Each prefix expects vcvtpd2ps (register form), vcvtpd2psy (memory
; form) and a vinsertf128; BTVER2 and ZNVER1 list the memory form first.
1303 define <8 x float> @test_cvtpd2ps(<4 x double> %a0, <4 x double> *%a1) {
1304 ; GENERIC-LABEL: test_cvtpd2ps:
1306 ; GENERIC-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [4:1.00]
1307 ; GENERIC-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [11:1.00]
1308 ; GENERIC-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
1309 ; GENERIC-NEXT: retq # sched: [1:1.00]
1311 ; SANDY-LABEL: test_cvtpd2ps:
1313 ; SANDY-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [4:1.00]
1314 ; SANDY-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [11:1.00]
1315 ; SANDY-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
1316 ; SANDY-NEXT: retq # sched: [1:1.00]
1318 ; HASWELL-LABEL: test_cvtpd2ps:
1320 ; HASWELL-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [6:1.00]
1321 ; HASWELL-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [7:1.00]
1322 ; HASWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
1323 ; HASWELL-NEXT: retq # sched: [2:1.00]
1325 ; BROADWELL-LABEL: test_cvtpd2ps:
1326 ; BROADWELL: # BB#0:
1327 ; BROADWELL-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [6:1.00]
1328 ; BROADWELL-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [8:1.00]
1329 ; BROADWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
1330 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1332 ; SKYLAKE-LABEL: test_cvtpd2ps:
1334 ; SKYLAKE-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [7:1.00]
1335 ; SKYLAKE-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [8:1.00]
1336 ; SKYLAKE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
1337 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1339 ; SKX-LABEL: test_cvtpd2ps:
1341 ; SKX-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [7:1.00]
1342 ; SKX-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [8:1.00]
1343 ; SKX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
1344 ; SKX-NEXT: retq # sched: [7:1.00]
1346 ; BTVER2-LABEL: test_cvtpd2ps:
1348 ; BTVER2-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [11:2.00]
1349 ; BTVER2-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [6:2.00]
1350 ; BTVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:0.50]
1351 ; BTVER2-NEXT: retq # sched: [4:1.00]
1353 ; ZNVER1-LABEL: test_cvtpd2ps:
1355 ; ZNVER1-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [11:1.00]
1356 ; ZNVER1-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [5:1.00]
1357 ; ZNVER1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [2:0.67]
1358 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1359 %1 = fptrunc <4 x double> %a0 to <4 x float>
1360 %2 = load <4 x double>, <4 x double> *%a1, align 32
1361 %3 = fptrunc <4 x double> %2 to <4 x float>
1362 %4 = shufflevector <4 x float> %1, <4 x float> %3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; test_cvtps2dq: calls @llvm.x86.avx.cvt.ps2dq.256 on the register argument %a0
; and on a vector loaded from %a1, then ORs the two <8 x i32> results. Each
; prefix expects the register and memory forms of vcvtps2dq followed by a
; vorpd; the BTVER2 and ZNVER1 expectations list the memory form first.
1366 define <8 x i32> @test_cvtps2dq(<8 x float> %a0, <8 x float> *%a1) {
1367 ; GENERIC-LABEL: test_cvtps2dq:
1369 ; GENERIC-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [3:1.00]
1370 ; GENERIC-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [10:1.00]
1371 ; GENERIC-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
1372 ; GENERIC-NEXT: retq # sched: [1:1.00]
1374 ; SANDY-LABEL: test_cvtps2dq:
1376 ; SANDY-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [3:1.00]
1377 ; SANDY-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [10:1.00]
1378 ; SANDY-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
1379 ; SANDY-NEXT: retq # sched: [1:1.00]
1381 ; HASWELL-LABEL: test_cvtps2dq:
1383 ; HASWELL-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [3:1.00]
1384 ; HASWELL-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [3:1.00]
1385 ; HASWELL-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
1386 ; HASWELL-NEXT: retq # sched: [2:1.00]
1388 ; BROADWELL-LABEL: test_cvtps2dq:
1389 ; BROADWELL: # BB#0:
1390 ; BROADWELL-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [3:1.00]
1391 ; BROADWELL-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [9:1.00]
1392 ; BROADWELL-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
1393 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1395 ; SKYLAKE-LABEL: test_cvtps2dq:
1397 ; SKYLAKE-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [4:0.33]
1398 ; SKYLAKE-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [11:0.50]
1399 ; SKYLAKE-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1400 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1402 ; SKX-LABEL: test_cvtps2dq:
1404 ; SKX-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [4:0.33]
1405 ; SKX-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [11:0.50]
1406 ; SKX-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1407 ; SKX-NEXT: retq # sched: [7:1.00]
1409 ; BTVER2-LABEL: test_cvtps2dq:
1411 ; BTVER2-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [8:2.00]
1412 ; BTVER2-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [3:2.00]
1413 ; BTVER2-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1414 ; BTVER2-NEXT: retq # sched: [4:1.00]
1416 ; ZNVER1-LABEL: test_cvtps2dq:
1418 ; ZNVER1-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [12:1.00]
1419 ; ZNVER1-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [5:1.00]
1420 ; ZNVER1-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1421 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1422 %1 = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0)
1423 %2 = load <8 x float>, <8 x float> *%a1, align 32
1424 %3 = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %2)
1425 %4 = or <8 x i32> %1, %3
; Declaration of the intrinsic called by test_cvtps2dq: takes an <8 x float>
; and yields an <8 x i32>.
1428 declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone
; test_cvttps2dq: converts <8 x float> to <8 x i32> with fptosi, once from the
; register argument %a0 and once from a vector loaded from %a1, then ORs the
; two results. Each prefix expects the register and memory forms of vcvttps2dq
; followed by a vorps; the BTVER2 and ZNVER1 expectations list the memory form
; first.
1430 define <8 x i32> @test_cvttps2dq(<8 x float> %a0, <8 x float> *%a1) {
1431 ; GENERIC-LABEL: test_cvttps2dq:
1433 ; GENERIC-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [3:1.00]
1434 ; GENERIC-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [10:1.00]
1435 ; GENERIC-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
1436 ; GENERIC-NEXT: retq # sched: [1:1.00]
1438 ; SANDY-LABEL: test_cvttps2dq:
1440 ; SANDY-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [3:1.00]
1441 ; SANDY-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [10:1.00]
1442 ; SANDY-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
1443 ; SANDY-NEXT: retq # sched: [1:1.00]
1445 ; HASWELL-LABEL: test_cvttps2dq:
1447 ; HASWELL-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [3:1.00]
1448 ; HASWELL-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [3:1.00]
1449 ; HASWELL-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
1450 ; HASWELL-NEXT: retq # sched: [2:1.00]
1452 ; BROADWELL-LABEL: test_cvttps2dq:
1453 ; BROADWELL: # BB#0:
1454 ; BROADWELL-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [3:1.00]
1455 ; BROADWELL-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [9:1.00]
1456 ; BROADWELL-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
1457 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1459 ; SKYLAKE-LABEL: test_cvttps2dq:
1461 ; SKYLAKE-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [4:0.33]
1462 ; SKYLAKE-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [11:0.50]
1463 ; SKYLAKE-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1464 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1466 ; SKX-LABEL: test_cvttps2dq:
1468 ; SKX-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [4:0.33]
1469 ; SKX-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [11:0.50]
1470 ; SKX-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
1471 ; SKX-NEXT: retq # sched: [7:1.00]
1473 ; BTVER2-LABEL: test_cvttps2dq:
1475 ; BTVER2-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [8:2.00]
1476 ; BTVER2-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [3:2.00]
1477 ; BTVER2-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
1478 ; BTVER2-NEXT: retq # sched: [4:1.00]
1480 ; ZNVER1-LABEL: test_cvttps2dq:
1482 ; ZNVER1-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [12:1.00]
1483 ; ZNVER1-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [5:1.00]
1484 ; ZNVER1-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
1485 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1486 %1 = fptosi <8 x float> %a0 to <8 x i32>
1487 %2 = load <8 x float>, <8 x float> *%a1, align 32
1488 %3 = fptosi <8 x float> %2 to <8 x i32>
1489 %4 = or <8 x i32> %1, %3
; test_divpd: divides %a0 by %a1 (register operand), then divides that quotient
; by a <4 x double> loaded from %a2 (memory operand). Each prefix expects the
; register form of vdivpd followed by the memory form, each with the
; `# sched:` annotation printed under -print-schedule.
1493 define <4 x double> @test_divpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
1494 ; GENERIC-LABEL: test_divpd:
1496 ; GENERIC-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [45:2.00]
1497 ; GENERIC-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [52:2.00]
1498 ; GENERIC-NEXT: retq # sched: [1:1.00]
1500 ; SANDY-LABEL: test_divpd:
1502 ; SANDY-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [45:2.00]
1503 ; SANDY-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [52:2.00]
1504 ; SANDY-NEXT: retq # sched: [1:1.00]
1506 ; HASWELL-LABEL: test_divpd:
1508 ; HASWELL-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [35:2.00]
1509 ; HASWELL-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [35:2.00]
1510 ; HASWELL-NEXT: retq # sched: [2:1.00]
1512 ; BROADWELL-LABEL: test_divpd:
1513 ; BROADWELL: # BB#0:
1514 ; BROADWELL-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [23:2.00]
1515 ; BROADWELL-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [29:2.00]
1516 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1518 ; SKYLAKE-LABEL: test_divpd:
1520 ; SKYLAKE-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [14:1.00]
1521 ; SKYLAKE-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [21:1.00]
1522 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1524 ; SKX-LABEL: test_divpd:
1526 ; SKX-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [14:1.00]
1527 ; SKX-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [21:1.00]
1528 ; SKX-NEXT: retq # sched: [7:1.00]
1530 ; BTVER2-LABEL: test_divpd:
1532 ; BTVER2-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [38:38.00]
1533 ; BTVER2-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [43:38.00]
1534 ; BTVER2-NEXT: retq # sched: [4:1.00]
1536 ; ZNVER1-LABEL: test_divpd:
1538 ; ZNVER1-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [15:15.00]
1539 ; ZNVER1-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [22:22.00]
1540 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1541 %1 = fdiv <4 x double> %a0, %a1
1542 %2 = load <4 x double>, <4 x double> *%a2, align 32
1543 %3 = fdiv <4 x double> %1, %2
; test_divps: divides %a0 by %a1 (register operand), then divides that quotient
; by an <8 x float> loaded from %a2 (memory operand). Each prefix expects the
; register form of vdivps followed by the memory form, each with the
; `# sched:` annotation printed under -print-schedule.
1547 define <8 x float> @test_divps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
1548 ; GENERIC-LABEL: test_divps:
1550 ; GENERIC-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [29:2.00]
1551 ; GENERIC-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [36:2.00]
1552 ; GENERIC-NEXT: retq # sched: [1:1.00]
1554 ; SANDY-LABEL: test_divps:
1556 ; SANDY-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [29:2.00]
1557 ; SANDY-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [36:2.00]
1558 ; SANDY-NEXT: retq # sched: [1:1.00]
1560 ; HASWELL-LABEL: test_divps:
1562 ; HASWELL-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [21:2.00]
1563 ; HASWELL-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [21:2.00]
1564 ; HASWELL-NEXT: retq # sched: [2:1.00]
1566 ; BROADWELL-LABEL: test_divps:
1567 ; BROADWELL: # BB#0:
1568 ; BROADWELL-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [17:2.00]
1569 ; BROADWELL-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [23:2.00]
1570 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1572 ; SKYLAKE-LABEL: test_divps:
1574 ; SKYLAKE-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [11:1.00]
1575 ; SKYLAKE-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [18:1.00]
1576 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1578 ; SKX-LABEL: test_divps:
1580 ; SKX-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [11:1.00]
1581 ; SKX-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [18:1.00]
1582 ; SKX-NEXT: retq # sched: [7:1.00]
1584 ; BTVER2-LABEL: test_divps:
1586 ; BTVER2-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [38:38.00]
1587 ; BTVER2-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [43:38.00]
1588 ; BTVER2-NEXT: retq # sched: [4:1.00]
1590 ; ZNVER1-LABEL: test_divps:
1592 ; ZNVER1-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [12:12.00]
1593 ; ZNVER1-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [19:19.00]
1594 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1595 %1 = fdiv <8 x float> %a0, %a1
1596 %2 = load <8 x float>, <8 x float> *%a2, align 32
1597 %3 = fdiv <8 x float> %1, %2
; test_dpps: calls @llvm.x86.avx.dp.ps.256 with immediate 7 on %a0 and %a1
; (register operand), then again on that result and an <8 x float> loaded from
; %a2 (memory operand). Each prefix expects the register form of `vdpps $7`
; followed by the memory form.
; NOTE(review): the ZNVER1 vdpps annotations read `[100:?]`, which looks like a
; placeholder from the scheduling model rather than a real figure -- confirm.
1601 define <8 x float> @test_dpps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
1602 ; GENERIC-LABEL: test_dpps:
1604 ; GENERIC-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [12:2.00]
1605 ; GENERIC-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
1606 ; GENERIC-NEXT: retq # sched: [1:1.00]
1608 ; SANDY-LABEL: test_dpps:
1610 ; SANDY-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [12:2.00]
1611 ; SANDY-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
1612 ; SANDY-NEXT: retq # sched: [1:1.00]
1614 ; HASWELL-LABEL: test_dpps:
1616 ; HASWELL-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [14:2.00]
1617 ; HASWELL-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [14:2.00]
1618 ; HASWELL-NEXT: retq # sched: [2:1.00]
1620 ; BROADWELL-LABEL: test_dpps:
1621 ; BROADWELL: # BB#0:
1622 ; BROADWELL-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [14:2.00]
1623 ; BROADWELL-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [20:2.00]
1624 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1626 ; SKYLAKE-LABEL: test_dpps:
1628 ; SKYLAKE-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [13:1.33]
1629 ; SKYLAKE-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [20:1.33]
1630 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1632 ; SKX-LABEL: test_dpps:
1634 ; SKX-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [13:1.33]
1635 ; SKX-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [20:1.33]
1636 ; SKX-NEXT: retq # sched: [7:1.00]
1638 ; BTVER2-LABEL: test_dpps:
1640 ; BTVER2-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [12:6.00]
1641 ; BTVER2-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [17:6.00]
1642 ; BTVER2-NEXT: retq # sched: [4:1.00]
1644 ; ZNVER1-LABEL: test_dpps:
1646 ; ZNVER1-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [100:?]
1647 ; ZNVER1-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [100:?]
1648 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1649 %1 = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7)
1650 %2 = load <8 x float>, <8 x float> *%a2, align 32
1651 %3 = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %1, <8 x float> %2, i8 7)
1654 declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
; Scheduling test for vextractf128.
; Two shufflevectors each select elements 4..7 of an <8 x float>. The one taken
; from %a0 stays in a register, the one taken from %a1 is stored through %a2,
; so the expected output contains the register-destination and the
; memory-destination form of the instruction.
; Every CPU listed below except BTVER2 also expects a vzeroupper before retq.
1656 define <4 x float> @test_extractf128(<8 x float> %a0, <8 x float> %a1, <4 x float> *%a2) {
1657 ; GENERIC-LABEL: test_extractf128:
1659 ; GENERIC-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [1:1.00]
1660 ; GENERIC-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [5:1.00]
1661 ; GENERIC-NEXT: vzeroupper
1662 ; GENERIC-NEXT: retq # sched: [1:1.00]
1664 ; SANDY-LABEL: test_extractf128:
1666 ; SANDY-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [1:1.00]
1667 ; SANDY-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [5:1.00]
1668 ; SANDY-NEXT: vzeroupper
1669 ; SANDY-NEXT: retq # sched: [1:1.00]
1671 ; HASWELL-LABEL: test_extractf128:
1673 ; HASWELL-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [3:1.00]
1674 ; HASWELL-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00]
1675 ; HASWELL-NEXT: vzeroupper # sched: [4:1.00]
1676 ; HASWELL-NEXT: retq # sched: [2:1.00]
1678 ; BROADWELL-LABEL: test_extractf128:
1679 ; BROADWELL: # BB#0:
1680 ; BROADWELL-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [3:1.00]
1681 ; BROADWELL-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00]
1682 ; BROADWELL-NEXT: vzeroupper # sched: [4:1.00]
1683 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1685 ; SKYLAKE-LABEL: test_extractf128:
1687 ; SKYLAKE-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [3:1.00]
1688 ; SKYLAKE-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00]
1689 ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00]
1690 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1692 ; SKX-LABEL: test_extractf128:
1694 ; SKX-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [3:1.00]
1695 ; SKX-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00]
1696 ; SKX-NEXT: vzeroupper # sched: [4:1.00]
1697 ; SKX-NEXT: retq # sched: [7:1.00]
1699 ; BTVER2-LABEL: test_extractf128:
1701 ; BTVER2-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [1:0.50]
1702 ; BTVER2-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00]
1703 ; BTVER2-NEXT: retq # sched: [4:1.00]
1705 ; ZNVER1-LABEL: test_extractf128:
1707 ; ZNVER1-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [1:0.33]
1708 ; ZNVER1-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [8:0.50]
1709 ; ZNVER1-NEXT: vzeroupper # sched: [100:?]
1710 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1711 %1 = shufflevector <8 x float> %a0, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1712 %2 = shufflevector <8 x float> %a1, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
1713 store <4 x float> %2, <4 x float> *%a2
; Scheduling test for the 256-bit vhaddpd instruction.
; The hadd.pd.256 intrinsic is applied to the two vector arguments and then to
; that result and a vector loaded from %a2, so the expected output contains both
; the register form and the memory-operand form.
1717 define <4 x double> @test_haddpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
1718 ; GENERIC-LABEL: test_haddpd:
1720 ; GENERIC-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
1721 ; GENERIC-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
1722 ; GENERIC-NEXT: retq # sched: [1:1.00]
1724 ; SANDY-LABEL: test_haddpd:
1726 ; SANDY-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
1727 ; SANDY-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
1728 ; SANDY-NEXT: retq # sched: [1:1.00]
1730 ; HASWELL-LABEL: test_haddpd:
1732 ; HASWELL-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
1733 ; HASWELL-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [5:2.00]
1734 ; HASWELL-NEXT: retq # sched: [2:1.00]
1736 ; BROADWELL-LABEL: test_haddpd:
1737 ; BROADWELL: # BB#0:
1738 ; BROADWELL-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
1739 ; BROADWELL-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [11:2.00]
1740 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1742 ; SKYLAKE-LABEL: test_haddpd:
1744 ; SKYLAKE-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [6:2.00]
1745 ; SKYLAKE-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [13:2.00]
1746 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1748 ; SKX-LABEL: test_haddpd:
1750 ; SKX-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [6:2.00]
1751 ; SKX-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [13:2.00]
1752 ; SKX-NEXT: retq # sched: [7:1.00]
1754 ; BTVER2-LABEL: test_haddpd:
1756 ; BTVER2-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
1757 ; BTVER2-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
1758 ; BTVER2-NEXT: retq # sched: [4:1.00]
1760 ; ZNVER1-LABEL: test_haddpd:
1762 ; ZNVER1-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [100:?]
1763 ; ZNVER1-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [100:?]
1764 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1765 %1 = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1)
1766 %2 = load <4 x double>, <4 x double> *%a2, align 32
1767 %3 = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %1, <4 x double> %2)
1770 declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounwind readnone
; Scheduling test for the 256-bit vhaddps instruction.
; The hadd.ps.256 intrinsic is applied to the two vector arguments and then to
; that result and a vector loaded from %a2, so the expected output contains both
; the register form and the memory-operand form.
1772 define <8 x float> @test_haddps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
1773 ; GENERIC-LABEL: test_haddps:
1775 ; GENERIC-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
1776 ; GENERIC-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
1777 ; GENERIC-NEXT: retq # sched: [1:1.00]
1779 ; SANDY-LABEL: test_haddps:
1781 ; SANDY-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
1782 ; SANDY-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
1783 ; SANDY-NEXT: retq # sched: [1:1.00]
1785 ; HASWELL-LABEL: test_haddps:
1787 ; HASWELL-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
1788 ; HASWELL-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [5:2.00]
1789 ; HASWELL-NEXT: retq # sched: [2:1.00]
1791 ; BROADWELL-LABEL: test_haddps:
1792 ; BROADWELL: # BB#0:
1793 ; BROADWELL-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
1794 ; BROADWELL-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [11:2.00]
1795 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1797 ; SKYLAKE-LABEL: test_haddps:
1799 ; SKYLAKE-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [6:2.00]
1800 ; SKYLAKE-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [13:2.00]
1801 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1803 ; SKX-LABEL: test_haddps:
1805 ; SKX-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [6:2.00]
1806 ; SKX-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [13:2.00]
1807 ; SKX-NEXT: retq # sched: [7:1.00]
1809 ; BTVER2-LABEL: test_haddps:
1811 ; BTVER2-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
1812 ; BTVER2-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
1813 ; BTVER2-NEXT: retq # sched: [4:1.00]
1815 ; ZNVER1-LABEL: test_haddps:
1817 ; ZNVER1-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [100:?]
1818 ; ZNVER1-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [100:?]
1819 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1820 %1 = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1)
1821 %2 = load <8 x float>, <8 x float> *%a2, align 32
1822 %3 = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %1, <8 x float> %2)
1825 declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone
; Scheduling test for the 256-bit vhsubpd instruction.
; The hsub.pd.256 intrinsic is applied to the two vector arguments and then to
; that result and a vector loaded from %a2, so the expected output contains both
; the register form and the memory-operand form.
1827 define <4 x double> @test_hsubpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
1828 ; GENERIC-LABEL: test_hsubpd:
1830 ; GENERIC-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
1831 ; GENERIC-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
1832 ; GENERIC-NEXT: retq # sched: [1:1.00]
1834 ; SANDY-LABEL: test_hsubpd:
1836 ; SANDY-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
1837 ; SANDY-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
1838 ; SANDY-NEXT: retq # sched: [1:1.00]
1840 ; HASWELL-LABEL: test_hsubpd:
1842 ; HASWELL-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
1843 ; HASWELL-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [5:2.00]
1844 ; HASWELL-NEXT: retq # sched: [2:1.00]
1846 ; BROADWELL-LABEL: test_hsubpd:
1847 ; BROADWELL: # BB#0:
1848 ; BROADWELL-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
1849 ; BROADWELL-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [11:2.00]
1850 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1852 ; SKYLAKE-LABEL: test_hsubpd:
1854 ; SKYLAKE-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [6:2.00]
1855 ; SKYLAKE-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [13:2.00]
1856 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1858 ; SKX-LABEL: test_hsubpd:
1860 ; SKX-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [6:2.00]
1861 ; SKX-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [13:2.00]
1862 ; SKX-NEXT: retq # sched: [7:1.00]
1864 ; BTVER2-LABEL: test_hsubpd:
1866 ; BTVER2-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
1867 ; BTVER2-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
1868 ; BTVER2-NEXT: retq # sched: [4:1.00]
1870 ; ZNVER1-LABEL: test_hsubpd:
1872 ; ZNVER1-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [100:?]
1873 ; ZNVER1-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [100:?]
1874 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1875 %1 = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1)
1876 %2 = load <4 x double>, <4 x double> *%a2, align 32
1877 %3 = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %1, <4 x double> %2)
1880 declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounwind readnone
; Scheduling test for the 256-bit vhsubps instruction.
; The hsub.ps.256 intrinsic is applied to the two vector arguments and then to
; that result and a vector loaded from %a2, so the expected output contains both
; the register form and the memory-operand form.
1882 define <8 x float> @test_hsubps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
1883 ; GENERIC-LABEL: test_hsubps:
1885 ; GENERIC-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
1886 ; GENERIC-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
1887 ; GENERIC-NEXT: retq # sched: [1:1.00]
1889 ; SANDY-LABEL: test_hsubps:
1891 ; SANDY-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
1892 ; SANDY-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
1893 ; SANDY-NEXT: retq # sched: [1:1.00]
1895 ; HASWELL-LABEL: test_hsubps:
1897 ; HASWELL-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
1898 ; HASWELL-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [5:2.00]
1899 ; HASWELL-NEXT: retq # sched: [2:1.00]
1901 ; BROADWELL-LABEL: test_hsubps:
1902 ; BROADWELL: # BB#0:
1903 ; BROADWELL-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
1904 ; BROADWELL-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [11:2.00]
1905 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1907 ; SKYLAKE-LABEL: test_hsubps:
1909 ; SKYLAKE-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [6:2.00]
1910 ; SKYLAKE-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [13:2.00]
1911 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1913 ; SKX-LABEL: test_hsubps:
1915 ; SKX-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [6:2.00]
1916 ; SKX-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [13:2.00]
1917 ; SKX-NEXT: retq # sched: [7:1.00]
1919 ; BTVER2-LABEL: test_hsubps:
1921 ; BTVER2-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
1922 ; BTVER2-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
1923 ; BTVER2-NEXT: retq # sched: [4:1.00]
1925 ; ZNVER1-LABEL: test_hsubps:
1927 ; ZNVER1-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [100:?]
1928 ; ZNVER1-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [100:?]
1929 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1930 %1 = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1)
1931 %2 = load <8 x float>, <8 x float> *%a2, align 32
1932 %3 = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %1, <8 x float> %2)
1935 declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>) nounwind readnone
; Scheduling test for vinsertf128.
; A <4 x float> is widened to eight lanes and shuffled into lanes 4..7 of %a0
; (mask 0,1,2,3,8,9,10,11). This is done once with the register argument %a1
; and once with a vector loaded from %a2, giving the register and the
; memory-operand form of the instruction.
; The two results are combined with fadd, which is why each expected sequence
; ends with a vaddps before retq.
1937 define <8 x float> @test_insertf128(<8 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
1938 ; GENERIC-LABEL: test_insertf128:
1940 ; GENERIC-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:1.00]
1941 ; GENERIC-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
1942 ; GENERIC-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
1943 ; GENERIC-NEXT: retq # sched: [1:1.00]
1945 ; SANDY-LABEL: test_insertf128:
1947 ; SANDY-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:1.00]
1948 ; SANDY-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
1949 ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
1950 ; SANDY-NEXT: retq # sched: [1:1.00]
1952 ; HASWELL-LABEL: test_insertf128:
1954 ; HASWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00]
1955 ; HASWELL-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [1:0.50]
1956 ; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
1957 ; HASWELL-NEXT: retq # sched: [2:1.00]
1959 ; BROADWELL-LABEL: test_insertf128:
1960 ; BROADWELL: # BB#0:
1961 ; BROADWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00]
1962 ; BROADWELL-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [6:0.50]
1963 ; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
1964 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1966 ; SKYLAKE-LABEL: test_insertf128:
1968 ; SKYLAKE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00]
1969 ; SKYLAKE-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
1970 ; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
1971 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1973 ; SKX-LABEL: test_insertf128:
1975 ; SKX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00]
1976 ; SKX-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
1977 ; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.33]
1978 ; SKX-NEXT: retq # sched: [7:1.00]
1980 ; BTVER2-LABEL: test_insertf128:
1982 ; BTVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:0.50]
1983 ; BTVER2-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
1984 ; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
1985 ; BTVER2-NEXT: retq # sched: [4:1.00]
1987 ; ZNVER1-LABEL: test_insertf128:
1989 ; ZNVER1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [2:0.67]
1990 ; ZNVER1-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [9:0.67]
1991 ; ZNVER1-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
1992 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1993 %1 = shufflevector <4 x float> %a1, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
1994 %2 = shufflevector <8 x float> %a0, <8 x float> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
1995 %3 = load <4 x float>, <4 x float> *%a2, align 16
1996 %4 = shufflevector <4 x float> %3, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
1997 %5 = shufflevector <8 x float> %a0, <8 x float> %4, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
1998 %6 = fadd <8 x float> %2, %5
; Scheduling test for the 256-bit vlddqu instruction.
; A single call to the ldu.dq.256 intrinsic on the pointer argument %a0 yields
; one vlddqu from (%rdi) into %ymm0 in the expected output for every CPU.
2002 define <32 x i8> @test_lddqu(i8* %a0) {
2003 ; GENERIC-LABEL: test_lddqu:
2005 ; GENERIC-NEXT: vlddqu (%rdi), %ymm0 # sched: [6:0.50]
2006 ; GENERIC-NEXT: retq # sched: [1:1.00]
2008 ; SANDY-LABEL: test_lddqu:
2010 ; SANDY-NEXT: vlddqu (%rdi), %ymm0 # sched: [6:0.50]
2011 ; SANDY-NEXT: retq # sched: [1:1.00]
2013 ; HASWELL-LABEL: test_lddqu:
2015 ; HASWELL-NEXT: vlddqu (%rdi), %ymm0 # sched: [1:0.50]
2016 ; HASWELL-NEXT: retq # sched: [2:1.00]
2018 ; BROADWELL-LABEL: test_lddqu:
2019 ; BROADWELL: # BB#0:
2020 ; BROADWELL-NEXT: vlddqu (%rdi), %ymm0 # sched: [6:0.50]
2021 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2023 ; SKYLAKE-LABEL: test_lddqu:
2025 ; SKYLAKE-NEXT: vlddqu (%rdi), %ymm0 # sched: [7:0.50]
2026 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2028 ; SKX-LABEL: test_lddqu:
2030 ; SKX-NEXT: vlddqu (%rdi), %ymm0 # sched: [7:0.50]
2031 ; SKX-NEXT: retq # sched: [7:1.00]
2033 ; BTVER2-LABEL: test_lddqu:
2035 ; BTVER2-NEXT: vlddqu (%rdi), %ymm0 # sched: [5:1.00]
2036 ; BTVER2-NEXT: retq # sched: [4:1.00]
2038 ; ZNVER1-LABEL: test_lddqu:
2040 ; ZNVER1-NEXT: vlddqu (%rdi), %ymm0 # sched: [8:0.50]
2041 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2042 %1 = call <32 x i8> @llvm.x86.avx.ldu.dq.256(i8* %a0)
2045 declare <32 x i8> @llvm.x86.avx.ldu.dq.256(i8*) nounwind readonly
; Scheduling test for the 128-bit vmaskmovpd instruction, load and store forms.
; maskload.pd reads through %a0 under mask %a1, then maskstore.pd writes %a2
; through the same pointer with the same mask.
; The expected output therefore has a masked load into %xmm2, a masked store to
; (%rdi), and a vmovapd that copies the loaded value into %xmm0.
2047 define <2 x double> @test_maskmovpd(i8* %a0, <2 x i64> %a1, <2 x double> %a2) {
2048 ; GENERIC-LABEL: test_maskmovpd:
2050 ; GENERIC-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [8:1.00]
2051 ; GENERIC-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
2052 ; GENERIC-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00]
2053 ; GENERIC-NEXT: retq # sched: [1:1.00]
2055 ; SANDY-LABEL: test_maskmovpd:
2057 ; SANDY-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [8:1.00]
2058 ; SANDY-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
2059 ; SANDY-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00]
2060 ; SANDY-NEXT: retq # sched: [1:1.00]
2062 ; HASWELL-LABEL: test_maskmovpd:
2064 ; HASWELL-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [2:2.00]
2065 ; HASWELL-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [4:1.00]
2066 ; HASWELL-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00]
2067 ; HASWELL-NEXT: retq # sched: [2:1.00]
2069 ; BROADWELL-LABEL: test_maskmovpd:
2070 ; BROADWELL: # BB#0:
2071 ; BROADWELL-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [7:2.00]
2072 ; BROADWELL-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
2073 ; BROADWELL-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00]
2074 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2076 ; SKYLAKE-LABEL: test_maskmovpd:
2078 ; SKYLAKE-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [7:0.50]
2079 ; SKYLAKE-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [2:1.00]
2080 ; SKYLAKE-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00]
2081 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2083 ; SKX-LABEL: test_maskmovpd:
2085 ; SKX-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [7:0.50]
2086 ; SKX-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [2:1.00]
2087 ; SKX-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00]
2088 ; SKX-NEXT: retq # sched: [7:1.00]
2090 ; BTVER2-LABEL: test_maskmovpd:
2092 ; BTVER2-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [6:1.00]
2093 ; BTVER2-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [6:2.00]
2094 ; BTVER2-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:0.50]
2095 ; BTVER2-NEXT: retq # sched: [4:1.00]
2097 ; ZNVER1-LABEL: test_maskmovpd:
2099 ; ZNVER1-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [8:0.50]
2100 ; ZNVER1-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [4:0.50]
2101 ; ZNVER1-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:0.50]
2102 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2103 %1 = call <2 x double> @llvm.x86.avx.maskload.pd(i8* %a0, <2 x i64> %a1)
2104 call void @llvm.x86.avx.maskstore.pd(i8* %a0, <2 x i64> %a1, <2 x double> %a2)
2107 declare <2 x double> @llvm.x86.avx.maskload.pd(i8*, <2 x i64>) nounwind readonly
2108 declare void @llvm.x86.avx.maskstore.pd(i8*, <2 x i64>, <2 x double>) nounwind
; Scheduling test for the 256-bit vmaskmovpd instruction, load and store forms.
; maskload.pd.256 reads through %a0 under mask %a1, then maskstore.pd.256
; writes %a2 through the same pointer with the same mask.
; The expected output therefore has a masked load into %ymm2, a masked store to
; (%rdi), and a vmovapd that copies the loaded value into %ymm0.
2110 define <4 x double> @test_maskmovpd_ymm(i8* %a0, <4 x i64> %a1, <4 x double> %a2) {
2111 ; GENERIC-LABEL: test_maskmovpd_ymm:
2113 ; GENERIC-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [9:1.00]
2114 ; GENERIC-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
2115 ; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00]
2116 ; GENERIC-NEXT: retq # sched: [1:1.00]
2118 ; SANDY-LABEL: test_maskmovpd_ymm:
2120 ; SANDY-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [9:1.00]
2121 ; SANDY-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
2122 ; SANDY-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00]
2123 ; SANDY-NEXT: retq # sched: [1:1.00]
2125 ; HASWELL-LABEL: test_maskmovpd_ymm:
2127 ; HASWELL-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [2:2.00]
2128 ; HASWELL-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [4:1.00]
2129 ; HASWELL-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00]
2130 ; HASWELL-NEXT: retq # sched: [2:1.00]
2132 ; BROADWELL-LABEL: test_maskmovpd_ymm:
2133 ; BROADWELL: # BB#0:
2134 ; BROADWELL-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [8:2.00]
2135 ; BROADWELL-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
2136 ; BROADWELL-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00]
2137 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2139 ; SKYLAKE-LABEL: test_maskmovpd_ymm:
2141 ; SKYLAKE-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [8:0.50]
2142 ; SKYLAKE-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [2:1.00]
2143 ; SKYLAKE-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00]
2144 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2146 ; SKX-LABEL: test_maskmovpd_ymm:
2148 ; SKX-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [8:0.50]
2149 ; SKX-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [2:1.00]
2150 ; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00]
2151 ; SKX-NEXT: retq # sched: [7:1.00]
2153 ; BTVER2-LABEL: test_maskmovpd_ymm:
2155 ; BTVER2-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [6:2.00]
2156 ; BTVER2-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [6:2.00]
2157 ; BTVER2-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.50]
2158 ; BTVER2-NEXT: retq # sched: [4:1.00]
2160 ; ZNVER1-LABEL: test_maskmovpd_ymm:
2162 ; ZNVER1-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [8:1.00]
2163 ; ZNVER1-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
2164 ; ZNVER1-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.50]
2165 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2166 %1 = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %a0, <4 x i64> %a1)
2167 call void @llvm.x86.avx.maskstore.pd.256(i8* %a0, <4 x i64> %a1, <4 x double> %a2)
2170 declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8*, <4 x i64>) nounwind readonly
2171 declare void @llvm.x86.avx.maskstore.pd.256(i8*, <4 x i64>, <4 x double>) nounwind
; Scheduling test for the 128-bit vmaskmovps instruction, load and store forms.
; maskload.ps reads through %a0 under mask %a1, then maskstore.ps writes %a2
; through the same pointer with the same mask.
; The expected output therefore has a masked load into %xmm2, a masked store to
; (%rdi), and a vmovaps that copies the loaded value into %xmm0.
2173 define <4 x float> @test_maskmovps(i8* %a0, <4 x i32> %a1, <4 x float> %a2) {
2174 ; GENERIC-LABEL: test_maskmovps:
2176 ; GENERIC-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [8:1.00]
2177 ; GENERIC-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
2178 ; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00]
2179 ; GENERIC-NEXT: retq # sched: [1:1.00]
2181 ; SANDY-LABEL: test_maskmovps:
2183 ; SANDY-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [8:1.00]
2184 ; SANDY-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
2185 ; SANDY-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00]
2186 ; SANDY-NEXT: retq # sched: [1:1.00]
2188 ; HASWELL-LABEL: test_maskmovps:
2190 ; HASWELL-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [2:2.00]
2191 ; HASWELL-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [4:1.00]
2192 ; HASWELL-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00]
2193 ; HASWELL-NEXT: retq # sched: [2:1.00]
2195 ; BROADWELL-LABEL: test_maskmovps:
2196 ; BROADWELL: # BB#0:
2197 ; BROADWELL-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [7:2.00]
2198 ; BROADWELL-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
2199 ; BROADWELL-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00]
2200 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2202 ; SKYLAKE-LABEL: test_maskmovps:
2204 ; SKYLAKE-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [7:0.50]
2205 ; SKYLAKE-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [2:1.00]
2206 ; SKYLAKE-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00]
2207 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2209 ; SKX-LABEL: test_maskmovps:
2211 ; SKX-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [7:0.50]
2212 ; SKX-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [2:1.00]
2213 ; SKX-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00]
2214 ; SKX-NEXT: retq # sched: [7:1.00]
2216 ; BTVER2-LABEL: test_maskmovps:
2218 ; BTVER2-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [6:1.00]
2219 ; BTVER2-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [6:2.00]
2220 ; BTVER2-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.50]
2221 ; BTVER2-NEXT: retq # sched: [4:1.00]
2223 ; ZNVER1-LABEL: test_maskmovps:
2225 ; ZNVER1-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [8:0.50]
2226 ; ZNVER1-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [4:0.50]
2227 ; ZNVER1-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.50]
2228 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2229 %1 = call <4 x float> @llvm.x86.avx.maskload.ps(i8* %a0, <4 x i32> %a1)
2230 call void @llvm.x86.avx.maskstore.ps(i8* %a0, <4 x i32> %a1, <4 x float> %a2)
2233 declare <4 x float> @llvm.x86.avx.maskload.ps(i8*, <4 x i32>) nounwind readonly
2234 declare void @llvm.x86.avx.maskstore.ps(i8*, <4 x i32>, <4 x float>) nounwind
; Scheduling test for the 256-bit vmaskmovps instruction, load and store forms.
; maskload.ps.256 reads through %a0 under mask %a1, then maskstore.ps.256
; writes %a2 through the same pointer with the same mask.
; The expected output therefore has a masked load into %ymm2, a masked store to
; (%rdi), and a vmovaps that copies the loaded value into %ymm0.
2236 define <8 x float> @test_maskmovps_ymm(i8* %a0, <8 x i32> %a1, <8 x float> %a2) {
2237 ; GENERIC-LABEL: test_maskmovps_ymm:
2239 ; GENERIC-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [9:1.00]
2240 ; GENERIC-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
2241 ; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00]
2242 ; GENERIC-NEXT: retq # sched: [1:1.00]
2244 ; SANDY-LABEL: test_maskmovps_ymm:
2246 ; SANDY-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [9:1.00]
2247 ; SANDY-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
2248 ; SANDY-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00]
2249 ; SANDY-NEXT: retq # sched: [1:1.00]
2251 ; HASWELL-LABEL: test_maskmovps_ymm:
2253 ; HASWELL-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [2:2.00]
2254 ; HASWELL-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [4:1.00]
2255 ; HASWELL-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00]
2256 ; HASWELL-NEXT: retq # sched: [2:1.00]
2258 ; BROADWELL-LABEL: test_maskmovps_ymm:
2259 ; BROADWELL: # BB#0:
2260 ; BROADWELL-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [8:2.00]
2261 ; BROADWELL-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
2262 ; BROADWELL-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00]
2263 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2265 ; SKYLAKE-LABEL: test_maskmovps_ymm:
2267 ; SKYLAKE-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [8:0.50]
2268 ; SKYLAKE-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [2:1.00]
2269 ; SKYLAKE-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00]
2270 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2272 ; SKX-LABEL: test_maskmovps_ymm:
2274 ; SKX-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [8:0.50]
2275 ; SKX-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [2:1.00]
2276 ; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00]
2277 ; SKX-NEXT: retq # sched: [7:1.00]
2279 ; BTVER2-LABEL: test_maskmovps_ymm:
2281 ; BTVER2-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [6:2.00]
2282 ; BTVER2-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [6:2.00]
2283 ; BTVER2-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.50]
2284 ; BTVER2-NEXT: retq # sched: [4:1.00]
2286 ; ZNVER1-LABEL: test_maskmovps_ymm:
2288 ; ZNVER1-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [8:1.00]
2289 ; ZNVER1-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
2290 ; ZNVER1-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.50]
2291 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2292 %1 = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %a0, <8 x i32> %a1)
2293 call void @llvm.x86.avx.maskstore.ps.256(i8* %a0, <8 x i32> %a1, <8 x float> %a2)
2296 declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x i32>) nounwind readonly
2297 declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x i32>, <8 x float>) nounwind
; Scheduling test for the 256-bit vmaxpd instruction.
; The max.pd.256 intrinsic is applied to the two vector arguments and then to
; that result and a vector loaded from %a2, so the expected output contains both
; the register form and the memory-operand form.
2299 define <4 x double> @test_maxpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
2300 ; GENERIC-LABEL: test_maxpd:
2302 ; GENERIC-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2303 ; GENERIC-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
2304 ; GENERIC-NEXT: retq # sched: [1:1.00]
2306 ; SANDY-LABEL: test_maxpd:
2308 ; SANDY-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2309 ; SANDY-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
2310 ; SANDY-NEXT: retq # sched: [1:1.00]
2312 ; HASWELL-LABEL: test_maxpd:
2314 ; HASWELL-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2315 ; HASWELL-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [3:1.00]
2316 ; HASWELL-NEXT: retq # sched: [2:1.00]
2318 ; BROADWELL-LABEL: test_maxpd:
2319 ; BROADWELL: # BB#0:
2320 ; BROADWELL-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2321 ; BROADWELL-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
2322 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2324 ; SKYLAKE-LABEL: test_maxpd:
2326 ; SKYLAKE-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
2327 ; SKYLAKE-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
2328 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2330 ; SKX-LABEL: test_maxpd:
2332 ; SKX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
2333 ; SKX-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
2334 ; SKX-NEXT: retq # sched: [7:1.00]
2336 ; BTVER2-LABEL: test_maxpd:
2338 ; BTVER2-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
2339 ; BTVER2-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [7:2.00]
2340 ; BTVER2-NEXT: retq # sched: [4:1.00]
2342 ; ZNVER1-LABEL: test_maxpd:
2344 ; ZNVER1-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2345 ; ZNVER1-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
2346 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2347 %1 = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1)
2348 %2 = load <4 x double>, <4 x double> *%a2, align 32
2349 %3 = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %1, <4 x double> %2)
2352 declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone
; Scheduling test for the 256-bit vmaxps instruction.
; The max.ps.256 intrinsic is applied to the two vector arguments and then to
; that result and a vector loaded from %a2, so the expected output contains both
; the register form and the memory-operand form.
2354 define <8 x float> @test_maxps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
2355 ; GENERIC-LABEL: test_maxps:
2357 ; GENERIC-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2358 ; GENERIC-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
2359 ; GENERIC-NEXT: retq # sched: [1:1.00]
2361 ; SANDY-LABEL: test_maxps:
2363 ; SANDY-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2364 ; SANDY-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
2365 ; SANDY-NEXT: retq # sched: [1:1.00]
2367 ; HASWELL-LABEL: test_maxps:
2369 ; HASWELL-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2370 ; HASWELL-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [3:1.00]
2371 ; HASWELL-NEXT: retq # sched: [2:1.00]
2373 ; BROADWELL-LABEL: test_maxps:
2374 ; BROADWELL: # BB#0:
2375 ; BROADWELL-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2376 ; BROADWELL-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
2377 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2379 ; SKYLAKE-LABEL: test_maxps:
2381 ; SKYLAKE-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
2382 ; SKYLAKE-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
2383 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2385 ; SKX-LABEL: test_maxps:
2387 ; SKX-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
2388 ; SKX-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
2389 ; SKX-NEXT: retq # sched: [7:1.00]
2391 ; BTVER2-LABEL: test_maxps:
2393 ; BTVER2-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
2394 ; BTVER2-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [7:2.00]
2395 ; BTVER2-NEXT: retq # sched: [4:1.00]
2397 ; ZNVER1-LABEL: test_maxps:
2399 ; ZNVER1-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2400 ; ZNVER1-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
2401 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2402 %1 = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1)
2403 %2 = load <8 x float>, <8 x float> *%a2, align 32
2404 %3 = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %1, <8 x float> %2)
2407 declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone
; Scheduling test for the 256-bit vminpd instruction.
; The min.pd.256 intrinsic is applied to the two vector arguments and then to
; that result and a vector loaded from %a2, so the expected output contains both
; the register form and the memory-operand form.
2409 define <4 x double> @test_minpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
2410 ; GENERIC-LABEL: test_minpd:
2412 ; GENERIC-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2413 ; GENERIC-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
2414 ; GENERIC-NEXT: retq # sched: [1:1.00]
2416 ; SANDY-LABEL: test_minpd:
2418 ; SANDY-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2419 ; SANDY-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
2420 ; SANDY-NEXT: retq # sched: [1:1.00]
2422 ; HASWELL-LABEL: test_minpd:
2424 ; HASWELL-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2425 ; HASWELL-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [3:1.00]
2426 ; HASWELL-NEXT: retq # sched: [2:1.00]
2428 ; BROADWELL-LABEL: test_minpd:
2429 ; BROADWELL: # BB#0:
2430 ; BROADWELL-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2431 ; BROADWELL-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
2432 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2434 ; SKYLAKE-LABEL: test_minpd:
2436 ; SKYLAKE-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
2437 ; SKYLAKE-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
2438 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2440 ; SKX-LABEL: test_minpd:
2442 ; SKX-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
2443 ; SKX-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
2444 ; SKX-NEXT: retq # sched: [7:1.00]
2446 ; BTVER2-LABEL: test_minpd:
2448 ; BTVER2-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
2449 ; BTVER2-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [7:2.00]
2450 ; BTVER2-NEXT: retq # sched: [4:1.00]
2452 ; ZNVER1-LABEL: test_minpd:
2454 ; ZNVER1-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2455 ; ZNVER1-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
2456 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2457 %1 = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %a0, <4 x double> %a1)
2458 %2 = load <4 x double>, <4 x double> *%a2, align 32
2459 %3 = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %1, <4 x double> %2)
2462 declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone
; Scheduling test for the 256-bit packed-single minimum intrinsic
; (@llvm.x86.avx.min.ps.256). The intrinsic is applied to the two vector
; arguments, then to that result and a 32-byte-aligned load from %a2, so every
; CPU's checks expect one register-register vminps followed by one vminps with
; a (%rdi) memory operand, each carrying its "# sched" annotation.
2464 define <8 x float> @test_minps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
2465 ; GENERIC-LABEL: test_minps:
2467 ; GENERIC-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2468 ; GENERIC-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
2469 ; GENERIC-NEXT: retq # sched: [1:1.00]
2471 ; SANDY-LABEL: test_minps:
2473 ; SANDY-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2474 ; SANDY-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
2475 ; SANDY-NEXT: retq # sched: [1:1.00]
2477 ; HASWELL-LABEL: test_minps:
2479 ; HASWELL-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2480 ; HASWELL-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [3:1.00]
2481 ; HASWELL-NEXT: retq # sched: [2:1.00]
2483 ; BROADWELL-LABEL: test_minps:
2484 ; BROADWELL: # BB#0:
2485 ; BROADWELL-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2486 ; BROADWELL-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
2487 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2489 ; SKYLAKE-LABEL: test_minps:
2491 ; SKYLAKE-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
2492 ; SKYLAKE-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
2493 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2495 ; SKX-LABEL: test_minps:
2497 ; SKX-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
2498 ; SKX-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
2499 ; SKX-NEXT: retq # sched: [7:1.00]
2501 ; BTVER2-LABEL: test_minps:
2503 ; BTVER2-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
2504 ; BTVER2-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [7:2.00]
2505 ; BTVER2-NEXT: retq # sched: [4:1.00]
2507 ; ZNVER1-LABEL: test_minps:
2509 ; ZNVER1-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2510 ; ZNVER1-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
2511 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2512 %1 = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1)
2513 %2 = load <8 x float>, <8 x float> *%a2, align 32
2514 %3 = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %1, <8 x float> %2)
2517 declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone
; Scheduling test for aligned 256-bit packed-double moves. The IR loads a
; <4 x double> from %a0 (align 32), adds it to itself and stores the sum to
; %a1 (align 32); every CPU's checks expect a vmovapd load from (%rdi), a
; vaddpd, and a vmovapd store to (%rsi).
; NOTE(review): the fadd presumably exists to keep the load and store as
; separate double-domain instructions - confirm.
2519 define <4 x double> @test_movapd(<4 x double> *%a0, <4 x double> *%a1) {
2520 ; GENERIC-LABEL: test_movapd:
2522 ; GENERIC-NEXT: vmovapd (%rdi), %ymm0 # sched: [7:0.50]
2523 ; GENERIC-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
2524 ; GENERIC-NEXT: vmovapd %ymm0, (%rsi) # sched: [5:1.00]
2525 ; GENERIC-NEXT: retq # sched: [1:1.00]
2527 ; SANDY-LABEL: test_movapd:
2529 ; SANDY-NEXT: vmovapd (%rdi), %ymm0 # sched: [7:0.50]
2530 ; SANDY-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
2531 ; SANDY-NEXT: vmovapd %ymm0, (%rsi) # sched: [5:1.00]
2532 ; SANDY-NEXT: retq # sched: [1:1.00]
2534 ; HASWELL-LABEL: test_movapd:
2536 ; HASWELL-NEXT: vmovapd (%rdi), %ymm0 # sched: [1:0.50]
2537 ; HASWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
2538 ; HASWELL-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00]
2539 ; HASWELL-NEXT: retq # sched: [2:1.00]
2541 ; BROADWELL-LABEL: test_movapd:
2542 ; BROADWELL: # BB#0:
2543 ; BROADWELL-NEXT: vmovapd (%rdi), %ymm0 # sched: [6:0.50]
2544 ; BROADWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
2545 ; BROADWELL-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00]
2546 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2548 ; SKYLAKE-LABEL: test_movapd:
2550 ; SKYLAKE-NEXT: vmovapd (%rdi), %ymm0 # sched: [7:0.50]
2551 ; SKYLAKE-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
2552 ; SKYLAKE-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00]
2553 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2555 ; SKX-LABEL: test_movapd:
2557 ; SKX-NEXT: vmovapd (%rdi), %ymm0 # sched: [7:0.50]
2558 ; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.33]
2559 ; SKX-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00]
2560 ; SKX-NEXT: retq # sched: [7:1.00]
2562 ; BTVER2-LABEL: test_movapd:
2564 ; BTVER2-NEXT: vmovapd (%rdi), %ymm0 # sched: [5:1.00]
2565 ; BTVER2-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:2.00]
2566 ; BTVER2-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00]
2567 ; BTVER2-NEXT: retq # sched: [4:1.00]
2569 ; ZNVER1-LABEL: test_movapd:
2571 ; ZNVER1-NEXT: vmovapd (%rdi), %ymm0 # sched: [8:0.50]
2572 ; ZNVER1-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
2573 ; ZNVER1-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:0.50]
2574 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2575 %1 = load <4 x double>, <4 x double> *%a0, align 32
2576 %2 = fadd <4 x double> %1, %1
2577 store <4 x double> %2, <4 x double> *%a1, align 32
; Scheduling test for aligned 256-bit packed-single moves. The IR loads an
; <8 x float> from %a0 (align 32), adds it to itself and stores the sum to
; %a1 (align 32); every CPU's checks expect a vmovaps load from (%rdi), a
; vaddps, and a vmovaps store to (%rsi).
; NOTE(review): the fadd presumably exists to keep the load and store as
; separate single-precision-domain instructions - confirm.
2581 define <8 x float> @test_movaps(<8 x float> *%a0, <8 x float> *%a1) {
2582 ; GENERIC-LABEL: test_movaps:
2584 ; GENERIC-NEXT: vmovaps (%rdi), %ymm0 # sched: [7:0.50]
2585 ; GENERIC-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
2586 ; GENERIC-NEXT: vmovaps %ymm0, (%rsi) # sched: [5:1.00]
2587 ; GENERIC-NEXT: retq # sched: [1:1.00]
2589 ; SANDY-LABEL: test_movaps:
2591 ; SANDY-NEXT: vmovaps (%rdi), %ymm0 # sched: [7:0.50]
2592 ; SANDY-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
2593 ; SANDY-NEXT: vmovaps %ymm0, (%rsi) # sched: [5:1.00]
2594 ; SANDY-NEXT: retq # sched: [1:1.00]
2596 ; HASWELL-LABEL: test_movaps:
2598 ; HASWELL-NEXT: vmovaps (%rdi), %ymm0 # sched: [1:0.50]
2599 ; HASWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
2600 ; HASWELL-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00]
2601 ; HASWELL-NEXT: retq # sched: [2:1.00]
2603 ; BROADWELL-LABEL: test_movaps:
2604 ; BROADWELL: # BB#0:
2605 ; BROADWELL-NEXT: vmovaps (%rdi), %ymm0 # sched: [6:0.50]
2606 ; BROADWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
2607 ; BROADWELL-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00]
2608 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2610 ; SKYLAKE-LABEL: test_movaps:
2612 ; SKYLAKE-NEXT: vmovaps (%rdi), %ymm0 # sched: [7:0.50]
2613 ; SKYLAKE-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
2614 ; SKYLAKE-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00]
2615 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2617 ; SKX-LABEL: test_movaps:
2619 ; SKX-NEXT: vmovaps (%rdi), %ymm0 # sched: [7:0.50]
2620 ; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.33]
2621 ; SKX-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00]
2622 ; SKX-NEXT: retq # sched: [7:1.00]
2624 ; BTVER2-LABEL: test_movaps:
2626 ; BTVER2-NEXT: vmovaps (%rdi), %ymm0 # sched: [5:1.00]
2627 ; BTVER2-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:2.00]
2628 ; BTVER2-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00]
2629 ; BTVER2-NEXT: retq # sched: [4:1.00]
2631 ; ZNVER1-LABEL: test_movaps:
2633 ; ZNVER1-NEXT: vmovaps (%rdi), %ymm0 # sched: [8:0.50]
2634 ; ZNVER1-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
2635 ; ZNVER1-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:0.50]
2636 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2637 %1 = load <8 x float>, <8 x float> *%a0, align 32
2638 %2 = fadd <8 x float> %1, %1
2639 store <8 x float> %2, <8 x float> *%a1, align 32
; Scheduling test for 256-bit vmovddup. The <0,0,2,2> shuffle is applied to
; the register argument %a0 and to a 32-byte-aligned load from %a1, and the
; two results are added. Every CPU's checks expect one register-form and one
; memory-form vmovddup followed by vaddpd; the BTVER2 and ZNVER1 checks list
; the memory form first, the other CPUs list the register form first.
2643 define <4 x double> @test_movddup(<4 x double> %a0, <4 x double> *%a1) {
2644 ; GENERIC-LABEL: test_movddup:
2646 ; GENERIC-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00]
2647 ; GENERIC-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [7:0.50]
2648 ; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2649 ; GENERIC-NEXT: retq # sched: [1:1.00]
2651 ; SANDY-LABEL: test_movddup:
2653 ; SANDY-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00]
2654 ; SANDY-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [7:0.50]
2655 ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2656 ; SANDY-NEXT: retq # sched: [1:1.00]
2658 ; HASWELL-LABEL: test_movddup:
2660 ; HASWELL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00]
2661 ; HASWELL-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [1:0.50]
2662 ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2663 ; HASWELL-NEXT: retq # sched: [2:1.00]
2665 ; BROADWELL-LABEL: test_movddup:
2666 ; BROADWELL: # BB#0:
2667 ; BROADWELL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00]
2668 ; BROADWELL-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [6:0.50]
2669 ; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2670 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2672 ; SKYLAKE-LABEL: test_movddup:
2674 ; SKYLAKE-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00]
2675 ; SKYLAKE-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [7:0.50]
2676 ; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
2677 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2679 ; SKX-LABEL: test_movddup:
2681 ; SKX-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00]
2682 ; SKX-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [7:0.50]
2683 ; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
2684 ; SKX-NEXT: retq # sched: [7:1.00]
2686 ; BTVER2-LABEL: test_movddup:
2688 ; BTVER2-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [5:1.00]
2689 ; BTVER2-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:0.50]
2690 ; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
2691 ; BTVER2-NEXT: retq # sched: [4:1.00]
2693 ; ZNVER1-LABEL: test_movddup:
2695 ; ZNVER1-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [8:0.50]
2696 ; ZNVER1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:0.50]
2697 ; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2698 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2699 %1 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
2700 %2 = load <4 x double>, <4 x double> *%a1, align 32
2701 %3 = shufflevector <4 x double> %2, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
2702 %4 = fadd <4 x double> %1, %3
; Scheduling test for the 256-bit vmovmskpd, reached through the
; @llvm.x86.avx.movmsk.pd.256 intrinsic; every CPU's checks expect
; vmovmskpd %ymm0, %eax. All CPUs except BTVER2 also expect a vzeroupper
; before retq. The GENERIC and SANDY vzeroupper lines carry no "# sched"
; annotation, and ZNVER1 shows it as [100:?].
2706 define i32 @test_movmskpd(<4 x double> %a0) {
2707 ; GENERIC-LABEL: test_movmskpd:
2709 ; GENERIC-NEXT: vmovmskpd %ymm0, %eax # sched: [2:1.00]
2710 ; GENERIC-NEXT: vzeroupper
2711 ; GENERIC-NEXT: retq # sched: [1:1.00]
2713 ; SANDY-LABEL: test_movmskpd:
2715 ; SANDY-NEXT: vmovmskpd %ymm0, %eax # sched: [2:1.00]
2716 ; SANDY-NEXT: vzeroupper
2717 ; SANDY-NEXT: retq # sched: [1:1.00]
2719 ; HASWELL-LABEL: test_movmskpd:
2721 ; HASWELL-NEXT: vmovmskpd %ymm0, %eax # sched: [3:1.00]
2722 ; HASWELL-NEXT: vzeroupper # sched: [4:1.00]
2723 ; HASWELL-NEXT: retq # sched: [2:1.00]
2725 ; BROADWELL-LABEL: test_movmskpd:
2726 ; BROADWELL: # BB#0:
2727 ; BROADWELL-NEXT: vmovmskpd %ymm0, %eax # sched: [3:1.00]
2728 ; BROADWELL-NEXT: vzeroupper # sched: [4:1.00]
2729 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2731 ; SKYLAKE-LABEL: test_movmskpd:
2733 ; SKYLAKE-NEXT: vmovmskpd %ymm0, %eax # sched: [2:1.00]
2734 ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00]
2735 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2737 ; SKX-LABEL: test_movmskpd:
2739 ; SKX-NEXT: vmovmskpd %ymm0, %eax # sched: [2:1.00]
2740 ; SKX-NEXT: vzeroupper # sched: [4:1.00]
2741 ; SKX-NEXT: retq # sched: [7:1.00]
2743 ; BTVER2-LABEL: test_movmskpd:
2745 ; BTVER2-NEXT: vmovmskpd %ymm0, %eax # sched: [3:1.00]
2746 ; BTVER2-NEXT: retq # sched: [4:1.00]
2748 ; ZNVER1-LABEL: test_movmskpd:
2750 ; ZNVER1-NEXT: vmovmskpd %ymm0, %eax # sched: [1:1.00]
2751 ; ZNVER1-NEXT: vzeroupper # sched: [100:?]
2752 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2753 %1 = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0)
2756 declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>) nounwind readnone
; Scheduling test for the 256-bit vmovmskps, reached through the
; @llvm.x86.avx.movmsk.ps.256 intrinsic; every CPU's checks expect
; vmovmskps %ymm0, %eax. All CPUs except BTVER2 also expect a vzeroupper
; before retq. The GENERIC and SANDY vzeroupper lines carry no "# sched"
; annotation, and ZNVER1 shows it as [100:?].
2758 define i32 @test_movmskps(<8 x float> %a0) {
2759 ; GENERIC-LABEL: test_movmskps:
2761 ; GENERIC-NEXT: vmovmskps %ymm0, %eax # sched: [2:1.00]
2762 ; GENERIC-NEXT: vzeroupper
2763 ; GENERIC-NEXT: retq # sched: [1:1.00]
2765 ; SANDY-LABEL: test_movmskps:
2767 ; SANDY-NEXT: vmovmskps %ymm0, %eax # sched: [2:1.00]
2768 ; SANDY-NEXT: vzeroupper
2769 ; SANDY-NEXT: retq # sched: [1:1.00]
2771 ; HASWELL-LABEL: test_movmskps:
2773 ; HASWELL-NEXT: vmovmskps %ymm0, %eax # sched: [3:1.00]
2774 ; HASWELL-NEXT: vzeroupper # sched: [4:1.00]
2775 ; HASWELL-NEXT: retq # sched: [2:1.00]
2777 ; BROADWELL-LABEL: test_movmskps:
2778 ; BROADWELL: # BB#0:
2779 ; BROADWELL-NEXT: vmovmskps %ymm0, %eax # sched: [3:1.00]
2780 ; BROADWELL-NEXT: vzeroupper # sched: [4:1.00]
2781 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2783 ; SKYLAKE-LABEL: test_movmskps:
2785 ; SKYLAKE-NEXT: vmovmskps %ymm0, %eax # sched: [2:1.00]
2786 ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00]
2787 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2789 ; SKX-LABEL: test_movmskps:
2791 ; SKX-NEXT: vmovmskps %ymm0, %eax # sched: [2:1.00]
2792 ; SKX-NEXT: vzeroupper # sched: [4:1.00]
2793 ; SKX-NEXT: retq # sched: [7:1.00]
2795 ; BTVER2-LABEL: test_movmskps:
2797 ; BTVER2-NEXT: vmovmskps %ymm0, %eax # sched: [3:1.00]
2798 ; BTVER2-NEXT: retq # sched: [4:1.00]
2800 ; ZNVER1-LABEL: test_movmskps:
2802 ; ZNVER1-NEXT: vmovmskps %ymm0, %eax # sched: [1:1.00]
2803 ; ZNVER1-NEXT: vzeroupper # sched: [100:?]
2804 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2805 %1 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0)
2808 declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone
; Scheduling test for the 256-bit vmovntdq store. The instruction is written
; directly as inline asm ("vmovntdq $0, $1" with constraints "x,*m") rather
; than selected from IR, so the expected output places it between the #APP
; and #NO_APP inline-asm markers. The BTVER2 checks contain no vzeroupper
; before retq; the other CPUs' checks do.
2810 define void @test_movntdq(<4 x i64> %a0, <4 x i64> *%a1) {
2811 ; GENERIC-LABEL: test_movntdq:
2813 ; GENERIC-NEXT: #APP
2814 ; GENERIC-NEXT: vmovntdq %ymm0, (%rdi) # sched: [5:1.00]
2815 ; GENERIC-NEXT: #NO_APP
2816 ; GENERIC-NEXT: vzeroupper
2817 ; GENERIC-NEXT: retq # sched: [1:1.00]
2819 ; SANDY-LABEL: test_movntdq:
2822 ; SANDY-NEXT: vmovntdq %ymm0, (%rdi) # sched: [5:1.00]
2823 ; SANDY-NEXT: #NO_APP
2824 ; SANDY-NEXT: vzeroupper
2825 ; SANDY-NEXT: retq # sched: [1:1.00]
2827 ; HASWELL-LABEL: test_movntdq:
2829 ; HASWELL-NEXT: #APP
2830 ; HASWELL-NEXT: vmovntdq %ymm0, (%rdi) # sched: [1:1.00]
2831 ; HASWELL-NEXT: #NO_APP
2832 ; HASWELL-NEXT: vzeroupper # sched: [4:1.00]
2833 ; HASWELL-NEXT: retq # sched: [2:1.00]
2835 ; BROADWELL-LABEL: test_movntdq:
2836 ; BROADWELL: # BB#0:
2837 ; BROADWELL-NEXT: #APP
2838 ; BROADWELL-NEXT: vmovntdq %ymm0, (%rdi) # sched: [1:1.00]
2839 ; BROADWELL-NEXT: #NO_APP
2840 ; BROADWELL-NEXT: vzeroupper # sched: [4:1.00]
2841 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2843 ; SKYLAKE-LABEL: test_movntdq:
2845 ; SKYLAKE-NEXT: #APP
2846 ; SKYLAKE-NEXT: vmovntdq %ymm0, (%rdi) # sched: [1:1.00]
2847 ; SKYLAKE-NEXT: #NO_APP
2848 ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00]
2849 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2851 ; SKX-LABEL: test_movntdq:
2854 ; SKX-NEXT: vmovntdq %ymm0, (%rdi) # sched: [1:1.00]
2856 ; SKX-NEXT: vzeroupper # sched: [4:1.00]
2857 ; SKX-NEXT: retq # sched: [7:1.00]
2859 ; BTVER2-LABEL: test_movntdq:
2862 ; BTVER2-NEXT: vmovntdq %ymm0, (%rdi) # sched: [3:2.00]
2863 ; BTVER2-NEXT: #NO_APP
2864 ; BTVER2-NEXT: retq # sched: [4:1.00]
2866 ; ZNVER1-LABEL: test_movntdq:
2869 ; ZNVER1-NEXT: vmovntdq %ymm0, (%rdi) # sched: [1:0.50]
2870 ; ZNVER1-NEXT: #NO_APP
2871 ; ZNVER1-NEXT: vzeroupper # sched: [100:?]
2872 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2873 call void asm sideeffect "vmovntdq $0, $1", "x,*m"(<4 x i64> %a0, <4 x i64> *%a1)
; Scheduling test for the 256-bit non-temporal packed-double store. The IR
; adds %a0 to itself and stores the sum to %a1 with align 32 and
; !nontemporal metadata; every CPU's checks expect vaddpd followed by
; vmovntpd %ymm0, (%rdi).
; NOTE(review): the !0 metadata node is defined outside this function -
; confirm it is the usual !{i32 1} non-temporal marker.
2877 define <4 x double> @test_movntpd(<4 x double> %a0, <4 x double> *%a1) {
2878 ; GENERIC-LABEL: test_movntpd:
2880 ; GENERIC-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
2881 ; GENERIC-NEXT: vmovntpd %ymm0, (%rdi) # sched: [5:1.00]
2882 ; GENERIC-NEXT: retq # sched: [1:1.00]
2884 ; SANDY-LABEL: test_movntpd:
2886 ; SANDY-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
2887 ; SANDY-NEXT: vmovntpd %ymm0, (%rdi) # sched: [5:1.00]
2888 ; SANDY-NEXT: retq # sched: [1:1.00]
2890 ; HASWELL-LABEL: test_movntpd:
2892 ; HASWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
2893 ; HASWELL-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00]
2894 ; HASWELL-NEXT: retq # sched: [2:1.00]
2896 ; BROADWELL-LABEL: test_movntpd:
2897 ; BROADWELL: # BB#0:
2898 ; BROADWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
2899 ; BROADWELL-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00]
2900 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2902 ; SKYLAKE-LABEL: test_movntpd:
2904 ; SKYLAKE-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
2905 ; SKYLAKE-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00]
2906 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2908 ; SKX-LABEL: test_movntpd:
2910 ; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.33]
2911 ; SKX-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00]
2912 ; SKX-NEXT: retq # sched: [7:1.00]
2914 ; BTVER2-LABEL: test_movntpd:
2916 ; BTVER2-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:2.00]
2917 ; BTVER2-NEXT: vmovntpd %ymm0, (%rdi) # sched: [3:2.00]
2918 ; BTVER2-NEXT: retq # sched: [4:1.00]
2920 ; ZNVER1-LABEL: test_movntpd:
2922 ; ZNVER1-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
2923 ; ZNVER1-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:0.50]
2924 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2925 %1 = fadd <4 x double> %a0, %a0
2926 store <4 x double> %1, <4 x double> *%a1, align 32, !nontemporal !0
; Scheduling test for the 256-bit non-temporal packed-single store. The IR
; adds %a0 to itself and stores the sum to %a1 with align 32 and
; !nontemporal metadata; every CPU's checks expect vaddps followed by
; vmovntps %ymm0, (%rdi).
; NOTE(review): the !0 metadata node is defined outside this function -
; confirm it is the usual !{i32 1} non-temporal marker.
2930 define <8 x float> @test_movntps(<8 x float> %a0, <8 x float> *%a1) {
2931 ; GENERIC-LABEL: test_movntps:
2933 ; GENERIC-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
2934 ; GENERIC-NEXT: vmovntps %ymm0, (%rdi) # sched: [5:1.00]
2935 ; GENERIC-NEXT: retq # sched: [1:1.00]
2937 ; SANDY-LABEL: test_movntps:
2939 ; SANDY-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
2940 ; SANDY-NEXT: vmovntps %ymm0, (%rdi) # sched: [5:1.00]
2941 ; SANDY-NEXT: retq # sched: [1:1.00]
2943 ; HASWELL-LABEL: test_movntps:
2945 ; HASWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
2946 ; HASWELL-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00]
2947 ; HASWELL-NEXT: retq # sched: [2:1.00]
2949 ; BROADWELL-LABEL: test_movntps:
2950 ; BROADWELL: # BB#0:
2951 ; BROADWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
2952 ; BROADWELL-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00]
2953 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2955 ; SKYLAKE-LABEL: test_movntps:
2957 ; SKYLAKE-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
2958 ; SKYLAKE-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00]
2959 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2961 ; SKX-LABEL: test_movntps:
2963 ; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.33]
2964 ; SKX-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00]
2965 ; SKX-NEXT: retq # sched: [7:1.00]
2967 ; BTVER2-LABEL: test_movntps:
2969 ; BTVER2-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:2.00]
2970 ; BTVER2-NEXT: vmovntps %ymm0, (%rdi) # sched: [3:2.00]
2971 ; BTVER2-NEXT: retq # sched: [4:1.00]
2973 ; ZNVER1-LABEL: test_movntps:
2975 ; ZNVER1-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
2976 ; ZNVER1-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:0.50]
2977 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2978 %1 = fadd <8 x float> %a0, %a0
2979 store <8 x float> %1, <8 x float> *%a1, align 32, !nontemporal !0
; Scheduling test for 256-bit vmovshdup. The <1,1,3,3,5,5,7,7> shuffle is
; applied to the register argument %a0 and to a 32-byte-aligned load from
; %a1, and the two results are added. Every CPU's checks expect one
; register-form and one memory-form vmovshdup followed by vaddps; the BTVER2
; and ZNVER1 checks list the memory form first, the other CPUs list the
; register form first.
2983 define <8 x float> @test_movshdup(<8 x float> %a0, <8 x float> *%a1) {
2984 ; GENERIC-LABEL: test_movshdup:
2986 ; GENERIC-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00]
2987 ; GENERIC-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [7:0.50]
2988 ; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2989 ; GENERIC-NEXT: retq # sched: [1:1.00]
2991 ; SANDY-LABEL: test_movshdup:
2993 ; SANDY-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00]
2994 ; SANDY-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [7:0.50]
2995 ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
2996 ; SANDY-NEXT: retq # sched: [1:1.00]
2998 ; HASWELL-LABEL: test_movshdup:
3000 ; HASWELL-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00]
3001 ; HASWELL-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [1:0.50]
3002 ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3003 ; HASWELL-NEXT: retq # sched: [2:1.00]
3005 ; BROADWELL-LABEL: test_movshdup:
3006 ; BROADWELL: # BB#0:
3007 ; BROADWELL-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00]
3008 ; BROADWELL-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [6:0.50]
3009 ; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3010 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3012 ; SKYLAKE-LABEL: test_movshdup:
3014 ; SKYLAKE-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00]
3015 ; SKYLAKE-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [7:0.50]
3016 ; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
3017 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3019 ; SKX-LABEL: test_movshdup:
3021 ; SKX-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00]
3022 ; SKX-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [7:0.50]
3023 ; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
3024 ; SKX-NEXT: retq # sched: [7:1.00]
3026 ; BTVER2-LABEL: test_movshdup:
3028 ; BTVER2-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [5:1.00]
3029 ; BTVER2-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:0.50]
3030 ; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3031 ; BTVER2-NEXT: retq # sched: [4:1.00]
3033 ; ZNVER1-LABEL: test_movshdup:
3035 ; ZNVER1-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [8:0.50]
3036 ; ZNVER1-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:0.50]
3037 ; ZNVER1-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3038 ; ZNVER1-NEXT: retq # sched: [1:0.50]
3039 %1 = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
3040 %2 = load <8 x float>, <8 x float> *%a1, align 32
3041 %3 = shufflevector <8 x float> %2, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
3042 %4 = fadd <8 x float> %1, %3
; Scheduling test for 256-bit vmovsldup. The <0,0,2,2,4,4,6,6> shuffle is
; applied to the register argument %a0 and to a 32-byte-aligned load from
; %a1, and the two results are added. Every CPU's checks expect one
; register-form and one memory-form vmovsldup followed by vaddps; the BTVER2
; and ZNVER1 checks list the memory form first, the other CPUs list the
; register form first.
3046 define <8 x float> @test_movsldup(<8 x float> %a0, <8 x float> *%a1) {
3047 ; GENERIC-LABEL: test_movsldup:
3049 ; GENERIC-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00]
3050 ; GENERIC-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [7:0.50]
3051 ; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3052 ; GENERIC-NEXT: retq # sched: [1:1.00]
3054 ; SANDY-LABEL: test_movsldup:
3056 ; SANDY-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00]
3057 ; SANDY-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [7:0.50]
3058 ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3059 ; SANDY-NEXT: retq # sched: [1:1.00]
3061 ; HASWELL-LABEL: test_movsldup:
3063 ; HASWELL-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00]
3064 ; HASWELL-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [1:0.50]
3065 ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3066 ; HASWELL-NEXT: retq # sched: [2:1.00]
3068 ; BROADWELL-LABEL: test_movsldup:
3069 ; BROADWELL: # BB#0:
3070 ; BROADWELL-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00]
3071 ; BROADWELL-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [6:0.50]
3072 ; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3073 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3075 ; SKYLAKE-LABEL: test_movsldup:
3077 ; SKYLAKE-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00]
3078 ; SKYLAKE-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [7:0.50]
3079 ; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
3080 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3082 ; SKX-LABEL: test_movsldup:
3084 ; SKX-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00]
3085 ; SKX-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [7:0.50]
3086 ; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
3087 ; SKX-NEXT: retq # sched: [7:1.00]
3089 ; BTVER2-LABEL: test_movsldup:
3091 ; BTVER2-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [5:1.00]
3092 ; BTVER2-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:0.50]
3093 ; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3094 ; BTVER2-NEXT: retq # sched: [4:1.00]
3096 ; ZNVER1-LABEL: test_movsldup:
3098 ; ZNVER1-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [8:0.50]
3099 ; ZNVER1-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:0.50]
3100 ; ZNVER1-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3101 ; ZNVER1-NEXT: retq # sched: [1:0.50]
3102 %1 = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
3103 %2 = load <8 x float>, <8 x float> *%a1, align 32
3104 %3 = shufflevector <8 x float> %2, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
3105 %4 = fadd <8 x float> %1, %3
; Scheduling test for unaligned 256-bit packed-double moves. The IR loads a
; <4 x double> from %a0 with align 1, adds it to itself and stores the sum to
; %a1 with align 1. Most CPUs' checks expect a single 256-bit vmovupd load
; and store. The SANDY checks instead expect the access split into 128-bit
; halves - vmovups into %xmm0 plus vinsertf128 from 16(%rdi) for the load,
; and vextractf128 to 16(%rsi) plus an %xmm0 vmovupd for the store.
3109 define <4 x double> @test_movupd(<4 x double> *%a0, <4 x double> *%a1) {
3110 ; GENERIC-LABEL: test_movupd:
3112 ; GENERIC-NEXT: vmovupd (%rdi), %ymm0 # sched: [7:0.50]
3113 ; GENERIC-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
3114 ; GENERIC-NEXT: vmovupd %ymm0, (%rsi) # sched: [5:1.00]
3115 ; GENERIC-NEXT: retq # sched: [1:1.00]
3117 ; SANDY-LABEL: test_movupd:
3119 ; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50]
3120 ; SANDY-NEXT: vinsertf128 $1, 16(%rdi), %ymm0, %ymm0 # sched: [7:0.50]
3121 ; SANDY-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
3122 ; SANDY-NEXT: vextractf128 $1, %ymm0, 16(%rsi) # sched: [5:1.00]
3123 ; SANDY-NEXT: vmovupd %xmm0, (%rsi) # sched: [5:1.00]
3124 ; SANDY-NEXT: retq # sched: [1:1.00]
3126 ; HASWELL-LABEL: test_movupd:
3128 ; HASWELL-NEXT: vmovupd (%rdi), %ymm0 # sched: [1:0.50]
3129 ; HASWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
3130 ; HASWELL-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:1.00]
3131 ; HASWELL-NEXT: retq # sched: [2:1.00]
3133 ; BROADWELL-LABEL: test_movupd:
3134 ; BROADWELL: # BB#0:
3135 ; BROADWELL-NEXT: vmovupd (%rdi), %ymm0 # sched: [6:0.50]
3136 ; BROADWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
3137 ; BROADWELL-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:1.00]
3138 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3140 ; SKYLAKE-LABEL: test_movupd:
3142 ; SKYLAKE-NEXT: vmovupd (%rdi), %ymm0 # sched: [7:0.50]
3143 ; SKYLAKE-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
3144 ; SKYLAKE-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:1.00]
3145 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3147 ; SKX-LABEL: test_movupd:
3149 ; SKX-NEXT: vmovupd (%rdi), %ymm0 # sched: [7:0.50]
3150 ; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.33]
3151 ; SKX-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:1.00]
3152 ; SKX-NEXT: retq # sched: [7:1.00]
3154 ; BTVER2-LABEL: test_movupd:
3156 ; BTVER2-NEXT: vmovupd (%rdi), %ymm0 # sched: [5:1.00]
3157 ; BTVER2-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:2.00]
3158 ; BTVER2-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:1.00]
3159 ; BTVER2-NEXT: retq # sched: [4:1.00]
3161 ; ZNVER1-LABEL: test_movupd:
3163 ; ZNVER1-NEXT: vmovupd (%rdi), %ymm0 # sched: [8:0.50]
3164 ; ZNVER1-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
3165 ; ZNVER1-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:0.50]
3166 ; ZNVER1-NEXT: retq # sched: [1:0.50]
3167 %1 = load <4 x double>, <4 x double> *%a0, align 1
3168 %2 = fadd <4 x double> %1, %1
3169 store <4 x double> %2, <4 x double> *%a1, align 1
; Scheduling test for unaligned 256-bit packed-single moves. The IR loads an
; <8 x float> from %a0 with align 1, adds it to itself and stores the sum to
; %a1 with align 1. Most CPUs' checks expect a single 256-bit vmovups load
; and store. The SANDY checks instead expect the access split into 128-bit
; halves - vmovups into %xmm0 plus vinsertf128 from 16(%rdi) for the load,
; and vextractf128 to 16(%rsi) plus an %xmm0 vmovups for the store.
3173 define <8 x float> @test_movups(<8 x float> *%a0, <8 x float> *%a1) {
3174 ; GENERIC-LABEL: test_movups:
3176 ; GENERIC-NEXT: vmovups (%rdi), %ymm0 # sched: [7:0.50]
3177 ; GENERIC-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
3178 ; GENERIC-NEXT: vmovups %ymm0, (%rsi) # sched: [5:1.00]
3179 ; GENERIC-NEXT: retq # sched: [1:1.00]
3181 ; SANDY-LABEL: test_movups:
3183 ; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50]
3184 ; SANDY-NEXT: vinsertf128 $1, 16(%rdi), %ymm0, %ymm0 # sched: [7:0.50]
3185 ; SANDY-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
3186 ; SANDY-NEXT: vextractf128 $1, %ymm0, 16(%rsi) # sched: [5:1.00]
3187 ; SANDY-NEXT: vmovups %xmm0, (%rsi) # sched: [5:1.00]
3188 ; SANDY-NEXT: retq # sched: [1:1.00]
3190 ; HASWELL-LABEL: test_movups:
3192 ; HASWELL-NEXT: vmovups (%rdi), %ymm0 # sched: [1:0.50]
3193 ; HASWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
3194 ; HASWELL-NEXT: vmovups %ymm0, (%rsi) # sched: [1:1.00]
3195 ; HASWELL-NEXT: retq # sched: [2:1.00]
3197 ; BROADWELL-LABEL: test_movups:
3198 ; BROADWELL: # BB#0:
3199 ; BROADWELL-NEXT: vmovups (%rdi), %ymm0 # sched: [6:0.50]
3200 ; BROADWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
3201 ; BROADWELL-NEXT: vmovups %ymm0, (%rsi) # sched: [1:1.00]
3202 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3204 ; SKYLAKE-LABEL: test_movups:
3206 ; SKYLAKE-NEXT: vmovups (%rdi), %ymm0 # sched: [7:0.50]
3207 ; SKYLAKE-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
3208 ; SKYLAKE-NEXT: vmovups %ymm0, (%rsi) # sched: [1:1.00]
3209 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3211 ; SKX-LABEL: test_movups:
3213 ; SKX-NEXT: vmovups (%rdi), %ymm0 # sched: [7:0.50]
3214 ; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.33]
3215 ; SKX-NEXT: vmovups %ymm0, (%rsi) # sched: [1:1.00]
3216 ; SKX-NEXT: retq # sched: [7:1.00]
3218 ; BTVER2-LABEL: test_movups:
3220 ; BTVER2-NEXT: vmovups (%rdi), %ymm0 # sched: [5:1.00]
3221 ; BTVER2-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:2.00]
3222 ; BTVER2-NEXT: vmovups %ymm0, (%rsi) # sched: [1:1.00]
3223 ; BTVER2-NEXT: retq # sched: [4:1.00]
3225 ; ZNVER1-LABEL: test_movups:
3227 ; ZNVER1-NEXT: vmovups (%rdi), %ymm0 # sched: [8:0.50]
3228 ; ZNVER1-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
3229 ; ZNVER1-NEXT: vmovups %ymm0, (%rsi) # sched: [1:0.50]
3230 ; ZNVER1-NEXT: retq # sched: [1:0.50]
3231 %1 = load <8 x float>, <8 x float> *%a0, align 1
3232 %2 = fadd <8 x float> %1, %1
3233 store <8 x float> %2, <8 x float> *%a1, align 1
; Scheduling test for 256-bit packed-double multiply. The IR multiplies the
; two vector arguments, then multiplies that product by a 32-byte-aligned
; load from %a2, so every CPU's checks expect one register-register vmulpd
; followed by one vmulpd with a (%rdi) memory operand, each carrying its
; "# sched" annotation.
3237 define <4 x double> @test_mulpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
3238 ; GENERIC-LABEL: test_mulpd:
3240 ; GENERIC-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
3241 ; GENERIC-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
3242 ; GENERIC-NEXT: retq # sched: [1:1.00]
3244 ; SANDY-LABEL: test_mulpd:
3246 ; SANDY-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
3247 ; SANDY-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
3248 ; SANDY-NEXT: retq # sched: [1:1.00]
3250 ; HASWELL-LABEL: test_mulpd:
3252 ; HASWELL-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
3253 ; HASWELL-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
3254 ; HASWELL-NEXT: retq # sched: [2:1.00]
3256 ; BROADWELL-LABEL: test_mulpd:
3257 ; BROADWELL: # BB#0:
3258 ; BROADWELL-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [3:0.50]
3259 ; BROADWELL-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [9:0.50]
3260 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3262 ; SKYLAKE-LABEL: test_mulpd:
3264 ; SKYLAKE-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
3265 ; SKYLAKE-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
3266 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3268 ; SKX-LABEL: test_mulpd:
3270 ; SKX-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
3271 ; SKX-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
3272 ; SKX-NEXT: retq # sched: [7:1.00]
3274 ; BTVER2-LABEL: test_mulpd:
3276 ; BTVER2-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [4:4.00]
3277 ; BTVER2-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [9:4.00]
3278 ; BTVER2-NEXT: retq # sched: [4:1.00]
3280 ; ZNVER1-LABEL: test_mulpd:
3282 ; ZNVER1-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
3283 ; ZNVER1-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
3284 ; ZNVER1-NEXT: retq # sched: [1:0.50]
3285 %1 = fmul <4 x double> %a0, %a1
3286 %2 = load <4 x double>, <4 x double> *%a2, align 32
3287 %3 = fmul <4 x double> %1, %2
; test_mulps: scheduling test for the 256-bit packed-single multiply.
; The visible IR multiplies %a0 by %a1 and then multiplies that product by an
; <8 x float> loaded from %a2 (align 32). Every per-CPU block below expects a
; register-form vmulps, a memory-form vmulps and retq, each followed by its
; "# sched: [..]" annotation.
; The assertions are autogenerated (see the file header); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
3291 define <8 x float> @test_mulps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
3292 ; GENERIC-LABEL: test_mulps:
3294 ; GENERIC-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
3295 ; GENERIC-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
3296 ; GENERIC-NEXT: retq # sched: [1:1.00]
3298 ; SANDY-LABEL: test_mulps:
3300 ; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
3301 ; SANDY-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
3302 ; SANDY-NEXT: retq # sched: [1:1.00]
3304 ; HASWELL-LABEL: test_mulps:
3306 ; HASWELL-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
3307 ; HASWELL-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [5:0.50]
3308 ; HASWELL-NEXT: retq # sched: [2:1.00]
3310 ; BROADWELL-LABEL: test_mulps:
3311 ; BROADWELL: # BB#0:
3312 ; BROADWELL-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [3:0.50]
3313 ; BROADWELL-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [9:0.50]
3314 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3316 ; SKYLAKE-LABEL: test_mulps:
3318 ; SKYLAKE-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
3319 ; SKYLAKE-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
3320 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3322 ; SKX-LABEL: test_mulps:
3324 ; SKX-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
3325 ; SKX-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
3326 ; SKX-NEXT: retq # sched: [7:1.00]
3328 ; BTVER2-LABEL: test_mulps:
3330 ; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
3331 ; BTVER2-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [7:2.00]
3332 ; BTVER2-NEXT: retq # sched: [4:1.00]
3334 ; ZNVER1-LABEL: test_mulps:
3336 ; ZNVER1-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
3337 ; ZNVER1-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
3338 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: register*register multiply, then multiply by the loaded value.
3339 %1 = fmul <8 x float> %a0, %a1
3340 %2 = load <8 x float>, <8 x float> *%a2, align 32
3341 %3 = fmul <8 x float> %1, %2
; orpd: scheduling test for the 256-bit packed-double bitwise OR.
; The visible IR ORs %a0 and %a1 as <4 x i64>, ORs the result with a value
; loaded from %a2 (align 32), casts back to <4 x double> and adds %a1. Each
; per-CPU block expects a register-form vorpd, a memory-form vorpd, a vaddpd
; and retq, each followed by its "# sched: [..]" annotation.
; NOTE(review): this function is named orpd, without the test_ prefix used by
; the neighbouring tests (e.g. test_orps) - confirm whether that is intended.
; NOTE(review): no label directive for the skx configuration is visible ahead
; of its vorpd/vaddpd/retq checks, unlike every other CPU block here - verify
; that it is present, otherwise those checks attach to the preceding function.
; The assertions are autogenerated (see the file header); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
3345 define <4 x double> @orpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
3346 ; GENERIC-LABEL: orpd:
3348 ; GENERIC-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
3349 ; GENERIC-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
3350 ; GENERIC-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
3351 ; GENERIC-NEXT: retq # sched: [1:1.00]
3353 ; SANDY-LABEL: orpd:
3355 ; SANDY-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
3356 ; SANDY-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
3357 ; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
3358 ; SANDY-NEXT: retq # sched: [1:1.00]
3360 ; HASWELL-LABEL: orpd:
3362 ; HASWELL-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
3363 ; HASWELL-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
3364 ; HASWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
3365 ; HASWELL-NEXT: retq # sched: [2:1.00]
3367 ; BROADWELL-LABEL: orpd:
3368 ; BROADWELL: # BB#0:
3369 ; BROADWELL-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
3370 ; BROADWELL-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3371 ; BROADWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
3372 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3374 ; SKYLAKE-LABEL: orpd:
3376 ; SKYLAKE-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
3377 ; SKYLAKE-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3378 ; SKYLAKE-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
3379 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3383 ; SKX-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
3384 ; SKX-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3385 ; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.33]
3386 ; SKX-NEXT: retq # sched: [7:1.00]
3388 ; BTVER2-LABEL: orpd:
3390 ; BTVER2-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3391 ; BTVER2-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
3392 ; BTVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
3393 ; BTVER2-NEXT: retq # sched: [4:1.00]
3395 ; ZNVER1-LABEL: orpd:
3397 ; ZNVER1-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3398 ; ZNVER1-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3399 ; ZNVER1-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
3400 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: the OR is written on <4 x i64> via bitcasts; the trailing fadd
; consumes the OR result as <4 x double>.
3401 %1 = bitcast <4 x double> %a0 to <4 x i64>
3402 %2 = bitcast <4 x double> %a1 to <4 x i64>
3403 %3 = or <4 x i64> %1, %2
3404 %4 = load <4 x double>, <4 x double> *%a2, align 32
3405 %5 = bitcast <4 x double> %4 to <4 x i64>
3406 %6 = or <4 x i64> %3, %5
3407 %7 = bitcast <4 x i64> %6 to <4 x double>
3408 %8 = fadd <4 x double> %a1, %7
; test_orps: scheduling test for the 256-bit packed-single bitwise OR.
; The visible IR ORs %a0 and %a1 as <4 x i64>, ORs the result with a value
; loaded from %a2 (align 32), casts back to <8 x float> and adds %a1. Each
; per-CPU block expects a register-form vorps, a memory-form vorps, a vaddps
; and retq, each followed by its "# sched: [..]" annotation.
; The assertions are autogenerated (see the file header); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
3412 define <8 x float> @test_orps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
3413 ; GENERIC-LABEL: test_orps:
3415 ; GENERIC-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
3416 ; GENERIC-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
3417 ; GENERIC-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
3418 ; GENERIC-NEXT: retq # sched: [1:1.00]
3420 ; SANDY-LABEL: test_orps:
3422 ; SANDY-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
3423 ; SANDY-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
3424 ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
3425 ; SANDY-NEXT: retq # sched: [1:1.00]
3427 ; HASWELL-LABEL: test_orps:
3429 ; HASWELL-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
3430 ; HASWELL-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
3431 ; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
3432 ; HASWELL-NEXT: retq # sched: [2:1.00]
3434 ; BROADWELL-LABEL: test_orps:
3435 ; BROADWELL: # BB#0:
3436 ; BROADWELL-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
3437 ; BROADWELL-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3438 ; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
3439 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3441 ; SKYLAKE-LABEL: test_orps:
3443 ; SKYLAKE-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
3444 ; SKYLAKE-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3445 ; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
3446 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3448 ; SKX-LABEL: test_orps:
3450 ; SKX-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
3451 ; SKX-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3452 ; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.33]
3453 ; SKX-NEXT: retq # sched: [7:1.00]
3455 ; BTVER2-LABEL: test_orps:
3457 ; BTVER2-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3458 ; BTVER2-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
3459 ; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
3460 ; BTVER2-NEXT: retq # sched: [4:1.00]
3462 ; ZNVER1-LABEL: test_orps:
3464 ; ZNVER1-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
3465 ; ZNVER1-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3466 ; ZNVER1-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
3467 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: the OR is written on <4 x i64> via bitcasts; the trailing fadd
; consumes the OR result as <8 x float>.
3468 %1 = bitcast <8 x float> %a0 to <4 x i64>
3469 %2 = bitcast <8 x float> %a1 to <4 x i64>
3470 %3 = or <4 x i64> %1, %2
3471 %4 = load <8 x float>, <8 x float> *%a2, align 32
3472 %5 = bitcast <8 x float> %4 to <4 x i64>
3473 %6 = or <4 x i64> %3, %5
3474 %7 = bitcast <4 x i64> %6 to <8 x float>
3475 %8 = fadd <8 x float> %a1, %7
; test_perm2f128: scheduling test for the 128-bit lane permute vperm2f128.
; The visible IR applies the shuffle mask <2, 3, 4, 5> once to (%a0, %a1) and
; once to (%a0, value loaded from %a2, align 32), then adds the two results.
; Each per-CPU block expects a register-form vperm2f128, a memory-form
; vperm2f128, a vaddpd and retq, each followed by a "sched: [..]" annotation.
; NOTE(review): the znver1 block expects "sched: [100:?]" for both permutes,
; unlike the numeric pairs in every other block - presumably a placeholder in
; that scheduling model at generation time; confirm before relying on it.
; The assertions are autogenerated (see the file header); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
3479 define <4 x double> @test_perm2f128(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
3480 ; GENERIC-LABEL: test_perm2f128:
3482 ; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
3483 ; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [8:1.00]
3484 ; GENERIC-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
3485 ; GENERIC-NEXT: retq # sched: [1:1.00]
3487 ; SANDY-LABEL: test_perm2f128:
3489 ; SANDY-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
3490 ; SANDY-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [8:1.00]
3491 ; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
3492 ; SANDY-NEXT: retq # sched: [1:1.00]
3494 ; HASWELL-LABEL: test_perm2f128:
3496 ; HASWELL-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
3497 ; HASWELL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [3:1.00]
3498 ; HASWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
3499 ; HASWELL-NEXT: retq # sched: [2:1.00]
3501 ; BROADWELL-LABEL: test_perm2f128:
3502 ; BROADWELL: # BB#0:
3503 ; BROADWELL-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
3504 ; BROADWELL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [9:1.00]
3505 ; BROADWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
3506 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3508 ; SKYLAKE-LABEL: test_perm2f128:
3510 ; SKYLAKE-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
3511 ; SKYLAKE-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
3512 ; SKYLAKE-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
3513 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3515 ; SKX-LABEL: test_perm2f128:
3517 ; SKX-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
3518 ; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
3519 ; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.33]
3520 ; SKX-NEXT: retq # sched: [7:1.00]
3522 ; BTVER2-LABEL: test_perm2f128:
3524 ; BTVER2-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [1:0.50]
3525 ; BTVER2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [6:1.00]
3526 ; BTVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
3527 ; BTVER2-NEXT: retq # sched: [4:1.00]
3529 ; ZNVER1-LABEL: test_perm2f128:
3531 ; ZNVER1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [100:?]
3532 ; ZNVER1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [100:?]
3533 ; ZNVER1-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
3534 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: the same mask is used for the register and the loaded operand.
3535 %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
3536 %2 = load <4 x double>, <4 x double> *%a2, align 32
3537 %3 = shufflevector <4 x double> %a0, <4 x double> %2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
3538 %4 = fadd <4 x double> %1, %3
; test_permilpd: scheduling test for the immediate-form vpermilpd on xmm.
; The visible IR swaps the two elements (mask <1, 0>) of %a0 and of a
; <2 x double> loaded from %a1 (align 16), then adds the two shuffles. Each
; per-CPU block expects a register-form vpermilpd, a memory-form vpermilpd,
; a vaddpd and retq, each followed by a "sched: [..]" annotation.
; The btver2 and znver1 blocks expect the memory-form permute first; the
; remaining blocks expect the register form first.
; The assertions are autogenerated (see the file header); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
3542 define <2 x double> @test_permilpd(<2 x double> %a0, <2 x double> *%a1) {
3543 ; GENERIC-LABEL: test_permilpd:
3545 ; GENERIC-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00]
3546 ; GENERIC-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [7:1.00]
3547 ; GENERIC-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
3548 ; GENERIC-NEXT: retq # sched: [1:1.00]
3550 ; SANDY-LABEL: test_permilpd:
3552 ; SANDY-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00]
3553 ; SANDY-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [7:1.00]
3554 ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
3555 ; SANDY-NEXT: retq # sched: [1:1.00]
3557 ; HASWELL-LABEL: test_permilpd:
3559 ; HASWELL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00]
3560 ; HASWELL-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [1:1.00]
3561 ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
3562 ; HASWELL-NEXT: retq # sched: [2:1.00]
3564 ; BROADWELL-LABEL: test_permilpd:
3565 ; BROADWELL: # BB#0:
3566 ; BROADWELL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00]
3567 ; BROADWELL-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [6:1.00]
3568 ; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
3569 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3571 ; SKYLAKE-LABEL: test_permilpd:
3573 ; SKYLAKE-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00]
3574 ; SKYLAKE-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [7:1.00]
3575 ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
3576 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3578 ; SKX-LABEL: test_permilpd:
3580 ; SKX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00]
3581 ; SKX-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [7:1.00]
3582 ; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
3583 ; SKX-NEXT: retq # sched: [7:1.00]
3585 ; BTVER2-LABEL: test_permilpd:
3587 ; BTVER2-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [6:1.00]
3588 ; BTVER2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:0.50]
3589 ; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
3590 ; BTVER2-NEXT: retq # sched: [4:1.00]
3592 ; ZNVER1-LABEL: test_permilpd:
3594 ; ZNVER1-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [8:0.50]
3595 ; ZNVER1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:0.50]
3596 ; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
3597 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: identical single-source shuffle on the argument and on the
; loaded value.
3598 %1 = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> <i32 1, i32 0>
3599 %2 = load <2 x double>, <2 x double> *%a1, align 16
3600 %3 = shufflevector <2 x double> %2, <2 x double> undef, <2 x i32> <i32 1, i32 0>
3601 %4 = fadd <2 x double> %1, %3
; test_permilpd_ymm: scheduling test for the immediate-form vpermilpd on ymm.
; The visible IR applies the mask <1, 0, 2, 3> to %a0 and to a <4 x double>
; loaded from %a1 (align 32), then adds the two shuffles. Each per-CPU block
; expects a register-form vpermilpd, a memory-form vpermilpd, a vaddpd and
; retq, each followed by a "sched: [..]" annotation.
; The btver2 and znver1 blocks expect the memory-form permute first; the
; remaining blocks expect the register form first.
; The assertions are autogenerated (see the file header); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
3605 define <4 x double> @test_permilpd_ymm(<4 x double> %a0, <4 x double> *%a1) {
3606 ; GENERIC-LABEL: test_permilpd_ymm:
3608 ; GENERIC-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00]
3609 ; GENERIC-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [8:1.00]
3610 ; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3611 ; GENERIC-NEXT: retq # sched: [1:1.00]
3613 ; SANDY-LABEL: test_permilpd_ymm:
3615 ; SANDY-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00]
3616 ; SANDY-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [8:1.00]
3617 ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3618 ; SANDY-NEXT: retq # sched: [1:1.00]
3620 ; HASWELL-LABEL: test_permilpd_ymm:
3622 ; HASWELL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00]
3623 ; HASWELL-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [1:1.00]
3624 ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3625 ; HASWELL-NEXT: retq # sched: [2:1.00]
3627 ; BROADWELL-LABEL: test_permilpd_ymm:
3628 ; BROADWELL: # BB#0:
3629 ; BROADWELL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00]
3630 ; BROADWELL-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [7:1.00]
3631 ; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3632 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3634 ; SKYLAKE-LABEL: test_permilpd_ymm:
3636 ; SKYLAKE-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00]
3637 ; SKYLAKE-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [8:1.00]
3638 ; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
3639 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3641 ; SKX-LABEL: test_permilpd_ymm:
3643 ; SKX-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00]
3644 ; SKX-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [8:1.00]
3645 ; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
3646 ; SKX-NEXT: retq # sched: [7:1.00]
3648 ; BTVER2-LABEL: test_permilpd_ymm:
3650 ; BTVER2-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [6:1.00]
3651 ; BTVER2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:0.50]
3652 ; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3653 ; BTVER2-NEXT: retq # sched: [4:1.00]
3655 ; ZNVER1-LABEL: test_permilpd_ymm:
3657 ; ZNVER1-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [8:0.50]
3658 ; ZNVER1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:0.50]
3659 ; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3660 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: identical single-source shuffle on the argument and on the
; loaded value.
3661 %1 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 2, i32 3>
3662 %2 = load <4 x double>, <4 x double> *%a1, align 32
3663 %3 = shufflevector <4 x double> %2, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 2, i32 3>
3664 %4 = fadd <4 x double> %1, %3
; test_permilps: scheduling test for the immediate-form vpermilps on xmm.
; The visible IR reverses the four elements (mask <3, 2, 1, 0>) of %a0 and of
; a <4 x float> loaded from %a1 (align 16), then adds the two shuffles. Each
; per-CPU block expects a register-form vpermilps, a memory-form vpermilps,
; a vaddps and retq, each followed by a "sched: [..]" annotation.
; The btver2 and znver1 blocks expect the memory-form permute first; the
; remaining blocks expect the register form first.
; The assertions are autogenerated (see the file header); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
3668 define <4 x float> @test_permilps(<4 x float> %a0, <4 x float> *%a1) {
3669 ; GENERIC-LABEL: test_permilps:
3671 ; GENERIC-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00]
3672 ; GENERIC-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00]
3673 ; GENERIC-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
3674 ; GENERIC-NEXT: retq # sched: [1:1.00]
3676 ; SANDY-LABEL: test_permilps:
3678 ; SANDY-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00]
3679 ; SANDY-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00]
3680 ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
3681 ; SANDY-NEXT: retq # sched: [1:1.00]
3683 ; HASWELL-LABEL: test_permilps:
3685 ; HASWELL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00]
3686 ; HASWELL-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [1:1.00]
3687 ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
3688 ; HASWELL-NEXT: retq # sched: [2:1.00]
3690 ; BROADWELL-LABEL: test_permilps:
3691 ; BROADWELL: # BB#0:
3692 ; BROADWELL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00]
3693 ; BROADWELL-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [6:1.00]
3694 ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
3695 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3697 ; SKYLAKE-LABEL: test_permilps:
3699 ; SKYLAKE-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00]
3700 ; SKYLAKE-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00]
3701 ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
3702 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3704 ; SKX-LABEL: test_permilps:
3706 ; SKX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00]
3707 ; SKX-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00]
3708 ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
3709 ; SKX-NEXT: retq # sched: [7:1.00]
3711 ; BTVER2-LABEL: test_permilps:
3713 ; BTVER2-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [6:1.00]
3714 ; BTVER2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:0.50]
3715 ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
3716 ; BTVER2-NEXT: retq # sched: [4:1.00]
3718 ; ZNVER1-LABEL: test_permilps:
3720 ; ZNVER1-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [8:0.50]
3721 ; ZNVER1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:0.50]
3722 ; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
3723 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: identical single-source shuffle on the argument and on the
; loaded value.
3724 %1 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
3725 %2 = load <4 x float>, <4 x float> *%a1, align 16
3726 %3 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
3727 %4 = fadd <4 x float> %1, %3
; test_permilps_ymm: scheduling test for the immediate-form vpermilps on ymm.
; The visible IR applies the mask <3, 2, 1, 0, 7, 6, 5, 4> to %a0 and to an
; <8 x float> loaded from %a1 (align 32), then adds the two shuffles. Each
; per-CPU block expects a register-form vpermilps, a memory-form vpermilps,
; a vaddps and retq, each followed by a "sched: [..]" annotation.
; The btver2 and znver1 blocks expect the memory-form permute first; the
; remaining blocks expect the register form first.
; The assertions are autogenerated (see the file header); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
3731 define <8 x float> @test_permilps_ymm(<8 x float> %a0, <8 x float> *%a1) {
3732 ; GENERIC-LABEL: test_permilps_ymm:
3734 ; GENERIC-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
3735 ; GENERIC-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [8:1.00]
3736 ; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3737 ; GENERIC-NEXT: retq # sched: [1:1.00]
3739 ; SANDY-LABEL: test_permilps_ymm:
3741 ; SANDY-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
3742 ; SANDY-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [8:1.00]
3743 ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3744 ; SANDY-NEXT: retq # sched: [1:1.00]
3746 ; HASWELL-LABEL: test_permilps_ymm:
3748 ; HASWELL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
3749 ; HASWELL-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [1:1.00]
3750 ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3751 ; HASWELL-NEXT: retq # sched: [2:1.00]
3753 ; BROADWELL-LABEL: test_permilps_ymm:
3754 ; BROADWELL: # BB#0:
3755 ; BROADWELL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
3756 ; BROADWELL-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [7:1.00]
3757 ; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3758 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3760 ; SKYLAKE-LABEL: test_permilps_ymm:
3762 ; SKYLAKE-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
3763 ; SKYLAKE-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [8:1.00]
3764 ; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
3765 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3767 ; SKX-LABEL: test_permilps_ymm:
3769 ; SKX-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
3770 ; SKX-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [8:1.00]
3771 ; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
3772 ; SKX-NEXT: retq # sched: [7:1.00]
3774 ; BTVER2-LABEL: test_permilps_ymm:
3776 ; BTVER2-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [6:1.00]
3777 ; BTVER2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:0.50]
3778 ; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
3779 ; BTVER2-NEXT: retq # sched: [4:1.00]
3781 ; ZNVER1-LABEL: test_permilps_ymm:
3783 ; ZNVER1-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [8:0.50]
3784 ; ZNVER1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:0.50]
3785 ; ZNVER1-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
3786 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: identical single-source shuffle on the argument and on the
; loaded value.
3787 %1 = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
3788 %2 = load <8 x float>, <8 x float> *%a1, align 32
3789 %3 = shufflevector <8 x float> %2, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
3790 %4 = fadd <8 x float> %1, %3
; test_permilvarpd: scheduling test for the variable-form vpermilpd on xmm.
; The visible IR calls @llvm.x86.avx.vpermilvar.pd on (%a0, %a1) and then
; calls it again on that result with a <2 x i64> loaded from %a2 (align 16).
; Each per-CPU block expects a register-form vpermilpd, a memory-form
; vpermilpd and retq, each followed by its "# sched: [..]" annotation.
; The assertions are autogenerated (see the file header); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
3794 define <2 x double> @test_permilvarpd(<2 x double> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
3795 ; GENERIC-LABEL: test_permilvarpd:
3797 ; GENERIC-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
3798 ; GENERIC-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
3799 ; GENERIC-NEXT: retq # sched: [1:1.00]
3801 ; SANDY-LABEL: test_permilvarpd:
3803 ; SANDY-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
3804 ; SANDY-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
3805 ; SANDY-NEXT: retq # sched: [1:1.00]
3807 ; HASWELL-LABEL: test_permilvarpd:
3809 ; HASWELL-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
3810 ; HASWELL-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
3811 ; HASWELL-NEXT: retq # sched: [2:1.00]
3813 ; BROADWELL-LABEL: test_permilvarpd:
3814 ; BROADWELL: # BB#0:
3815 ; BROADWELL-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
3816 ; BROADWELL-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
3817 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3819 ; SKYLAKE-LABEL: test_permilvarpd:
3821 ; SKYLAKE-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
3822 ; SKYLAKE-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
3823 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3825 ; SKX-LABEL: test_permilvarpd:
3827 ; SKX-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
3828 ; SKX-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
3829 ; SKX-NEXT: retq # sched: [7:1.00]
3831 ; BTVER2-LABEL: test_permilvarpd:
3833 ; BTVER2-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
3834 ; BTVER2-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
3835 ; BTVER2-NEXT: retq # sched: [4:1.00]
3837 ; ZNVER1-LABEL: test_permilvarpd:
3839 ; ZNVER1-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
3840 ; ZNVER1-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
3841 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: the second call consumes the first call's result, so the two
; permutes are chained.
3842 %1 = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1)
3843 %2 = load <2 x i64>, <2 x i64> *%a2, align 16
3844 %3 = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %1, <2 x i64> %2)
; Declaration of the intrinsic called by test_permilvarpd: takes a <2 x double>
; and a <2 x i64>, returns <2 x double>; attributes nounwind readnone.
3847 declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) nounwind readnone
; test_permilvarpd_ymm: scheduling test for the variable-form vpermilpd on ymm.
; The visible IR calls @llvm.x86.avx.vpermilvar.pd.256 on (%a0, %a1) and then
; calls it again on that result with a <4 x i64> loaded from %a2 (align 32).
; Each per-CPU block expects a register-form vpermilpd, a memory-form
; vpermilpd and retq, each followed by its "# sched: [..]" annotation.
; The assertions are autogenerated (see the file header); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
3849 define <4 x double> @test_permilvarpd_ymm(<4 x double> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
3850 ; GENERIC-LABEL: test_permilvarpd_ymm:
3852 ; GENERIC-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
3853 ; GENERIC-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
3854 ; GENERIC-NEXT: retq # sched: [1:1.00]
3856 ; SANDY-LABEL: test_permilvarpd_ymm:
3858 ; SANDY-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
3859 ; SANDY-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
3860 ; SANDY-NEXT: retq # sched: [1:1.00]
3862 ; HASWELL-LABEL: test_permilvarpd_ymm:
3864 ; HASWELL-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
3865 ; HASWELL-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
3866 ; HASWELL-NEXT: retq # sched: [2:1.00]
3868 ; BROADWELL-LABEL: test_permilvarpd_ymm:
3869 ; BROADWELL: # BB#0:
3870 ; BROADWELL-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
3871 ; BROADWELL-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3872 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3874 ; SKYLAKE-LABEL: test_permilvarpd_ymm:
3876 ; SKYLAKE-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
3877 ; SKYLAKE-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
3878 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3880 ; SKX-LABEL: test_permilvarpd_ymm:
3882 ; SKX-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
3883 ; SKX-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
3884 ; SKX-NEXT: retq # sched: [7:1.00]
3886 ; BTVER2-LABEL: test_permilvarpd_ymm:
3888 ; BTVER2-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [3:3.00]
3889 ; BTVER2-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
3890 ; BTVER2-NEXT: retq # sched: [4:1.00]
3892 ; ZNVER1-LABEL: test_permilvarpd_ymm:
3894 ; ZNVER1-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
3895 ; ZNVER1-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
3896 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: the second call consumes the first call's result, so the two
; permutes are chained.
3897 %1 = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1)
3898 %2 = load <4 x i64>, <4 x i64> *%a2, align 32
3899 %3 = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %1, <4 x i64> %2)
; Declaration of the intrinsic called by test_permilvarpd_ymm: takes a
; <4 x double> and a <4 x i64>, returns <4 x double>; attributes nounwind
; readnone.
3902 declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) nounwind readnone
; test_permilvarps: scheduling test for the variable-form vpermilps on xmm.
; The visible IR calls @llvm.x86.avx.vpermilvar.ps on (%a0, %a1) and then
; calls it again on that result with a <4 x i32> loaded from %a2 (align 16).
; Each per-CPU block expects a register-form vpermilps, a memory-form
; vpermilps and retq, each followed by its "# sched: [..]" annotation.
; The assertions are autogenerated (see the file header); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
3904 define <4 x float> @test_permilvarps(<4 x float> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
3905 ; GENERIC-LABEL: test_permilvarps:
3907 ; GENERIC-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
3908 ; GENERIC-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
3909 ; GENERIC-NEXT: retq # sched: [1:1.00]
3911 ; SANDY-LABEL: test_permilvarps:
3913 ; SANDY-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
3914 ; SANDY-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
3915 ; SANDY-NEXT: retq # sched: [1:1.00]
3917 ; HASWELL-LABEL: test_permilvarps:
3919 ; HASWELL-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
3920 ; HASWELL-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
3921 ; HASWELL-NEXT: retq # sched: [2:1.00]
3923 ; BROADWELL-LABEL: test_permilvarps:
3924 ; BROADWELL: # BB#0:
3925 ; BROADWELL-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
3926 ; BROADWELL-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
3927 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3929 ; SKYLAKE-LABEL: test_permilvarps:
3931 ; SKYLAKE-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
3932 ; SKYLAKE-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
3933 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3935 ; SKX-LABEL: test_permilvarps:
3937 ; SKX-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
3938 ; SKX-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
3939 ; SKX-NEXT: retq # sched: [7:1.00]
3941 ; BTVER2-LABEL: test_permilvarps:
3943 ; BTVER2-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
3944 ; BTVER2-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
3945 ; BTVER2-NEXT: retq # sched: [4:1.00]
3947 ; ZNVER1-LABEL: test_permilvarps:
3949 ; ZNVER1-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
3950 ; ZNVER1-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
3951 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: the second call consumes the first call's result, so the two
; permutes are chained.
3952 %1 = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1)
3953 %2 = load <4 x i32>, <4 x i32> *%a2, align 16
3954 %3 = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %1, <4 x i32> %2)
; Declaration of the intrinsic called by test_permilvarps: takes a <4 x float>
; and a <4 x i32>, returns <4 x float>; attributes nounwind readnone.
3957 declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone
; test_permilvarps_ymm: scheduling test for the variable-form vpermilps on ymm.
; The visible IR calls @llvm.x86.avx.vpermilvar.ps.256 on (%a0, %a1) and then
; calls it again on that result with an <8 x i32> loaded from %a2 (align 32).
; Each per-CPU block expects a register-form vpermilps, a memory-form
; vpermilps and retq, each followed by its "# sched: [..]" annotation.
; The assertions are autogenerated (see the file header); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
3959 define <8 x float> @test_permilvarps_ymm(<8 x float> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
3960 ; GENERIC-LABEL: test_permilvarps_ymm:
3962 ; GENERIC-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
3963 ; GENERIC-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
3964 ; GENERIC-NEXT: retq # sched: [1:1.00]
3966 ; SANDY-LABEL: test_permilvarps_ymm:
3968 ; SANDY-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
3969 ; SANDY-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
3970 ; SANDY-NEXT: retq # sched: [1:1.00]
3972 ; HASWELL-LABEL: test_permilvarps_ymm:
3974 ; HASWELL-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
3975 ; HASWELL-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
3976 ; HASWELL-NEXT: retq # sched: [2:1.00]
3978 ; BROADWELL-LABEL: test_permilvarps_ymm:
3979 ; BROADWELL: # BB#0:
3980 ; BROADWELL-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
3981 ; BROADWELL-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
3982 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3984 ; SKYLAKE-LABEL: test_permilvarps_ymm:
3986 ; SKYLAKE-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
3987 ; SKYLAKE-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
3988 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3990 ; SKX-LABEL: test_permilvarps_ymm:
3992 ; SKX-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
3993 ; SKX-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
3994 ; SKX-NEXT: retq # sched: [7:1.00]
3996 ; BTVER2-LABEL: test_permilvarps_ymm:
3998 ; BTVER2-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [3:3.00]
3999 ; BTVER2-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
4000 ; BTVER2-NEXT: retq # sched: [4:1.00]
4002 ; ZNVER1-LABEL: test_permilvarps_ymm:
4004 ; ZNVER1-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
4005 ; ZNVER1-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
4006 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: the second call consumes the first call's result, so the two
; permutes are chained.
4007 %1 = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1)
4008 %2 = load <8 x i32>, <8 x i32> *%a2, align 32
4009 %3 = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %1, <8 x i32> %2)
; Declaration of the intrinsic called by test_permilvarps_ymm: takes an
; <8 x float> and an <8 x i32>, returns <8 x float>; attributes nounwind
; readnone.
4012 declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) nounwind readnone
; test_rcpps: scheduling test for the 256-bit vrcpps.
; The visible IR calls @llvm.x86.avx.rcp.ps.256 on %a0 and on an <8 x float>
; loaded from %a1 (align 32), then adds the two results. Each per-CPU block
; expects a register-form vrcpps, a memory-form vrcpps, a vaddps and retq,
; each followed by its "# sched: [..]" annotation.
; The skylake and skx blocks expect the register-form vrcpps first; the
; remaining blocks expect the memory form first.
; The assertions are autogenerated (see the file header); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
4014 define <8 x float> @test_rcpps(<8 x float> %a0, <8 x float> *%a1) {
4015 ; GENERIC-LABEL: test_rcpps:
4017 ; GENERIC-NEXT: vrcpps (%rdi), %ymm1 # sched: [14:2.00]
4018 ; GENERIC-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00]
4019 ; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4020 ; GENERIC-NEXT: retq # sched: [1:1.00]
4022 ; SANDY-LABEL: test_rcpps:
4024 ; SANDY-NEXT: vrcpps (%rdi), %ymm1 # sched: [14:2.00]
4025 ; SANDY-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00]
4026 ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4027 ; SANDY-NEXT: retq # sched: [1:1.00]
4029 ; HASWELL-LABEL: test_rcpps:
4031 ; HASWELL-NEXT: vrcpps (%rdi), %ymm1 # sched: [11:2.00]
4032 ; HASWELL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00]
4033 ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4034 ; HASWELL-NEXT: retq # sched: [2:1.00]
4036 ; BROADWELL-LABEL: test_rcpps:
4037 ; BROADWELL: # BB#0:
4038 ; BROADWELL-NEXT: vrcpps (%rdi), %ymm1 # sched: [17:2.00]
4039 ; BROADWELL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00]
4040 ; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4041 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4043 ; SKYLAKE-LABEL: test_rcpps:
4045 ; SKYLAKE-NEXT: vrcpps %ymm0, %ymm0 # sched: [4:1.00]
4046 ; SKYLAKE-NEXT: vrcpps (%rdi), %ymm1 # sched: [11:1.00]
4047 ; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
4048 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4050 ; SKX-LABEL: test_rcpps:
4052 ; SKX-NEXT: vrcpps %ymm0, %ymm0 # sched: [4:1.00]
4053 ; SKX-NEXT: vrcpps (%rdi), %ymm1 # sched: [11:1.00]
4054 ; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
4055 ; SKX-NEXT: retq # sched: [7:1.00]
4057 ; BTVER2-LABEL: test_rcpps:
4059 ; BTVER2-NEXT: vrcpps (%rdi), %ymm1 # sched: [7:2.00]
4060 ; BTVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [2:2.00]
4061 ; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
4062 ; BTVER2-NEXT: retq # sched: [4:1.00]
4064 ; ZNVER1-LABEL: test_rcpps:
4066 ; ZNVER1-NEXT: vrcpps (%rdi), %ymm1 # sched: [12:0.50]
4067 ; ZNVER1-NEXT: vrcpps %ymm0, %ymm0 # sched: [5:0.50]
4068 ; ZNVER1-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4069 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: the two intrinsic calls are independent of each other; only
; the final fadd joins them.
4070 %1 = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %a0)
4071 %2 = load <8 x float>, <8 x float> *%a1, align 32
4072 %3 = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %2)
4073 %4 = fadd <8 x float> %1, %3
4076 declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone
; test_roundpd: calls @llvm.x86.avx.round.pd.256 with immediate i32 7 on the
; register argument %a0 and on a vector loaded (align 32) from %a1, then fadds
; the two results. The expectations hold a register-source and a memory-source
; rounding instruction with immediate $7, a vaddpd and retq. The skx run
; expects vrndscalepd where every other run expects vroundpd
; (NOTE(review): presumably the AVX-512 encoding is selected there - confirm).
; The btver2 and znver1 expectations list the memory form first.
4078 define <4 x double> @test_roundpd(<4 x double> %a0, <4 x double> *%a1) {
4079 ; GENERIC-LABEL: test_roundpd:
4081 ; GENERIC-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [3:1.00]
4082 ; GENERIC-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [10:1.00]
4083 ; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4084 ; GENERIC-NEXT: retq # sched: [1:1.00]
4086 ; SANDY-LABEL: test_roundpd:
4088 ; SANDY-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [3:1.00]
4089 ; SANDY-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [10:1.00]
4090 ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4091 ; SANDY-NEXT: retq # sched: [1:1.00]
4093 ; HASWELL-LABEL: test_roundpd:
4095 ; HASWELL-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [5:1.25]
4096 ; HASWELL-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [6:2.00]
4097 ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4098 ; HASWELL-NEXT: retq # sched: [2:1.00]
4100 ; BROADWELL-LABEL: test_roundpd:
4101 ; BROADWELL: # BB#0:
4102 ; BROADWELL-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [6:0.50]
4103 ; BROADWELL-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [12:2.00]
4104 ; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4105 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4107 ; SKYLAKE-LABEL: test_roundpd:
4109 ; SKYLAKE-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [8:0.67]
4110 ; SKYLAKE-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [15:0.67]
4111 ; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
4112 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4114 ; SKX-LABEL: test_roundpd:
4116 ; SKX-NEXT: vrndscalepd $7, %ymm0, %ymm0 # sched: [8:0.67]
4117 ; SKX-NEXT: vrndscalepd $7, (%rdi), %ymm1 # sched: [15:0.67]
4118 ; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
4119 ; SKX-NEXT: retq # sched: [7:1.00]
4121 ; BTVER2-LABEL: test_roundpd:
4123 ; BTVER2-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [8:2.00]
4124 ; BTVER2-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [3:2.00]
4125 ; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
4126 ; BTVER2-NEXT: retq # sched: [4:1.00]
4128 ; ZNVER1-LABEL: test_roundpd:
4130 ; ZNVER1-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [11:1.00]
4131 ; ZNVER1-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [4:1.00]
4132 ; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4133 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: the same immediate (7) is used for both intrinsic calls.
4134 %1 = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 7)
4135 %2 = load <4 x double>, <4 x double> *%a1, align 32
4136 %3 = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %2, i32 7)
4137 %4 = fadd <4 x double> %1, %3
4140 declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind readnone
; test_roundps: calls @llvm.x86.avx.round.ps.256 with immediate i32 7 on the
; register argument %a0 and on a vector loaded (align 32) from %a1, then fadds
; the two results. The expectations hold a register-source and a memory-source
; rounding instruction with immediate $7, a vaddps and retq. The skx run
; expects vrndscaleps where every other run expects vroundps
; (NOTE(review): presumably the AVX-512 encoding is selected there - confirm).
; The btver2 and znver1 expectations list the memory form first.
4142 define <8 x float> @test_roundps(<8 x float> %a0, <8 x float> *%a1) {
4143 ; GENERIC-LABEL: test_roundps:
4145 ; GENERIC-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [3:1.00]
4146 ; GENERIC-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [10:1.00]
4147 ; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4148 ; GENERIC-NEXT: retq # sched: [1:1.00]
4150 ; SANDY-LABEL: test_roundps:
4152 ; SANDY-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [3:1.00]
4153 ; SANDY-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [10:1.00]
4154 ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4155 ; SANDY-NEXT: retq # sched: [1:1.00]
4157 ; HASWELL-LABEL: test_roundps:
4159 ; HASWELL-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [5:1.25]
4160 ; HASWELL-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [6:2.00]
4161 ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4162 ; HASWELL-NEXT: retq # sched: [2:1.00]
4164 ; BROADWELL-LABEL: test_roundps:
4165 ; BROADWELL: # BB#0:
4166 ; BROADWELL-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [6:0.50]
4167 ; BROADWELL-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [12:2.00]
4168 ; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4169 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4171 ; SKYLAKE-LABEL: test_roundps:
4173 ; SKYLAKE-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [8:0.67]
4174 ; SKYLAKE-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [15:0.67]
4175 ; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
4176 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4178 ; SKX-LABEL: test_roundps:
4180 ; SKX-NEXT: vrndscaleps $7, %ymm0, %ymm0 # sched: [8:0.67]
4181 ; SKX-NEXT: vrndscaleps $7, (%rdi), %ymm1 # sched: [15:0.67]
4182 ; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
4183 ; SKX-NEXT: retq # sched: [7:1.00]
4185 ; BTVER2-LABEL: test_roundps:
4187 ; BTVER2-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [8:2.00]
4188 ; BTVER2-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [3:2.00]
4189 ; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
4190 ; BTVER2-NEXT: retq # sched: [4:1.00]
4192 ; ZNVER1-LABEL: test_roundps:
4194 ; ZNVER1-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [11:1.00]
4195 ; ZNVER1-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [4:1.00]
4196 ; ZNVER1-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4197 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: the same immediate (7) is used for both intrinsic calls.
4198 %1 = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 7)
4199 %2 = load <8 x float>, <8 x float> *%a1, align 32
4200 %3 = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %2, i32 7)
4201 %4 = fadd <8 x float> %1, %3
4204 declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone
; test_rsqrtps: calls @llvm.x86.avx.rsqrt.ps.256 once on the register argument
; %a0 and once on a vector loaded (align 32) from %a1, then fadds the two
; results. Every per-CPU expectation holds one register-source vrsqrtps, one
; memory-source vrsqrtps, a vaddps and retq, each with its "# sched"
; annotation. The skylake and skx expectations list the register form first;
; all other runs list the memory form first.
4206 define <8 x float> @test_rsqrtps(<8 x float> %a0, <8 x float> *%a1) {
4207 ; GENERIC-LABEL: test_rsqrtps:
4209 ; GENERIC-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [14:2.00]
4210 ; GENERIC-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [7:2.00]
4211 ; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4212 ; GENERIC-NEXT: retq # sched: [1:1.00]
4214 ; SANDY-LABEL: test_rsqrtps:
4216 ; SANDY-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [14:2.00]
4217 ; SANDY-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [7:2.00]
4218 ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4219 ; SANDY-NEXT: retq # sched: [1:1.00]
4221 ; HASWELL-LABEL: test_rsqrtps:
4223 ; HASWELL-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [11:2.00]
4224 ; HASWELL-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [11:2.00]
4225 ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4226 ; HASWELL-NEXT: retq # sched: [2:1.00]
4228 ; BROADWELL-LABEL: test_rsqrtps:
4229 ; BROADWELL: # BB#0:
4230 ; BROADWELL-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [17:2.00]
4231 ; BROADWELL-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [11:2.00]
4232 ; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4233 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4235 ; SKYLAKE-LABEL: test_rsqrtps:
4237 ; SKYLAKE-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [4:1.00]
4238 ; SKYLAKE-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [11:1.00]
4239 ; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
4240 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4242 ; SKX-LABEL: test_rsqrtps:
4244 ; SKX-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [4:1.00]
4245 ; SKX-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [11:1.00]
4246 ; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
4247 ; SKX-NEXT: retq # sched: [7:1.00]
4249 ; BTVER2-LABEL: test_rsqrtps:
4251 ; BTVER2-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [7:2.00]
4252 ; BTVER2-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [2:2.00]
4253 ; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
4254 ; BTVER2-NEXT: retq # sched: [4:1.00]
4256 ; ZNVER1-LABEL: test_rsqrtps:
4258 ; ZNVER1-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [12:0.50]
4259 ; ZNVER1-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [5:0.50]
4260 ; ZNVER1-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4261 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: register operand first, then the explicitly loaded operand.
4262 %1 = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %a0)
4263 %2 = load <8 x float>, <8 x float> *%a1, align 32
4264 %3 = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %2)
4265 %4 = fadd <8 x float> %1, %3
4268 declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone
; test_shufpd: applies the shufflevector mask <1, 4, 2, 7> to (%a0, %a1) and
; to (%a1, vector loaded align 32 from %a2), then fadds the two shuffles.
; Every run expects a register-source vshufpd, a memory-source vshufpd (its
; second source printed as "mem"), a vaddpd and retq, in that order. The
; vshufpd expectations use the {{.*#+}} regex to skip the operand text and
; match only the decoded element list that follows the '#' comment marker.
4270 define <4 x double> @test_shufpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
4271 ; GENERIC-LABEL: test_shufpd:
4273 ; GENERIC-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00]
4274 ; GENERIC-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [8:1.00]
4275 ; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4276 ; GENERIC-NEXT: retq # sched: [1:1.00]
4278 ; SANDY-LABEL: test_shufpd:
4280 ; SANDY-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00]
4281 ; SANDY-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [8:1.00]
4282 ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4283 ; SANDY-NEXT: retq # sched: [1:1.00]
4285 ; HASWELL-LABEL: test_shufpd:
4287 ; HASWELL-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00]
4288 ; HASWELL-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [1:1.00]
4289 ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4290 ; HASWELL-NEXT: retq # sched: [2:1.00]
4292 ; BROADWELL-LABEL: test_shufpd:
4293 ; BROADWELL: # BB#0:
4294 ; BROADWELL-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00]
4295 ; BROADWELL-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [7:1.00]
4296 ; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4297 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4299 ; SKYLAKE-LABEL: test_shufpd:
4301 ; SKYLAKE-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00]
4302 ; SKYLAKE-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [8:1.00]
4303 ; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
4304 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4306 ; SKX-LABEL: test_shufpd:
4308 ; SKX-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00]
4309 ; SKX-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [8:1.00]
4310 ; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
4311 ; SKX-NEXT: retq # sched: [7:1.00]
4313 ; BTVER2-LABEL: test_shufpd:
4315 ; BTVER2-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:0.50]
4316 ; BTVER2-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [6:1.00]
4317 ; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
4318 ; BTVER2-NEXT: retq # sched: [4:1.00]
4320 ; ZNVER1-LABEL: test_shufpd:
4322 ; ZNVER1-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:0.50]
4323 ; ZNVER1-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [8:0.50]
4324 ; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4325 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: both shuffles use the same mask; only the operands differ.
4326 %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 1, i32 4, i32 2, i32 7>
4327 %2 = load <4 x double>, <4 x double> *%a2, align 32
4328 %3 = shufflevector <4 x double> %a1, <4 x double> %2, <4 x i32> <i32 1, i32 4, i32 2, i32 7>
4329 %4 = fadd <4 x double> %1, %3
; test_shufps: shuffles (%a0, %a1) with mask <0, 0, 8, 8, 4, 4, 12, 12>, then
; shuffles that result with a vector loaded (align 32) from %a2 using mask
; <0, 3, 8, 8, 4, 7, 12, 12>. The second shuffle consumes the first, so every
; run expects two vshufps that both write ymm0 (register source, then "mem"
; source), followed by retq. The vshufps expectations use the {{.*#+}} regex
; to skip the operand text and match only the decoded element list.
4333 define <8 x float> @test_shufps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) nounwind {
4334 ; GENERIC-LABEL: test_shufps:
4336 ; GENERIC-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00]
4337 ; GENERIC-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],mem[0,0],ymm0[4,7],mem[4,4] sched: [8:1.00]
4338 ; GENERIC-NEXT: retq # sched: [1:1.00]
4340 ; SANDY-LABEL: test_shufps:
4342 ; SANDY-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00]
4343 ; SANDY-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],mem[0,0],ymm0[4,7],mem[4,4] sched: [8:1.00]
4344 ; SANDY-NEXT: retq # sched: [1:1.00]
4346 ; HASWELL-LABEL: test_shufps:
4348 ; HASWELL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00]
4349 ; HASWELL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],mem[0,0],ymm0[4,7],mem[4,4] sched: [1:1.00]
4350 ; HASWELL-NEXT: retq # sched: [2:1.00]
4352 ; BROADWELL-LABEL: test_shufps:
4353 ; BROADWELL: # BB#0:
4354 ; BROADWELL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00]
4355 ; BROADWELL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],mem[0,0],ymm0[4,7],mem[4,4] sched: [7:1.00]
4356 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4358 ; SKYLAKE-LABEL: test_shufps:
4360 ; SKYLAKE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00]
4361 ; SKYLAKE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],mem[0,0],ymm0[4,7],mem[4,4] sched: [8:1.00]
4362 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4364 ; SKX-LABEL: test_shufps:
4366 ; SKX-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00]
4367 ; SKX-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],mem[0,0],ymm0[4,7],mem[4,4] sched: [8:1.00]
4368 ; SKX-NEXT: retq # sched: [7:1.00]
4370 ; BTVER2-LABEL: test_shufps:
4372 ; BTVER2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:0.50]
4373 ; BTVER2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],mem[0,0],ymm0[4,7],mem[4,4] sched: [6:1.00]
4374 ; BTVER2-NEXT: retq # sched: [4:1.00]
4376 ; ZNVER1-LABEL: test_shufps:
4378 ; ZNVER1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:0.50]
4379 ; ZNVER1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],mem[0,0],ymm0[4,7],mem[4,4] sched: [8:0.50]
4380 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: the second shuffle takes %1 as its first operand.
4381 %1 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 0, i32 0, i32 8, i32 8, i32 4, i32 4, i32 12, i32 12>
4382 %2 = load <8 x float>, <8 x float> *%a2, align 32
4383 %3 = shufflevector <8 x float> %1, <8 x float> %2, <8 x i32> <i32 0, i32 3, i32 8, i32 8, i32 4, i32 7, i32 12, i32 12>
; test_sqrtpd: calls @llvm.x86.avx.sqrt.pd.256 once on the register argument
; %a0 and once on a vector loaded (align 32) from %a1, then fadds the two
; results. Every per-CPU expectation holds one register-source vsqrtpd, one
; memory-source vsqrtpd, a vaddpd and retq, each with its "# sched"
; annotation. The skylake and skx expectations list the register form first;
; all other runs list the memory form first.
4387 define <4 x double> @test_sqrtpd(<4 x double> %a0, <4 x double> *%a1) {
4388 ; GENERIC-LABEL: test_sqrtpd:
4390 ; GENERIC-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [52:2.00]
4391 ; GENERIC-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [45:2.00]
4392 ; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4393 ; GENERIC-NEXT: retq # sched: [1:1.00]
4395 ; SANDY-LABEL: test_sqrtpd:
4397 ; SANDY-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [52:2.00]
4398 ; SANDY-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [45:2.00]
4399 ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4400 ; SANDY-NEXT: retq # sched: [1:1.00]
4402 ; HASWELL-LABEL: test_sqrtpd:
4404 ; HASWELL-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [35:2.00]
4405 ; HASWELL-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [35:2.00]
4406 ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4407 ; HASWELL-NEXT: retq # sched: [2:1.00]
4409 ; BROADWELL-LABEL: test_sqrtpd:
4410 ; BROADWELL: # BB#0:
4411 ; BROADWELL-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [40:2.00]
4412 ; BROADWELL-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [34:2.00]
4413 ; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4414 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4416 ; SKYLAKE-LABEL: test_sqrtpd:
4418 ; SKYLAKE-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [18:1.00]
4419 ; SKYLAKE-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [25:1.00]
4420 ; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
4421 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4423 ; SKX-LABEL: test_sqrtpd:
4425 ; SKX-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [18:1.00]
4426 ; SKX-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [25:1.00]
4427 ; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
4428 ; SKX-NEXT: retq # sched: [7:1.00]
4430 ; BTVER2-LABEL: test_sqrtpd:
4432 ; BTVER2-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [59:54.00]
4433 ; BTVER2-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [54:54.00]
4434 ; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
4435 ; BTVER2-NEXT: retq # sched: [4:1.00]
4437 ; ZNVER1-LABEL: test_sqrtpd:
4439 ; ZNVER1-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [47:47.00]
4440 ; ZNVER1-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [40:40.00]
4441 ; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4442 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: register operand first, then the explicitly loaded operand.
4443 %1 = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0)
4444 %2 = load <4 x double>, <4 x double> *%a1, align 32
4445 %3 = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %2)
4446 %4 = fadd <4 x double> %1, %3
4449 declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone
; test_sqrtps: calls @llvm.x86.avx.sqrt.ps.256 once on the register argument
; %a0 and once on a vector loaded (align 32) from %a1, then fadds the two
; results. Every per-CPU expectation holds one register-source vsqrtps, one
; memory-source vsqrtps, a vaddps and retq, each with its "# sched"
; annotation. The skylake and skx expectations list the register form first;
; all other runs list the memory form first.
4451 define <8 x float> @test_sqrtps(<8 x float> %a0, <8 x float> *%a1) {
4452 ; GENERIC-LABEL: test_sqrtps:
4454 ; GENERIC-NEXT: vsqrtps (%rdi), %ymm1 # sched: [36:2.00]
4455 ; GENERIC-NEXT: vsqrtps %ymm0, %ymm0 # sched: [29:2.00]
4456 ; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4457 ; GENERIC-NEXT: retq # sched: [1:1.00]
4459 ; SANDY-LABEL: test_sqrtps:
4461 ; SANDY-NEXT: vsqrtps (%rdi), %ymm1 # sched: [36:2.00]
4462 ; SANDY-NEXT: vsqrtps %ymm0, %ymm0 # sched: [29:2.00]
4463 ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4464 ; SANDY-NEXT: retq # sched: [1:1.00]
4466 ; HASWELL-LABEL: test_sqrtps:
4468 ; HASWELL-NEXT: vsqrtps (%rdi), %ymm1 # sched: [21:2.00]
4469 ; HASWELL-NEXT: vsqrtps %ymm0, %ymm0 # sched: [21:2.00]
4470 ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4471 ; HASWELL-NEXT: retq # sched: [2:1.00]
4473 ; BROADWELL-LABEL: test_sqrtps:
4474 ; BROADWELL: # BB#0:
4475 ; BROADWELL-NEXT: vsqrtps (%rdi), %ymm1 # sched: [27:2.00]
4476 ; BROADWELL-NEXT: vsqrtps %ymm0, %ymm0 # sched: [21:2.00]
4477 ; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4478 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4480 ; SKYLAKE-LABEL: test_sqrtps:
4482 ; SKYLAKE-NEXT: vsqrtps %ymm0, %ymm0 # sched: [12:1.00]
4483 ; SKYLAKE-NEXT: vsqrtps (%rdi), %ymm1 # sched: [19:1.00]
4484 ; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
4485 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4487 ; SKX-LABEL: test_sqrtps:
4489 ; SKX-NEXT: vsqrtps %ymm0, %ymm0 # sched: [12:1.00]
4490 ; SKX-NEXT: vsqrtps (%rdi), %ymm1 # sched: [19:1.00]
4491 ; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
4492 ; SKX-NEXT: retq # sched: [7:1.00]
4494 ; BTVER2-LABEL: test_sqrtps:
4496 ; BTVER2-NEXT: vsqrtps (%rdi), %ymm1 # sched: [47:42.00]
4497 ; BTVER2-NEXT: vsqrtps %ymm0, %ymm0 # sched: [42:42.00]
4498 ; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
4499 ; BTVER2-NEXT: retq # sched: [4:1.00]
4501 ; ZNVER1-LABEL: test_sqrtps:
4503 ; ZNVER1-NEXT: vsqrtps (%rdi), %ymm1 # sched: [35:35.00]
4504 ; ZNVER1-NEXT: vsqrtps %ymm0, %ymm0 # sched: [28:28.00]
4505 ; ZNVER1-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4506 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: register operand first, then the explicitly loaded operand.
4507 %1 = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0)
4508 %2 = load <8 x float>, <8 x float> *%a1, align 32
4509 %3 = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %2)
4510 %4 = fadd <8 x float> %1, %3
4513 declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone
; test_subpd: computes %a0 - %a1 and then subtracts a vector loaded (align 32)
; from %a2 from that difference. The second fsub consumes the first, so every
; run expects a register-source vsubpd followed by a memory-source vsubpd,
; both writing ymm0, and then retq, each with its "# sched" annotation.
4515 define <4 x double> @test_subpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
4516 ; GENERIC-LABEL: test_subpd:
4518 ; GENERIC-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4519 ; GENERIC-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
4520 ; GENERIC-NEXT: retq # sched: [1:1.00]
4522 ; SANDY-LABEL: test_subpd:
4524 ; SANDY-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4525 ; SANDY-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
4526 ; SANDY-NEXT: retq # sched: [1:1.00]
4528 ; HASWELL-LABEL: test_subpd:
4530 ; HASWELL-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4531 ; HASWELL-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [3:1.00]
4532 ; HASWELL-NEXT: retq # sched: [2:1.00]
4534 ; BROADWELL-LABEL: test_subpd:
4535 ; BROADWELL: # BB#0:
4536 ; BROADWELL-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4537 ; BROADWELL-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
4538 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4540 ; SKYLAKE-LABEL: test_subpd:
4542 ; SKYLAKE-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
4543 ; SKYLAKE-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4544 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4546 ; SKX-LABEL: test_subpd:
4548 ; SKX-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
4549 ; SKX-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4550 ; SKX-NEXT: retq # sched: [7:1.00]
4552 ; BTVER2-LABEL: test_subpd:
4554 ; BTVER2-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
4555 ; BTVER2-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
4556 ; BTVER2-NEXT: retq # sched: [4:1.00]
4558 ; ZNVER1-LABEL: test_subpd:
4560 ; ZNVER1-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4561 ; ZNVER1-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
4562 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: the loaded vector is subtracted from the first difference.
4563 %1 = fsub <4 x double> %a0, %a1
4564 %2 = load <4 x double>, <4 x double> *%a2, align 32
4565 %3 = fsub <4 x double> %1, %2
; test_subps: computes %a0 - %a1 and then subtracts a vector loaded (align 32)
; from %a2 from that difference. The second fsub consumes the first, so every
; run expects a register-source vsubps followed by a memory-source vsubps,
; both writing ymm0, and then retq, each with its "# sched" annotation.
4569 define <8 x float> @test_subps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
4570 ; GENERIC-LABEL: test_subps:
4572 ; GENERIC-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4573 ; GENERIC-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
4574 ; GENERIC-NEXT: retq # sched: [1:1.00]
4576 ; SANDY-LABEL: test_subps:
4578 ; SANDY-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4579 ; SANDY-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
4580 ; SANDY-NEXT: retq # sched: [1:1.00]
4582 ; HASWELL-LABEL: test_subps:
4584 ; HASWELL-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4585 ; HASWELL-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [3:1.00]
4586 ; HASWELL-NEXT: retq # sched: [2:1.00]
4588 ; BROADWELL-LABEL: test_subps:
4589 ; BROADWELL: # BB#0:
4590 ; BROADWELL-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4591 ; BROADWELL-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
4592 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4594 ; SKYLAKE-LABEL: test_subps:
4596 ; SKYLAKE-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
4597 ; SKYLAKE-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4598 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4600 ; SKX-LABEL: test_subps:
4602 ; SKX-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
4603 ; SKX-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
4604 ; SKX-NEXT: retq # sched: [7:1.00]
4606 ; BTVER2-LABEL: test_subps:
4608 ; BTVER2-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
4609 ; BTVER2-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
4610 ; BTVER2-NEXT: retq # sched: [4:1.00]
4612 ; ZNVER1-LABEL: test_subps:
4614 ; ZNVER1-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4615 ; ZNVER1-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
4616 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: the loaded vector is subtracted from the first difference.
4617 %1 = fsub <8 x float> %a0, %a1
4618 %2 = load <8 x float>, <8 x float> *%a2, align 32
4619 %3 = fsub <8 x float> %1, %2
; test_testpd: calls @llvm.x86.avx.vtestc.pd twice on 128-bit vectors, first
; with (%a0, %a1) and then with (%a0, vector loaded align 16 from %a2).
; Every run expects the same sequence: xorl of eax, a register-source vtestpd,
; setb into al, a memory-source vtestpd, adcl $0 into eax, then retq.
; NOTE(review): the instruction that combines the two i32 results is not
; visible in this excerpt; the setb/adcl pair suggests a sum - confirm.
4623 define i32 @test_testpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
4624 ; GENERIC-LABEL: test_testpd:
4626 ; GENERIC-NEXT: xorl %eax, %eax # sched: [1:0.33]
4627 ; GENERIC-NEXT: vtestpd %xmm1, %xmm0 # sched: [1:1.00]
4628 ; GENERIC-NEXT: setb %al # sched: [1:0.50]
4629 ; GENERIC-NEXT: vtestpd (%rdi), %xmm0 # sched: [7:1.00]
4630 ; GENERIC-NEXT: adcl $0, %eax # sched: [2:0.67]
4631 ; GENERIC-NEXT: retq # sched: [1:1.00]
4633 ; SANDY-LABEL: test_testpd:
4635 ; SANDY-NEXT: xorl %eax, %eax # sched: [1:0.33]
4636 ; SANDY-NEXT: vtestpd %xmm1, %xmm0 # sched: [1:1.00]
4637 ; SANDY-NEXT: setb %al # sched: [1:0.50]
4638 ; SANDY-NEXT: vtestpd (%rdi), %xmm0 # sched: [7:1.00]
4639 ; SANDY-NEXT: adcl $0, %eax # sched: [2:0.67]
4640 ; SANDY-NEXT: retq # sched: [1:1.00]
4642 ; HASWELL-LABEL: test_testpd:
4644 ; HASWELL-NEXT: xorl %eax, %eax # sched: [1:0.25]
4645 ; HASWELL-NEXT: vtestpd %xmm1, %xmm0 # sched: [1:1.00]
4646 ; HASWELL-NEXT: setb %al # sched: [1:0.50]
4647 ; HASWELL-NEXT: vtestpd (%rdi), %xmm0 # sched: [1:1.00]
4648 ; HASWELL-NEXT: adcl $0, %eax # sched: [2:0.50]
4649 ; HASWELL-NEXT: retq # sched: [2:1.00]
4651 ; BROADWELL-LABEL: test_testpd:
4652 ; BROADWELL: # BB#0:
4653 ; BROADWELL-NEXT: xorl %eax, %eax # sched: [1:0.25]
4654 ; BROADWELL-NEXT: vtestpd %xmm1, %xmm0 # sched: [1:1.00]
4655 ; BROADWELL-NEXT: setb %al # sched: [1:0.50]
4656 ; BROADWELL-NEXT: vtestpd (%rdi), %xmm0 # sched: [6:1.00]
4657 ; BROADWELL-NEXT: adcl $0, %eax # sched: [1:0.50]
4658 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4660 ; SKYLAKE-LABEL: test_testpd:
4662 ; SKYLAKE-NEXT: xorl %eax, %eax # sched: [1:0.25]
4663 ; SKYLAKE-NEXT: vtestpd %xmm1, %xmm0 # sched: [2:1.00]
4664 ; SKYLAKE-NEXT: setb %al # sched: [1:0.50]
4665 ; SKYLAKE-NEXT: vtestpd (%rdi), %xmm0 # sched: [8:1.00]
4666 ; SKYLAKE-NEXT: adcl $0, %eax # sched: [1:0.50]
4667 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4669 ; SKX-LABEL: test_testpd:
4671 ; SKX-NEXT: xorl %eax, %eax # sched: [1:0.25]
4672 ; SKX-NEXT: vtestpd %xmm1, %xmm0 # sched: [2:1.00]
4673 ; SKX-NEXT: setb %al # sched: [1:0.50]
4674 ; SKX-NEXT: vtestpd (%rdi), %xmm0 # sched: [8:1.00]
4675 ; SKX-NEXT: adcl $0, %eax # sched: [1:0.50]
4676 ; SKX-NEXT: retq # sched: [7:1.00]
4678 ; BTVER2-LABEL: test_testpd:
4680 ; BTVER2-NEXT: xorl %eax, %eax # sched: [1:0.50]
4681 ; BTVER2-NEXT: vtestpd %xmm1, %xmm0 # sched: [3:1.00]
4682 ; BTVER2-NEXT: setb %al # sched: [1:0.50]
4683 ; BTVER2-NEXT: vtestpd (%rdi), %xmm0 # sched: [8:1.00]
4684 ; BTVER2-NEXT: adcl $0, %eax # sched: [1:0.50]
4685 ; BTVER2-NEXT: retq # sched: [4:1.00]
4687 ; ZNVER1-LABEL: test_testpd:
4689 ; ZNVER1-NEXT: xorl %eax, %eax # sched: [1:0.25]
4690 ; ZNVER1-NEXT: vtestpd %xmm1, %xmm0 # sched: [1:0.25]
4691 ; ZNVER1-NEXT: setb %al # sched: [1:0.25]
4692 ; ZNVER1-NEXT: vtestpd (%rdi), %xmm0 # sched: [8:0.50]
4693 ; ZNVER1-NEXT: adcl $0, %eax # sched: [1:0.25]
4694 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: %a0 is the first operand of both vtestc calls.
4695 %1 = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1)
4696 %2 = load <2 x double>, <2 x double> *%a2, align 16
4697 %3 = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %2)
4701 declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnone
; test_testpd_ymm: 256-bit variant; calls @llvm.x86.avx.vtestc.pd.256 twice,
; first with (%a0, %a1) and then with (%a0, vector loaded align 32 from %a2).
; The expected sequence is xorl of eax, register-source vtestpd, setb into al,
; memory-source vtestpd, adcl $0 into eax, then retq. Because the function
; takes ymm arguments but returns i32, every run except btver2 also expects a
; vzeroupper before retq. That vzeroupper carries no "# sched" annotation in
; the generic and sandybridge/ivybridge expectations and is printed as
; [100:?] for znver1.
; NOTE(review): the instruction that combines the two i32 results is not
; visible in this excerpt; the setb/adcl pair suggests a sum - confirm.
4703 define i32 @test_testpd_ymm(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
4704 ; GENERIC-LABEL: test_testpd_ymm:
4706 ; GENERIC-NEXT: xorl %eax, %eax # sched: [1:0.33]
4707 ; GENERIC-NEXT: vtestpd %ymm1, %ymm0 # sched: [1:1.00]
4708 ; GENERIC-NEXT: setb %al # sched: [1:0.50]
4709 ; GENERIC-NEXT: vtestpd (%rdi), %ymm0 # sched: [8:1.00]
4710 ; GENERIC-NEXT: adcl $0, %eax # sched: [2:0.67]
4711 ; GENERIC-NEXT: vzeroupper
4712 ; GENERIC-NEXT: retq # sched: [1:1.00]
4714 ; SANDY-LABEL: test_testpd_ymm:
4716 ; SANDY-NEXT: xorl %eax, %eax # sched: [1:0.33]
4717 ; SANDY-NEXT: vtestpd %ymm1, %ymm0 # sched: [1:1.00]
4718 ; SANDY-NEXT: setb %al # sched: [1:0.50]
4719 ; SANDY-NEXT: vtestpd (%rdi), %ymm0 # sched: [8:1.00]
4720 ; SANDY-NEXT: adcl $0, %eax # sched: [2:0.67]
4721 ; SANDY-NEXT: vzeroupper
4722 ; SANDY-NEXT: retq # sched: [1:1.00]
4724 ; HASWELL-LABEL: test_testpd_ymm:
4726 ; HASWELL-NEXT: xorl %eax, %eax # sched: [1:0.25]
4727 ; HASWELL-NEXT: vtestpd %ymm1, %ymm0 # sched: [1:1.00]
4728 ; HASWELL-NEXT: setb %al # sched: [1:0.50]
4729 ; HASWELL-NEXT: vtestpd (%rdi), %ymm0 # sched: [1:1.00]
4730 ; HASWELL-NEXT: adcl $0, %eax # sched: [2:0.50]
4731 ; HASWELL-NEXT: vzeroupper # sched: [4:1.00]
4732 ; HASWELL-NEXT: retq # sched: [2:1.00]
4734 ; BROADWELL-LABEL: test_testpd_ymm:
4735 ; BROADWELL: # BB#0:
4736 ; BROADWELL-NEXT: xorl %eax, %eax # sched: [1:0.25]
4737 ; BROADWELL-NEXT: vtestpd %ymm1, %ymm0 # sched: [1:1.00]
4738 ; BROADWELL-NEXT: setb %al # sched: [1:0.50]
4739 ; BROADWELL-NEXT: vtestpd (%rdi), %ymm0 # sched: [7:1.00]
4740 ; BROADWELL-NEXT: adcl $0, %eax # sched: [1:0.50]
4741 ; BROADWELL-NEXT: vzeroupper # sched: [4:1.00]
4742 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4744 ; SKYLAKE-LABEL: test_testpd_ymm:
4746 ; SKYLAKE-NEXT: xorl %eax, %eax # sched: [1:0.25]
4747 ; SKYLAKE-NEXT: vtestpd %ymm1, %ymm0 # sched: [2:1.00]
4748 ; SKYLAKE-NEXT: setb %al # sched: [1:0.50]
4749 ; SKYLAKE-NEXT: vtestpd (%rdi), %ymm0 # sched: [9:1.00]
4750 ; SKYLAKE-NEXT: adcl $0, %eax # sched: [1:0.50]
4751 ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00]
4752 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4754 ; SKX-LABEL: test_testpd_ymm:
4756 ; SKX-NEXT: xorl %eax, %eax # sched: [1:0.25]
4757 ; SKX-NEXT: vtestpd %ymm1, %ymm0 # sched: [2:1.00]
4758 ; SKX-NEXT: setb %al # sched: [1:0.50]
4759 ; SKX-NEXT: vtestpd (%rdi), %ymm0 # sched: [9:1.00]
4760 ; SKX-NEXT: adcl $0, %eax # sched: [1:0.50]
4761 ; SKX-NEXT: vzeroupper # sched: [4:1.00]
4762 ; SKX-NEXT: retq # sched: [7:1.00]
4764 ; BTVER2-LABEL: test_testpd_ymm:
4766 ; BTVER2-NEXT: xorl %eax, %eax # sched: [1:0.50]
4767 ; BTVER2-NEXT: vtestpd %ymm1, %ymm0 # sched: [4:2.00]
4768 ; BTVER2-NEXT: setb %al # sched: [1:0.50]
4769 ; BTVER2-NEXT: vtestpd (%rdi), %ymm0 # sched: [9:2.00]
4770 ; BTVER2-NEXT: adcl $0, %eax # sched: [1:0.50]
4771 ; BTVER2-NEXT: retq # sched: [4:1.00]
4773 ; ZNVER1-LABEL: test_testpd_ymm:
4775 ; ZNVER1-NEXT: xorl %eax, %eax # sched: [1:0.25]
4776 ; ZNVER1-NEXT: vtestpd %ymm1, %ymm0 # sched: [1:0.25]
4777 ; ZNVER1-NEXT: setb %al # sched: [1:0.25]
4778 ; ZNVER1-NEXT: vtestpd (%rdi), %ymm0 # sched: [8:0.50]
4779 ; ZNVER1-NEXT: adcl $0, %eax # sched: [1:0.25]
4780 ; ZNVER1-NEXT: vzeroupper # sched: [100:?]
4781 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: %a0 is the first operand of both vtestc calls.
4782 %1 = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1)
4783 %2 = load <4 x double>, <4 x double> *%a2, align 32
4784 %3 = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %2)
4788 declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind readnone
; test_testps: calls @llvm.x86.avx.vtestc.ps twice on 128-bit vectors, first
; with (%a0, %a1) and then with (%a0, vector loaded align 16 from %a2).
; Every run expects the same sequence: xorl of eax, a register-source vtestps,
; setb into al, a memory-source vtestps, adcl $0 into eax, then retq.
; NOTE(review): the instruction that combines the two i32 results is not
; visible in this excerpt; the setb/adcl pair suggests a sum - confirm.
4790 define i32 @test_testps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
4791 ; GENERIC-LABEL: test_testps:
4793 ; GENERIC-NEXT: xorl %eax, %eax # sched: [1:0.33]
4794 ; GENERIC-NEXT: vtestps %xmm1, %xmm0 # sched: [1:1.00]
4795 ; GENERIC-NEXT: setb %al # sched: [1:0.50]
4796 ; GENERIC-NEXT: vtestps (%rdi), %xmm0 # sched: [7:1.00]
4797 ; GENERIC-NEXT: adcl $0, %eax # sched: [2:0.67]
4798 ; GENERIC-NEXT: retq # sched: [1:1.00]
4800 ; SANDY-LABEL: test_testps:
4802 ; SANDY-NEXT: xorl %eax, %eax # sched: [1:0.33]
4803 ; SANDY-NEXT: vtestps %xmm1, %xmm0 # sched: [1:1.00]
4804 ; SANDY-NEXT: setb %al # sched: [1:0.50]
4805 ; SANDY-NEXT: vtestps (%rdi), %xmm0 # sched: [7:1.00]
4806 ; SANDY-NEXT: adcl $0, %eax # sched: [2:0.67]
4807 ; SANDY-NEXT: retq # sched: [1:1.00]
4809 ; HASWELL-LABEL: test_testps:
4811 ; HASWELL-NEXT: xorl %eax, %eax # sched: [1:0.25]
4812 ; HASWELL-NEXT: vtestps %xmm1, %xmm0 # sched: [1:1.00]
4813 ; HASWELL-NEXT: setb %al # sched: [1:0.50]
4814 ; HASWELL-NEXT: vtestps (%rdi), %xmm0 # sched: [1:1.00]
4815 ; HASWELL-NEXT: adcl $0, %eax # sched: [2:0.50]
4816 ; HASWELL-NEXT: retq # sched: [2:1.00]
4818 ; BROADWELL-LABEL: test_testps:
4819 ; BROADWELL: # BB#0:
4820 ; BROADWELL-NEXT: xorl %eax, %eax # sched: [1:0.25]
4821 ; BROADWELL-NEXT: vtestps %xmm1, %xmm0 # sched: [1:1.00]
4822 ; BROADWELL-NEXT: setb %al # sched: [1:0.50]
4823 ; BROADWELL-NEXT: vtestps (%rdi), %xmm0 # sched: [6:1.00]
4824 ; BROADWELL-NEXT: adcl $0, %eax # sched: [1:0.50]
4825 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4827 ; SKYLAKE-LABEL: test_testps:
4829 ; SKYLAKE-NEXT: xorl %eax, %eax # sched: [1:0.25]
4830 ; SKYLAKE-NEXT: vtestps %xmm1, %xmm0 # sched: [2:1.00]
4831 ; SKYLAKE-NEXT: setb %al # sched: [1:0.50]
4832 ; SKYLAKE-NEXT: vtestps (%rdi), %xmm0 # sched: [8:1.00]
4833 ; SKYLAKE-NEXT: adcl $0, %eax # sched: [1:0.50]
4834 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4836 ; SKX-LABEL: test_testps:
4838 ; SKX-NEXT: xorl %eax, %eax # sched: [1:0.25]
4839 ; SKX-NEXT: vtestps %xmm1, %xmm0 # sched: [2:1.00]
4840 ; SKX-NEXT: setb %al # sched: [1:0.50]
4841 ; SKX-NEXT: vtestps (%rdi), %xmm0 # sched: [8:1.00]
4842 ; SKX-NEXT: adcl $0, %eax # sched: [1:0.50]
4843 ; SKX-NEXT: retq # sched: [7:1.00]
4845 ; BTVER2-LABEL: test_testps:
4847 ; BTVER2-NEXT: xorl %eax, %eax # sched: [1:0.50]
4848 ; BTVER2-NEXT: vtestps %xmm1, %xmm0 # sched: [3:1.00]
4849 ; BTVER2-NEXT: setb %al # sched: [1:0.50]
4850 ; BTVER2-NEXT: vtestps (%rdi), %xmm0 # sched: [8:1.00]
4851 ; BTVER2-NEXT: adcl $0, %eax # sched: [1:0.50]
4852 ; BTVER2-NEXT: retq # sched: [4:1.00]
4854 ; ZNVER1-LABEL: test_testps:
4856 ; ZNVER1-NEXT: xorl %eax, %eax # sched: [1:0.25]
4857 ; ZNVER1-NEXT: vtestps %xmm1, %xmm0 # sched: [1:0.25]
4858 ; ZNVER1-NEXT: setb %al # sched: [1:0.25]
4859 ; ZNVER1-NEXT: vtestps (%rdi), %xmm0 # sched: [8:0.50]
4860 ; ZNVER1-NEXT: adcl $0, %eax # sched: [1:0.25]
4861 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: %a0 is the first operand of both vtestc calls.
4862 %1 = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1)
4863 %2 = load <4 x float>, <4 x float> *%a2, align 16
4864 %3 = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %2)
4868 declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone
; test_testps_ymm: scheduling test for the 256-bit (ymm) vtestps instruction.
; Same shape as the xmm variant: @llvm.x86.avx.vtestc.ps.256 is called once
; with an argument operand and once with an operand loaded from %a2.
; Observations from the CHECK lines below: the GENERIC and SANDY vzeroupper
; lines carry no "# sched:" annotation, and the BTVER2 sequence contains no
; vzeroupper line at all.
4870 define i32 @test_testps_ymm(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
4871 ; GENERIC-LABEL: test_testps_ymm:
4873 ; GENERIC-NEXT: xorl %eax, %eax # sched: [1:0.33]
4874 ; GENERIC-NEXT: vtestps %ymm1, %ymm0 # sched: [1:1.00]
4875 ; GENERIC-NEXT: setb %al # sched: [1:0.50]
4876 ; GENERIC-NEXT: vtestps (%rdi), %ymm0 # sched: [8:1.00]
4877 ; GENERIC-NEXT: adcl $0, %eax # sched: [2:0.67]
4878 ; GENERIC-NEXT: vzeroupper
4879 ; GENERIC-NEXT: retq # sched: [1:1.00]
4881 ; SANDY-LABEL: test_testps_ymm:
4883 ; SANDY-NEXT: xorl %eax, %eax # sched: [1:0.33]
4884 ; SANDY-NEXT: vtestps %ymm1, %ymm0 # sched: [1:1.00]
4885 ; SANDY-NEXT: setb %al # sched: [1:0.50]
4886 ; SANDY-NEXT: vtestps (%rdi), %ymm0 # sched: [8:1.00]
4887 ; SANDY-NEXT: adcl $0, %eax # sched: [2:0.67]
4888 ; SANDY-NEXT: vzeroupper
4889 ; SANDY-NEXT: retq # sched: [1:1.00]
4891 ; HASWELL-LABEL: test_testps_ymm:
4893 ; HASWELL-NEXT: xorl %eax, %eax # sched: [1:0.25]
4894 ; HASWELL-NEXT: vtestps %ymm1, %ymm0 # sched: [1:1.00]
4895 ; HASWELL-NEXT: setb %al # sched: [1:0.50]
4896 ; HASWELL-NEXT: vtestps (%rdi), %ymm0 # sched: [1:1.00]
4897 ; HASWELL-NEXT: adcl $0, %eax # sched: [2:0.50]
4898 ; HASWELL-NEXT: vzeroupper # sched: [4:1.00]
4899 ; HASWELL-NEXT: retq # sched: [2:1.00]
4901 ; BROADWELL-LABEL: test_testps_ymm:
4902 ; BROADWELL: # BB#0:
4903 ; BROADWELL-NEXT: xorl %eax, %eax # sched: [1:0.25]
4904 ; BROADWELL-NEXT: vtestps %ymm1, %ymm0 # sched: [1:1.00]
4905 ; BROADWELL-NEXT: setb %al # sched: [1:0.50]
4906 ; BROADWELL-NEXT: vtestps (%rdi), %ymm0 # sched: [7:1.00]
4907 ; BROADWELL-NEXT: adcl $0, %eax # sched: [1:0.50]
4908 ; BROADWELL-NEXT: vzeroupper # sched: [4:1.00]
4909 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4911 ; SKYLAKE-LABEL: test_testps_ymm:
4913 ; SKYLAKE-NEXT: xorl %eax, %eax # sched: [1:0.25]
4914 ; SKYLAKE-NEXT: vtestps %ymm1, %ymm0 # sched: [2:1.00]
4915 ; SKYLAKE-NEXT: setb %al # sched: [1:0.50]
4916 ; SKYLAKE-NEXT: vtestps (%rdi), %ymm0 # sched: [9:1.00]
4917 ; SKYLAKE-NEXT: adcl $0, %eax # sched: [1:0.50]
4918 ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00]
4919 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4921 ; SKX-LABEL: test_testps_ymm:
4923 ; SKX-NEXT: xorl %eax, %eax # sched: [1:0.25]
4924 ; SKX-NEXT: vtestps %ymm1, %ymm0 # sched: [2:1.00]
4925 ; SKX-NEXT: setb %al # sched: [1:0.50]
4926 ; SKX-NEXT: vtestps (%rdi), %ymm0 # sched: [9:1.00]
4927 ; SKX-NEXT: adcl $0, %eax # sched: [1:0.50]
4928 ; SKX-NEXT: vzeroupper # sched: [4:1.00]
4929 ; SKX-NEXT: retq # sched: [7:1.00]
4931 ; BTVER2-LABEL: test_testps_ymm:
4933 ; BTVER2-NEXT: xorl %eax, %eax # sched: [1:0.50]
4934 ; BTVER2-NEXT: vtestps %ymm1, %ymm0 # sched: [4:2.00]
4935 ; BTVER2-NEXT: setb %al # sched: [1:0.50]
4936 ; BTVER2-NEXT: vtestps (%rdi), %ymm0 # sched: [9:2.00]
4937 ; BTVER2-NEXT: adcl $0, %eax # sched: [1:0.50]
4938 ; BTVER2-NEXT: retq # sched: [4:1.00]
4940 ; ZNVER1-LABEL: test_testps_ymm:
4942 ; ZNVER1-NEXT: xorl %eax, %eax # sched: [1:0.25]
4943 ; ZNVER1-NEXT: vtestps %ymm1, %ymm0 # sched: [1:0.25]
4944 ; ZNVER1-NEXT: setb %al # sched: [1:0.25]
4945 ; ZNVER1-NEXT: vtestps (%rdi), %ymm0 # sched: [8:0.50]
4946 ; ZNVER1-NEXT: adcl $0, %eax # sched: [1:0.25]
4947 ; ZNVER1-NEXT: vzeroupper # sched: [100:?]
4948 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; %1 tests %a0 against the argument %a1; %3 tests %a0 against the vector
; loaded from %a2 (32-byte aligned load).
4949 %1 = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1)
4950 %2 = load <8 x float>, <8 x float> *%a2, align 32
4951 %3 = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %2)
; Declaration of the intrinsic called by test_testps_ymm.
4955 declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readnone
; test_unpckhpd: scheduling test for the 256-bit vunpckhpd instruction.
; Two shufflevectors with mask <1, 5, 3, 7> are expected to select as
; vunpckhpd: the first on two register operands, the second with its second
; source loaded from %a2 (memory form). The {{.*#+}} regex in the CHECK lines
; skips the operand text and matches only the decoded shuffle comment.
4957 define <4 x double> @test_unpckhpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
4958 ; GENERIC-LABEL: test_unpckhpd:
4960 ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
4961 ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [8:1.00]
4962 ; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4963 ; GENERIC-NEXT: retq # sched: [1:1.00]
4965 ; SANDY-LABEL: test_unpckhpd:
4967 ; SANDY-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
4968 ; SANDY-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [8:1.00]
4969 ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4970 ; SANDY-NEXT: retq # sched: [1:1.00]
4972 ; HASWELL-LABEL: test_unpckhpd:
4974 ; HASWELL-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
4975 ; HASWELL-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [1:1.00]
4976 ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4977 ; HASWELL-NEXT: retq # sched: [2:1.00]
4979 ; BROADWELL-LABEL: test_unpckhpd:
4980 ; BROADWELL: # BB#0:
4981 ; BROADWELL-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
4982 ; BROADWELL-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [7:1.00]
4983 ; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
4984 ; BROADWELL-NEXT: retq # sched: [7:1.00]
4986 ; SKYLAKE-LABEL: test_unpckhpd:
4988 ; SKYLAKE-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
4989 ; SKYLAKE-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [8:1.00]
4990 ; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
4991 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
4993 ; SKX-LABEL: test_unpckhpd:
4995 ; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
4996 ; SKX-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [8:1.00]
4997 ; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
4998 ; SKX-NEXT: retq # sched: [7:1.00]
5000 ; BTVER2-LABEL: test_unpckhpd:
5002 ; BTVER2-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:0.50]
5003 ; BTVER2-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [6:1.00]
5004 ; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
5005 ; BTVER2-NEXT: retq # sched: [4:1.00]
5007 ; ZNVER1-LABEL: test_unpckhpd:
5009 ; ZNVER1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:0.50]
5010 ; ZNVER1-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [8:0.50]
5011 ; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
5012 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; %1 shuffles %a0 with %a1; %3 shuffles %a1 with the vector loaded from %a2.
; The fadd combines both shuffle results so each shuffle feeds the output.
5013 %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
5014 %2 = load <4 x double>, <4 x double> *%a2, align 32
5015 %3 = shufflevector <4 x double> %a1, <4 x double> %2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
5016 %4 = fadd <4 x double> %1, %3
; test_unpckhps: scheduling test for the 256-bit vunpckhps instruction.
; Two chained shufflevectors with mask <2, 10, 3, 11, 6, 14, 7, 15> are
; expected to select as vunpckhps: the first on two register operands, the
; second taking the first result and a vector loaded from %a2 (memory form).
5020 define <8 x float> @test_unpckhps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) nounwind {
5021 ; GENERIC-LABEL: test_unpckhps:
5023 ; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
5024 ; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
5025 ; GENERIC-NEXT: retq # sched: [1:1.00]
5027 ; SANDY-LABEL: test_unpckhps:
5029 ; SANDY-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
5030 ; SANDY-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
5031 ; SANDY-NEXT: retq # sched: [1:1.00]
5033 ; HASWELL-LABEL: test_unpckhps:
5035 ; HASWELL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
5036 ; HASWELL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [1:1.00]
5037 ; HASWELL-NEXT: retq # sched: [2:1.00]
5039 ; BROADWELL-LABEL: test_unpckhps:
5040 ; BROADWELL: # BB#0:
5041 ; BROADWELL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
5042 ; BROADWELL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [7:1.00]
5043 ; BROADWELL-NEXT: retq # sched: [7:1.00]
5045 ; SKYLAKE-LABEL: test_unpckhps:
5047 ; SKYLAKE-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
5048 ; SKYLAKE-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
5049 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
5051 ; SKX-LABEL: test_unpckhps:
5053 ; SKX-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
5054 ; SKX-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
5055 ; SKX-NEXT: retq # sched: [7:1.00]
5057 ; BTVER2-LABEL: test_unpckhps:
5059 ; BTVER2-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:0.50]
5060 ; BTVER2-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [6:1.00]
5061 ; BTVER2-NEXT: retq # sched: [4:1.00]
5063 ; ZNVER1-LABEL: test_unpckhps:
5065 ; ZNVER1-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:0.50]
5066 ; ZNVER1-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:0.50]
5067 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; %1 shuffles %a0 with %a1; %3 shuffles that result with the vector loaded
; from %a2, so the two unpack operations are data-dependent.
5068 %1 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
5069 %2 = load <8 x float>, <8 x float> *%a2, align 32
5070 %3 = shufflevector <8 x float> %1, <8 x float> %2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
; test_unpcklpd: scheduling test for the 256-bit vunpcklpd instruction.
; Two shufflevectors with mask <0, 4, 2, 6> are expected to select as
; vunpcklpd: the first on two register operands, the second with its second
; source loaded from %a2 (memory form). The {{.*#+}} regex in the CHECK lines
; skips the operand text and matches only the decoded shuffle comment.
5074 define <4 x double> @test_unpcklpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
5075 ; GENERIC-LABEL: test_unpcklpd:
5077 ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
5078 ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [8:1.00]
5079 ; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
5080 ; GENERIC-NEXT: retq # sched: [1:1.00]
5082 ; SANDY-LABEL: test_unpcklpd:
5084 ; SANDY-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
5085 ; SANDY-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [8:1.00]
5086 ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
5087 ; SANDY-NEXT: retq # sched: [1:1.00]
5089 ; HASWELL-LABEL: test_unpcklpd:
5091 ; HASWELL-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
5092 ; HASWELL-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [1:1.00]
5093 ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
5094 ; HASWELL-NEXT: retq # sched: [2:1.00]
5096 ; BROADWELL-LABEL: test_unpcklpd:
5097 ; BROADWELL: # BB#0:
5098 ; BROADWELL-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
5099 ; BROADWELL-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [7:1.00]
5100 ; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
5101 ; BROADWELL-NEXT: retq # sched: [7:1.00]
5103 ; SKYLAKE-LABEL: test_unpcklpd:
5105 ; SKYLAKE-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
5106 ; SKYLAKE-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [8:1.00]
5107 ; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
5108 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
5110 ; SKX-LABEL: test_unpcklpd:
5112 ; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
5113 ; SKX-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [8:1.00]
5114 ; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33]
5115 ; SKX-NEXT: retq # sched: [7:1.00]
5117 ; BTVER2-LABEL: test_unpcklpd:
5119 ; BTVER2-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:0.50]
5120 ; BTVER2-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [6:1.00]
5121 ; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
5122 ; BTVER2-NEXT: retq # sched: [4:1.00]
5124 ; ZNVER1-LABEL: test_unpcklpd:
5126 ; ZNVER1-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:0.50]
5127 ; ZNVER1-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [8:0.50]
5128 ; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
5129 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; %1 shuffles %a0 with %a1; %3 shuffles %a1 with the vector loaded from %a2.
; The fadd combines both shuffle results so each shuffle feeds the output.
5130 %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
5131 %2 = load <4 x double>, <4 x double> *%a2, align 32
5132 %3 = shufflevector <4 x double> %a1, <4 x double> %2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
5133 %4 = fadd <4 x double> %1, %3
; test_unpcklps: scheduling test for the 256-bit vunpcklps instruction.
; Two chained shufflevectors with mask <0, 8, 1, 9, 4, 12, 5, 13> are
; expected to select as vunpcklps: the first on two register operands, the
; second taking the first result and a vector loaded from %a2 (memory form).
5137 define <8 x float> @test_unpcklps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) nounwind {
5138 ; GENERIC-LABEL: test_unpcklps:
5140 ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
5141 ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
5142 ; GENERIC-NEXT: retq # sched: [1:1.00]
5144 ; SANDY-LABEL: test_unpcklps:
5146 ; SANDY-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
5147 ; SANDY-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
5148 ; SANDY-NEXT: retq # sched: [1:1.00]
5150 ; HASWELL-LABEL: test_unpcklps:
5152 ; HASWELL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
5153 ; HASWELL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [1:1.00]
5154 ; HASWELL-NEXT: retq # sched: [2:1.00]
5156 ; BROADWELL-LABEL: test_unpcklps:
5157 ; BROADWELL: # BB#0:
5158 ; BROADWELL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
5159 ; BROADWELL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [7:1.00]
5160 ; BROADWELL-NEXT: retq # sched: [7:1.00]
5162 ; SKYLAKE-LABEL: test_unpcklps:
5164 ; SKYLAKE-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
5165 ; SKYLAKE-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
5166 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
5168 ; SKX-LABEL: test_unpcklps:
5170 ; SKX-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
5171 ; SKX-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
5172 ; SKX-NEXT: retq # sched: [7:1.00]
5174 ; BTVER2-LABEL: test_unpcklps:
5176 ; BTVER2-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:0.50]
5177 ; BTVER2-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [6:1.00]
5178 ; BTVER2-NEXT: retq # sched: [4:1.00]
5180 ; ZNVER1-LABEL: test_unpcklps:
5182 ; ZNVER1-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:0.50]
5183 ; ZNVER1-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:0.50]
5184 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; %1 shuffles %a0 with %a1; %3 shuffles that result with the vector loaded
; from %a2, so the two unpack operations are data-dependent.
5185 %1 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
5186 %2 = load <8 x float>, <8 x float> *%a2, align 32
5187 %3 = shufflevector <8 x float> %1, <8 x float> %2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
; test_xorpd: scheduling test for the 256-bit vxorpd instruction.
; The xor is written on <4 x i64> bitcasts of the double vectors: once with
; the argument %a1 (register form) and once with a vector loaded from %a2
; (memory form). The CHECK lines expect both to select as vxorpd, followed by
; a vaddpd for the trailing fadd.
5191 define <4 x double> @test_xorpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
5192 ; GENERIC-LABEL: test_xorpd:
5194 ; GENERIC-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
5195 ; GENERIC-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
5196 ; GENERIC-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
5197 ; GENERIC-NEXT: retq # sched: [1:1.00]
5199 ; SANDY-LABEL: test_xorpd:
5201 ; SANDY-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
5202 ; SANDY-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
5203 ; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
5204 ; SANDY-NEXT: retq # sched: [1:1.00]
5206 ; HASWELL-LABEL: test_xorpd:
5208 ; HASWELL-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
5209 ; HASWELL-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
5210 ; HASWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
5211 ; HASWELL-NEXT: retq # sched: [2:1.00]
5213 ; BROADWELL-LABEL: test_xorpd:
5214 ; BROADWELL: # BB#0:
5215 ; BROADWELL-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
5216 ; BROADWELL-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
5217 ; BROADWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
5218 ; BROADWELL-NEXT: retq # sched: [7:1.00]
5220 ; SKYLAKE-LABEL: test_xorpd:
5222 ; SKYLAKE-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5223 ; SKYLAKE-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5224 ; SKYLAKE-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
5225 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
5227 ; SKX-LABEL: test_xorpd:
5229 ; SKX-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5230 ; SKX-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5231 ; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.33]
5232 ; SKX-NEXT: retq # sched: [7:1.00]
5234 ; BTVER2-LABEL: test_xorpd:
5236 ; BTVER2-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5237 ; BTVER2-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
5238 ; BTVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
5239 ; BTVER2-NEXT: retq # sched: [4:1.00]
5241 ; ZNVER1-LABEL: test_xorpd:
5243 ; ZNVER1-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
5244 ; ZNVER1-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5245 ; ZNVER1-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
5246 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Integer xor of the bit patterns of %a0 and %a1, then of that result with
; the loaded vector.
; NOTE(review): the final fadd with %a1 presumably keeps the xor in the
; floating-point domain (vxorpd rather than an integer xor) -- confirm.
5247 %1 = bitcast <4 x double> %a0 to <4 x i64>
5248 %2 = bitcast <4 x double> %a1 to <4 x i64>
5249 %3 = xor <4 x i64> %1, %2
5250 %4 = load <4 x double>, <4 x double> *%a2, align 32
5251 %5 = bitcast <4 x double> %4 to <4 x i64>
5252 %6 = xor <4 x i64> %3, %5
5253 %7 = bitcast <4 x i64> %6 to <4 x double>
5254 %8 = fadd <4 x double> %a1, %7
; test_xorps: scheduling test for the 256-bit vxorps instruction.
; The xor is written on <4 x i64> bitcasts of the float vectors: once with
; the argument %a1 (register form) and once with a vector loaded from %a2
; (memory form). The CHECK lines expect both to select as vxorps, followed by
; a vaddps for the trailing fadd.
5258 define <8 x float> @test_xorps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
5259 ; GENERIC-LABEL: test_xorps:
5261 ; GENERIC-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
5262 ; GENERIC-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
5263 ; GENERIC-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
5264 ; GENERIC-NEXT: retq # sched: [1:1.00]
5266 ; SANDY-LABEL: test_xorps:
5268 ; SANDY-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
5269 ; SANDY-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
5270 ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
5271 ; SANDY-NEXT: retq # sched: [1:1.00]
5273 ; HASWELL-LABEL: test_xorps:
5275 ; HASWELL-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
5276 ; HASWELL-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [1:1.00]
5277 ; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
5278 ; HASWELL-NEXT: retq # sched: [2:1.00]
5280 ; BROADWELL-LABEL: test_xorps:
5281 ; BROADWELL: # BB#0:
5282 ; BROADWELL-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
5283 ; BROADWELL-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
5284 ; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
5285 ; BROADWELL-NEXT: retq # sched: [7:1.00]
5287 ; SKYLAKE-LABEL: test_xorps:
5289 ; SKYLAKE-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5290 ; SKYLAKE-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5291 ; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
5292 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
5294 ; SKX-LABEL: test_xorps:
5296 ; SKX-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
5297 ; SKX-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5298 ; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.33]
5299 ; SKX-NEXT: retq # sched: [7:1.00]
5301 ; BTVER2-LABEL: test_xorps:
5303 ; BTVER2-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
5304 ; BTVER2-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
5305 ; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
5306 ; BTVER2-NEXT: retq # sched: [4:1.00]
5308 ; ZNVER1-LABEL: test_xorps:
5310 ; ZNVER1-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
5311 ; ZNVER1-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
5312 ; ZNVER1-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
5313 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Integer xor of the bit patterns of %a0 and %a1, then of that result with
; the loaded vector.
; NOTE(review): the final fadd with %a1 presumably keeps the xor in the
; floating-point domain (vxorps rather than an integer xor) -- confirm.
5314 %1 = bitcast <8 x float> %a0 to <4 x i64>
5315 %2 = bitcast <8 x float> %a1 to <4 x i64>
5316 %3 = xor <4 x i64> %1, %2
5317 %4 = load <8 x float>, <8 x float> *%a2, align 32
5318 %5 = bitcast <8 x float> %4 to <4 x i64>
5319 %6 = xor <4 x i64> %3, %5
5320 %7 = bitcast <4 x i64> %6 to <8 x float>
5321 %8 = fadd <8 x float> %a1, %7
; test_zeroall: scheduling test for the vzeroall instruction, produced by a
; direct call to @llvm.x86.avx.vzeroall.
; Observation from the CHECK lines below: the GENERIC and SANDY vzeroall
; lines carry no "# sched:" annotation, while the other prefixes do.
5325 define void @test_zeroall() {
5326 ; GENERIC-LABEL: test_zeroall:
5328 ; GENERIC-NEXT: vzeroall
5329 ; GENERIC-NEXT: retq # sched: [1:1.00]
5331 ; SANDY-LABEL: test_zeroall:
5333 ; SANDY-NEXT: vzeroall
5334 ; SANDY-NEXT: retq # sched: [1:1.00]
5336 ; HASWELL-LABEL: test_zeroall:
5338 ; HASWELL-NEXT: vzeroall # sched: [16:16.00]
5339 ; HASWELL-NEXT: retq # sched: [2:1.00]
5341 ; BROADWELL-LABEL: test_zeroall:
5342 ; BROADWELL: # BB#0:
5343 ; BROADWELL-NEXT: vzeroall # sched: [16:16.00]
5344 ; BROADWELL-NEXT: retq # sched: [7:1.00]
5346 ; SKYLAKE-LABEL: test_zeroall:
5348 ; SKYLAKE-NEXT: vzeroall # sched: [16:4.00]
5349 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
5351 ; SKX-LABEL: test_zeroall:
5353 ; SKX-NEXT: vzeroall # sched: [16:4.00]
5354 ; SKX-NEXT: retq # sched: [7:1.00]
5356 ; BTVER2-LABEL: test_zeroall:
5358 ; BTVER2-NEXT: vzeroall # sched: [90:?]
5359 ; BTVER2-NEXT: retq # sched: [4:1.00]
5361 ; ZNVER1-LABEL: test_zeroall:
5363 ; ZNVER1-NEXT: vzeroall # sched: [100:?]
5364 ; ZNVER1-NEXT: retq # sched: [1:0.50]
5365 call void @llvm.x86.avx.vzeroall()
; Declaration of the intrinsic called by test_zeroall.
5368 declare void @llvm.x86.avx.vzeroall() nounwind
; test_zeroupper: scheduling test for the vzeroupper instruction, produced by
; a direct call to @llvm.x86.avx.vzeroupper.
; Observation from the CHECK lines below: the GENERIC and SANDY vzeroupper
; lines carry no "# sched:" annotation, while the other prefixes do.
5370 define void @test_zeroupper() {
5371 ; GENERIC-LABEL: test_zeroupper:
5373 ; GENERIC-NEXT: vzeroupper
5374 ; GENERIC-NEXT: retq # sched: [1:1.00]
5376 ; SANDY-LABEL: test_zeroupper:
5378 ; SANDY-NEXT: vzeroupper
5379 ; SANDY-NEXT: retq # sched: [1:1.00]
5381 ; HASWELL-LABEL: test_zeroupper:
5383 ; HASWELL-NEXT: vzeroupper # sched: [4:1.00]
5384 ; HASWELL-NEXT: retq # sched: [2:1.00]
5386 ; BROADWELL-LABEL: test_zeroupper:
5387 ; BROADWELL: # BB#0:
5388 ; BROADWELL-NEXT: vzeroupper # sched: [4:1.00]
5389 ; BROADWELL-NEXT: retq # sched: [7:1.00]
5391 ; SKYLAKE-LABEL: test_zeroupper:
5393 ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00]
5394 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
5396 ; SKX-LABEL: test_zeroupper:
5398 ; SKX-NEXT: vzeroupper # sched: [4:1.00]
5399 ; SKX-NEXT: retq # sched: [7:1.00]
5401 ; BTVER2-LABEL: test_zeroupper:
5403 ; BTVER2-NEXT: vzeroupper # sched: [46:?]
5404 ; BTVER2-NEXT: retq # sched: [4:1.00]
5406 ; ZNVER1-LABEL: test_zeroupper:
5408 ; ZNVER1-NEXT: vzeroupper # sched: [100:?]
5409 ; ZNVER1-NEXT: retq # sched: [1:0.50]
5410 call void @llvm.x86.avx.vzeroupper()
; Declaration of the intrinsic called by test_zeroupper.
5413 declare void @llvm.x86.avx.vzeroupper() nounwind