1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
10 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
11 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
12 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
; test_addsubpd: scheduling-annotation test for the SSE3 addsub.pd intrinsic.
; The IR calls the intrinsic twice: first on the two vector arguments, then on
; that result and a vector loaded from %a2. The expected output for every CPU
; therefore contains a register-register and a register-memory form of
; (v)addsubpd, each followed by the `# sched: [..]` text that -print-schedule
; emits (presumably latency and reciprocal throughput -- confirm).
14 define <2 x double> @test_addsubpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
15 ; GENERIC-LABEL: test_addsubpd:
17 ; GENERIC-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00]
18 ; GENERIC-NEXT: addsubpd (%rdi), %xmm0 # sched: [9:1.00]
19 ; GENERIC-NEXT: retq # sched: [1:1.00]
21 ; ATOM-LABEL: test_addsubpd:
23 ; ATOM-NEXT: addsubpd %xmm1, %xmm0 # sched: [6:3.00]
24 ; ATOM-NEXT: addsubpd (%rdi), %xmm0 # sched: [6:3.00]
25 ; ATOM-NEXT: retq # sched: [79:39.50]
27 ; SLM-LABEL: test_addsubpd:
29 ; SLM-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00]
30 ; SLM-NEXT: addsubpd (%rdi), %xmm0 # sched: [6:1.00]
31 ; SLM-NEXT: retq # sched: [4:1.00]
33 ; SANDY-LABEL: test_addsubpd:
35 ; SANDY-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
36 ; SANDY-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
37 ; SANDY-NEXT: retq # sched: [1:1.00]
39 ; HASWELL-LABEL: test_addsubpd:
41 ; HASWELL-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
42 ; HASWELL-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
43 ; HASWELL-NEXT: retq # sched: [2:1.00]
45 ; BROADWELL-LABEL: test_addsubpd:
47 ; BROADWELL-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
48 ; BROADWELL-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
49 ; BROADWELL-NEXT: retq # sched: [7:1.00]
51 ; SKYLAKE-LABEL: test_addsubpd:
53 ; SKYLAKE-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
54 ; SKYLAKE-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
55 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
57 ; SKX-LABEL: test_addsubpd:
59 ; SKX-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
60 ; SKX-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
61 ; SKX-NEXT: retq # sched: [7:1.00]
63 ; BTVER2-LABEL: test_addsubpd:
65 ; BTVER2-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
66 ; BTVER2-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
67 ; BTVER2-NEXT: retq # sched: [4:1.00]
69 ; ZNVER1-LABEL: test_addsubpd:
71 ; ZNVER1-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
72 ; ZNVER1-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
73 ; ZNVER1-NEXT: retq # sched: [1:0.50]
74 %1 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1)
75 %2 = load <2 x double>, <2 x double> *%a2, align 16
76 %3 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %1, <2 x double> %2)
; Declaration of the intrinsic called by test_addsubpd.
79 declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone
; test_addsubps: scheduling-annotation test for the SSE3 addsub.ps intrinsic.
; The IR calls the intrinsic on the two <4 x float> arguments and then on that
; result and a vector loaded from %a2, so each CPU's expected output has a
; register-register and a register-memory (v)addsubps, each with the
; `# sched: [..]` text printed by -print-schedule.
81 define <4 x float> @test_addsubps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
82 ; GENERIC-LABEL: test_addsubps:
84 ; GENERIC-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00]
85 ; GENERIC-NEXT: addsubps (%rdi), %xmm0 # sched: [9:1.00]
86 ; GENERIC-NEXT: retq # sched: [1:1.00]
88 ; ATOM-LABEL: test_addsubps:
90 ; ATOM-NEXT: addsubps %xmm1, %xmm0 # sched: [5:5.00]
91 ; ATOM-NEXT: addsubps (%rdi), %xmm0 # sched: [5:5.00]
92 ; ATOM-NEXT: retq # sched: [79:39.50]
94 ; SLM-LABEL: test_addsubps:
96 ; SLM-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00]
97 ; SLM-NEXT: addsubps (%rdi), %xmm0 # sched: [6:1.00]
98 ; SLM-NEXT: retq # sched: [4:1.00]
100 ; SANDY-LABEL: test_addsubps:
102 ; SANDY-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
103 ; SANDY-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
104 ; SANDY-NEXT: retq # sched: [1:1.00]
106 ; HASWELL-LABEL: test_addsubps:
108 ; HASWELL-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
109 ; HASWELL-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
110 ; HASWELL-NEXT: retq # sched: [2:1.00]
112 ; BROADWELL-LABEL: test_addsubps:
114 ; BROADWELL-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
115 ; BROADWELL-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
116 ; BROADWELL-NEXT: retq # sched: [7:1.00]
118 ; SKYLAKE-LABEL: test_addsubps:
120 ; SKYLAKE-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
121 ; SKYLAKE-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
122 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
124 ; SKX-LABEL: test_addsubps:
126 ; SKX-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
127 ; SKX-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
128 ; SKX-NEXT: retq # sched: [7:1.00]
130 ; BTVER2-LABEL: test_addsubps:
132 ; BTVER2-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
133 ; BTVER2-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
134 ; BTVER2-NEXT: retq # sched: [4:1.00]
136 ; ZNVER1-LABEL: test_addsubps:
138 ; ZNVER1-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
139 ; ZNVER1-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
140 ; ZNVER1-NEXT: retq # sched: [1:0.50]
141 %1 = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1)
142 %2 = load <4 x float>, <4 x float> *%a2, align 16
143 %3 = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %1, <4 x float> %2)
; Declaration of the intrinsic called by test_addsubps.
146 declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone
; test_haddpd: scheduling-annotation test for the SSE3 hadd.pd intrinsic.
; The IR calls the intrinsic on the two vector arguments and then on that
; result and a vector loaded from %a2, giving a register-register and a
; register-memory (v)haddpd in the expected output of every CPU.
; NOTE(review): the znver1 expectations record `[100:?]` for both haddpd forms;
; this looks like a placeholder for a missing scheduling entry -- confirm.
148 define <2 x double> @test_haddpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
149 ; GENERIC-LABEL: test_haddpd:
151 ; GENERIC-NEXT: haddpd %xmm1, %xmm0 # sched: [5:2.00]
152 ; GENERIC-NEXT: haddpd (%rdi), %xmm0 # sched: [11:2.00]
153 ; GENERIC-NEXT: retq # sched: [1:1.00]
155 ; ATOM-LABEL: test_haddpd:
157 ; ATOM-NEXT: haddpd %xmm1, %xmm0 # sched: [8:4.00]
158 ; ATOM-NEXT: haddpd (%rdi), %xmm0 # sched: [9:4.50]
159 ; ATOM-NEXT: retq # sched: [79:39.50]
161 ; SLM-LABEL: test_haddpd:
163 ; SLM-NEXT: haddpd %xmm1, %xmm0 # sched: [3:1.00]
164 ; SLM-NEXT: haddpd (%rdi), %xmm0 # sched: [6:1.00]
165 ; SLM-NEXT: retq # sched: [4:1.00]
167 ; SANDY-LABEL: test_haddpd:
169 ; SANDY-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
170 ; SANDY-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
171 ; SANDY-NEXT: retq # sched: [1:1.00]
173 ; HASWELL-LABEL: test_haddpd:
175 ; HASWELL-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
176 ; HASWELL-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [5:2.00]
177 ; HASWELL-NEXT: retq # sched: [2:1.00]
179 ; BROADWELL-LABEL: test_haddpd:
181 ; BROADWELL-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
182 ; BROADWELL-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
183 ; BROADWELL-NEXT: retq # sched: [7:1.00]
185 ; SKYLAKE-LABEL: test_haddpd:
187 ; SKYLAKE-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
188 ; SKYLAKE-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
189 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
191 ; SKX-LABEL: test_haddpd:
193 ; SKX-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
194 ; SKX-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
195 ; SKX-NEXT: retq # sched: [7:1.00]
197 ; BTVER2-LABEL: test_haddpd:
199 ; BTVER2-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
200 ; BTVER2-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
201 ; BTVER2-NEXT: retq # sched: [4:1.00]
203 ; ZNVER1-LABEL: test_haddpd:
205 ; ZNVER1-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [100:?]
206 ; ZNVER1-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [100:?]
207 ; ZNVER1-NEXT: retq # sched: [1:0.50]
208 %1 = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1)
209 %2 = load <2 x double>, <2 x double> *%a2, align 16
210 %3 = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %1, <2 x double> %2)
; Declaration of the intrinsic called by test_haddpd.
213 declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone
; test_haddps: scheduling-annotation test for the SSE3 hadd.ps intrinsic.
; The IR calls the intrinsic on the two <4 x float> arguments and then on that
; result and a vector loaded from %a2, giving a register-register and a
; register-memory (v)haddps in the expected output of every CPU.
; NOTE(review): the znver1 expectations record `[100:?]` for both haddps forms;
; this looks like a placeholder for a missing scheduling entry -- confirm.
215 define <4 x float> @test_haddps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
216 ; GENERIC-LABEL: test_haddps:
218 ; GENERIC-NEXT: haddps %xmm1, %xmm0 # sched: [5:2.00]
219 ; GENERIC-NEXT: haddps (%rdi), %xmm0 # sched: [11:2.00]
220 ; GENERIC-NEXT: retq # sched: [1:1.00]
222 ; ATOM-LABEL: test_haddps:
224 ; ATOM-NEXT: haddps %xmm1, %xmm0 # sched: [8:4.00]
225 ; ATOM-NEXT: haddps (%rdi), %xmm0 # sched: [9:4.50]
226 ; ATOM-NEXT: retq # sched: [79:39.50]
228 ; SLM-LABEL: test_haddps:
230 ; SLM-NEXT: haddps %xmm1, %xmm0 # sched: [3:1.00]
231 ; SLM-NEXT: haddps (%rdi), %xmm0 # sched: [6:1.00]
232 ; SLM-NEXT: retq # sched: [4:1.00]
234 ; SANDY-LABEL: test_haddps:
236 ; SANDY-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
237 ; SANDY-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
238 ; SANDY-NEXT: retq # sched: [1:1.00]
240 ; HASWELL-LABEL: test_haddps:
242 ; HASWELL-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
243 ; HASWELL-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [5:2.00]
244 ; HASWELL-NEXT: retq # sched: [2:1.00]
246 ; BROADWELL-LABEL: test_haddps:
248 ; BROADWELL-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
249 ; BROADWELL-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
250 ; BROADWELL-NEXT: retq # sched: [7:1.00]
252 ; SKYLAKE-LABEL: test_haddps:
254 ; SKYLAKE-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
255 ; SKYLAKE-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
256 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
258 ; SKX-LABEL: test_haddps:
260 ; SKX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
261 ; SKX-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
262 ; SKX-NEXT: retq # sched: [7:1.00]
264 ; BTVER2-LABEL: test_haddps:
266 ; BTVER2-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
267 ; BTVER2-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
268 ; BTVER2-NEXT: retq # sched: [4:1.00]
270 ; ZNVER1-LABEL: test_haddps:
272 ; ZNVER1-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [100:?]
273 ; ZNVER1-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [100:?]
274 ; ZNVER1-NEXT: retq # sched: [1:0.50]
275 %1 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1)
276 %2 = load <4 x float>, <4 x float> *%a2, align 16
277 %3 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %1, <4 x float> %2)
; Declaration of the intrinsic called by test_haddps.
280 declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone
; test_hsubpd: scheduling-annotation test for the SSE3 hsub.pd intrinsic.
; The IR calls the intrinsic on the two vector arguments and then on that
; result and a vector loaded from %a2, giving a register-register and a
; register-memory (v)hsubpd in the expected output of every CPU.
; NOTE(review): the znver1 expectations record `[100:?]` for both hsubpd forms;
; this looks like a placeholder for a missing scheduling entry -- confirm.
282 define <2 x double> @test_hsubpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
283 ; GENERIC-LABEL: test_hsubpd:
285 ; GENERIC-NEXT: hsubpd %xmm1, %xmm0 # sched: [5:2.00]
286 ; GENERIC-NEXT: hsubpd (%rdi), %xmm0 # sched: [11:2.00]
287 ; GENERIC-NEXT: retq # sched: [1:1.00]
289 ; ATOM-LABEL: test_hsubpd:
291 ; ATOM-NEXT: hsubpd %xmm1, %xmm0 # sched: [8:4.00]
292 ; ATOM-NEXT: hsubpd (%rdi), %xmm0 # sched: [9:4.50]
293 ; ATOM-NEXT: retq # sched: [79:39.50]
295 ; SLM-LABEL: test_hsubpd:
297 ; SLM-NEXT: hsubpd %xmm1, %xmm0 # sched: [3:1.00]
298 ; SLM-NEXT: hsubpd (%rdi), %xmm0 # sched: [6:1.00]
299 ; SLM-NEXT: retq # sched: [4:1.00]
301 ; SANDY-LABEL: test_hsubpd:
303 ; SANDY-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
304 ; SANDY-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
305 ; SANDY-NEXT: retq # sched: [1:1.00]
307 ; HASWELL-LABEL: test_hsubpd:
309 ; HASWELL-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
310 ; HASWELL-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [5:2.00]
311 ; HASWELL-NEXT: retq # sched: [2:1.00]
313 ; BROADWELL-LABEL: test_hsubpd:
315 ; BROADWELL-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
316 ; BROADWELL-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
317 ; BROADWELL-NEXT: retq # sched: [7:1.00]
319 ; SKYLAKE-LABEL: test_hsubpd:
321 ; SKYLAKE-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
322 ; SKYLAKE-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
323 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
325 ; SKX-LABEL: test_hsubpd:
327 ; SKX-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
328 ; SKX-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
329 ; SKX-NEXT: retq # sched: [7:1.00]
331 ; BTVER2-LABEL: test_hsubpd:
333 ; BTVER2-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
334 ; BTVER2-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
335 ; BTVER2-NEXT: retq # sched: [4:1.00]
337 ; ZNVER1-LABEL: test_hsubpd:
339 ; ZNVER1-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [100:?]
340 ; ZNVER1-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [100:?]
341 ; ZNVER1-NEXT: retq # sched: [1:0.50]
342 %1 = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1)
343 %2 = load <2 x double>, <2 x double> *%a2, align 16
344 %3 = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %1, <2 x double> %2)
; Declaration of the intrinsic called by test_hsubpd.
347 declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone
; test_hsubps: scheduling-annotation test for the SSE3 hsub.ps intrinsic.
; The IR calls the intrinsic on the two <4 x float> arguments and then on that
; result and a vector loaded from %a2, giving a register-register and a
; register-memory (v)hsubps in the expected output of every CPU.
; NOTE(review): the znver1 expectations record `[100:?]` for both hsubps forms;
; this looks like a placeholder for a missing scheduling entry -- confirm.
349 define <4 x float> @test_hsubps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
350 ; GENERIC-LABEL: test_hsubps:
352 ; GENERIC-NEXT: hsubps %xmm1, %xmm0 # sched: [5:2.00]
353 ; GENERIC-NEXT: hsubps (%rdi), %xmm0 # sched: [11:2.00]
354 ; GENERIC-NEXT: retq # sched: [1:1.00]
356 ; ATOM-LABEL: test_hsubps:
358 ; ATOM-NEXT: hsubps %xmm1, %xmm0 # sched: [8:4.00]
359 ; ATOM-NEXT: hsubps (%rdi), %xmm0 # sched: [9:4.50]
360 ; ATOM-NEXT: retq # sched: [79:39.50]
362 ; SLM-LABEL: test_hsubps:
364 ; SLM-NEXT: hsubps %xmm1, %xmm0 # sched: [3:1.00]
365 ; SLM-NEXT: hsubps (%rdi), %xmm0 # sched: [6:1.00]
366 ; SLM-NEXT: retq # sched: [4:1.00]
368 ; SANDY-LABEL: test_hsubps:
370 ; SANDY-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
371 ; SANDY-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
372 ; SANDY-NEXT: retq # sched: [1:1.00]
374 ; HASWELL-LABEL: test_hsubps:
376 ; HASWELL-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
377 ; HASWELL-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [5:2.00]
378 ; HASWELL-NEXT: retq # sched: [2:1.00]
380 ; BROADWELL-LABEL: test_hsubps:
382 ; BROADWELL-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
383 ; BROADWELL-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
384 ; BROADWELL-NEXT: retq # sched: [7:1.00]
386 ; SKYLAKE-LABEL: test_hsubps:
388 ; SKYLAKE-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
389 ; SKYLAKE-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
390 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
392 ; SKX-LABEL: test_hsubps:
394 ; SKX-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
395 ; SKX-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
396 ; SKX-NEXT: retq # sched: [7:1.00]
398 ; BTVER2-LABEL: test_hsubps:
400 ; BTVER2-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
401 ; BTVER2-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
402 ; BTVER2-NEXT: retq # sched: [4:1.00]
404 ; ZNVER1-LABEL: test_hsubps:
406 ; ZNVER1-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [100:?]
407 ; ZNVER1-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [100:?]
408 ; ZNVER1-NEXT: retq # sched: [1:0.50]
409 %1 = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1)
410 %2 = load <4 x float>, <4 x float> *%a2, align 16
411 %3 = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %1, <4 x float> %2)
; Declaration of the intrinsic called by test_hsubps.
414 declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone
; test_lddqu: scheduling-annotation test for the SSE3 ldu.dq intrinsic.
; The IR makes a single intrinsic call on the i8* argument, so the expected
; output for every CPU is one (v)lddqu load from (%rdi) plus the return.
; The atom expectations additionally contain two nop instructions between the
; load and the return.
416 define <16 x i8> @test_lddqu(i8* %a0) {
417 ; GENERIC-LABEL: test_lddqu:
419 ; GENERIC-NEXT: lddqu (%rdi), %xmm0 # sched: [6:0.50]
420 ; GENERIC-NEXT: retq # sched: [1:1.00]
422 ; ATOM-LABEL: test_lddqu:
424 ; ATOM-NEXT: lddqu (%rdi), %xmm0 # sched: [3:1.50]
425 ; ATOM-NEXT: nop # sched: [1:0.50]
426 ; ATOM-NEXT: nop # sched: [1:0.50]
427 ; ATOM-NEXT: retq # sched: [79:39.50]
429 ; SLM-LABEL: test_lddqu:
431 ; SLM-NEXT: lddqu (%rdi), %xmm0 # sched: [3:1.00]
432 ; SLM-NEXT: retq # sched: [4:1.00]
434 ; SANDY-LABEL: test_lddqu:
436 ; SANDY-NEXT: vlddqu (%rdi), %xmm0 # sched: [6:0.50]
437 ; SANDY-NEXT: retq # sched: [1:1.00]
439 ; HASWELL-LABEL: test_lddqu:
441 ; HASWELL-NEXT: vlddqu (%rdi), %xmm0 # sched: [1:0.50]
442 ; HASWELL-NEXT: retq # sched: [2:1.00]
444 ; BROADWELL-LABEL: test_lddqu:
446 ; BROADWELL-NEXT: vlddqu (%rdi), %xmm0 # sched: [5:0.50]
447 ; BROADWELL-NEXT: retq # sched: [7:1.00]
449 ; SKYLAKE-LABEL: test_lddqu:
451 ; SKYLAKE-NEXT: vlddqu (%rdi), %xmm0 # sched: [6:0.50]
452 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
454 ; SKX-LABEL: test_lddqu:
456 ; SKX-NEXT: vlddqu (%rdi), %xmm0 # sched: [6:0.50]
457 ; SKX-NEXT: retq # sched: [7:1.00]
459 ; BTVER2-LABEL: test_lddqu:
461 ; BTVER2-NEXT: vlddqu (%rdi), %xmm0 # sched: [5:1.00]
462 ; BTVER2-NEXT: retq # sched: [4:1.00]
464 ; ZNVER1-LABEL: test_lddqu:
466 ; ZNVER1-NEXT: vlddqu (%rdi), %xmm0 # sched: [8:0.50]
467 ; ZNVER1-NEXT: retq # sched: [1:0.50]
468 %1 = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %a0)
; Declaration of the intrinsic called by test_lddqu (marked readonly).
471 declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly
; test_monitor: scheduling-annotation test for the SSE3 monitor intrinsic.
; The IR is a single tail call passing %a0, %a1 and %a2 through. Every CPU's
; expected output copies %rdi into %rax (via leaq) and %esi into %ecx before
; the monitor instruction; no move is expected for the third argument
; (presumably it already arrives in the register monitor reads -- confirm).
; NOTE(review): the znver1 expectation for monitor records `[100:?]`, which
; looks like a placeholder for a missing scheduling entry -- confirm.
473 define void @test_monitor(i8* %a0, i32 %a1, i32 %a2) {
474 ; GENERIC-LABEL: test_monitor:
476 ; GENERIC-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
477 ; GENERIC-NEXT: movl %esi, %ecx # sched: [1:0.33]
478 ; GENERIC-NEXT: monitor # sched: [100:0.33]
479 ; GENERIC-NEXT: retq # sched: [1:1.00]
481 ; ATOM-LABEL: test_monitor:
483 ; ATOM-NEXT: leaq (%rdi), %rax # sched: [1:1.00]
484 ; ATOM-NEXT: movl %esi, %ecx # sched: [1:0.50]
485 ; ATOM-NEXT: monitor # sched: [45:22.50]
486 ; ATOM-NEXT: retq # sched: [79:39.50]
488 ; SLM-LABEL: test_monitor:
490 ; SLM-NEXT: leaq (%rdi), %rax # sched: [1:1.00]
491 ; SLM-NEXT: movl %esi, %ecx # sched: [1:0.50]
492 ; SLM-NEXT: monitor # sched: [100:1.00]
493 ; SLM-NEXT: retq # sched: [4:1.00]
495 ; SANDY-LABEL: test_monitor:
497 ; SANDY-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
498 ; SANDY-NEXT: movl %esi, %ecx # sched: [1:0.33]
499 ; SANDY-NEXT: monitor # sched: [100:0.33]
500 ; SANDY-NEXT: retq # sched: [1:1.00]
502 ; HASWELL-LABEL: test_monitor:
504 ; HASWELL-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
505 ; HASWELL-NEXT: movl %esi, %ecx # sched: [1:0.25]
506 ; HASWELL-NEXT: monitor # sched: [100:0.25]
507 ; HASWELL-NEXT: retq # sched: [2:1.00]
509 ; BROADWELL-LABEL: test_monitor:
511 ; BROADWELL-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
512 ; BROADWELL-NEXT: movl %esi, %ecx # sched: [1:0.25]
513 ; BROADWELL-NEXT: monitor # sched: [100:0.25]
514 ; BROADWELL-NEXT: retq # sched: [7:1.00]
516 ; SKYLAKE-LABEL: test_monitor:
518 ; SKYLAKE-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
519 ; SKYLAKE-NEXT: movl %esi, %ecx # sched: [1:0.25]
520 ; SKYLAKE-NEXT: monitor # sched: [100:0.25]
521 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
523 ; SKX-LABEL: test_monitor:
525 ; SKX-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
526 ; SKX-NEXT: movl %esi, %ecx # sched: [1:0.25]
527 ; SKX-NEXT: monitor # sched: [100:0.25]
528 ; SKX-NEXT: retq # sched: [7:1.00]
530 ; BTVER2-LABEL: test_monitor:
532 ; BTVER2-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
533 ; BTVER2-NEXT: movl %esi, %ecx # sched: [1:0.17]
534 ; BTVER2-NEXT: monitor # sched: [100:0.17]
535 ; BTVER2-NEXT: retq # sched: [4:1.00]
537 ; ZNVER1-LABEL: test_monitor:
539 ; ZNVER1-NEXT: leaq (%rdi), %rax # sched: [1:0.25]
540 ; ZNVER1-NEXT: movl %esi, %ecx # sched: [1:0.25]
541 ; ZNVER1-NEXT: monitor # sched: [100:?]
542 ; ZNVER1-NEXT: retq # sched: [1:0.50]
543 tail call void @llvm.x86.sse3.monitor(i8* %a0, i32 %a1, i32 %a2)
; Declaration of the intrinsic called by test_monitor.
546 declare void @llvm.x86.sse3.monitor(i8*, i32, i32)
; test_movddup: scheduling-annotation test for (v)movddup, written with plain
; IR rather than an intrinsic. Both %a0 and the vector loaded from %a1 are
; shuffled with an all-zero mask (element 0 duplicated into both lanes), and
; the two results are combined with fsub. The expected output for every CPU
; has a register form and a memory form of movddup followed by (v)subpd; the
; order of the two movddup instructions differs between CPUs, and the atom
; output ends with an extra movapd copy into %xmm0.
548 define <2 x double> @test_movddup(<2 x double> %a0, <2 x double> *%a1) {
549 ; GENERIC-LABEL: test_movddup:
551 ; GENERIC-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
552 ; GENERIC-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [6:0.50]
553 ; GENERIC-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
554 ; GENERIC-NEXT: retq # sched: [1:1.00]
556 ; ATOM-LABEL: test_movddup:
558 ; ATOM-NEXT: movddup {{.*#+}} xmm1 = mem[0,0] sched: [1:1.00]
559 ; ATOM-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
560 ; ATOM-NEXT: subpd %xmm0, %xmm1 # sched: [6:3.00]
561 ; ATOM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50]
562 ; ATOM-NEXT: retq # sched: [79:39.50]
564 ; SLM-LABEL: test_movddup:
566 ; SLM-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
567 ; SLM-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [3:1.00]
568 ; SLM-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
569 ; SLM-NEXT: retq # sched: [4:1.00]
571 ; SANDY-LABEL: test_movddup:
573 ; SANDY-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
574 ; SANDY-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [6:0.50]
575 ; SANDY-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
576 ; SANDY-NEXT: retq # sched: [1:1.00]
578 ; HASWELL-LABEL: test_movddup:
580 ; HASWELL-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
581 ; HASWELL-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [1:0.50]
582 ; HASWELL-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
583 ; HASWELL-NEXT: retq # sched: [2:1.00]
585 ; BROADWELL-LABEL: test_movddup:
587 ; BROADWELL-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
588 ; BROADWELL-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50]
589 ; BROADWELL-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
590 ; BROADWELL-NEXT: retq # sched: [7:1.00]
592 ; SKYLAKE-LABEL: test_movddup:
594 ; SKYLAKE-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
595 ; SKYLAKE-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50]
596 ; SKYLAKE-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
597 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
599 ; SKX-LABEL: test_movddup:
601 ; SKX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
602 ; SKX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50]
603 ; SKX-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
604 ; SKX-NEXT: retq # sched: [7:1.00]
606 ; BTVER2-LABEL: test_movddup:
608 ; BTVER2-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:1.00]
609 ; BTVER2-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:0.50]
610 ; BTVER2-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
611 ; BTVER2-NEXT: retq # sched: [4:1.00]
613 ; ZNVER1-LABEL: test_movddup:
615 ; ZNVER1-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [8:0.50]
616 ; ZNVER1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:0.50]
617 ; ZNVER1-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
618 ; ZNVER1-NEXT: retq # sched: [1:0.50]
619 %1 = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> zeroinitializer
620 %2 = load <2 x double>, <2 x double> *%a1, align 16
621 %3 = shufflevector <2 x double> %2, <2 x double> undef, <2 x i32> zeroinitializer
622 %4 = fsub <2 x double> %3, %1 ; Use fsub to stop the movddup from being folded as a broadcast load in avx512vl.
; test_movshdup: scheduling-annotation test for (v)movshdup, written with plain
; IR rather than an intrinsic. Both %a0 and the vector loaded from %a1 are
; shuffled with mask <1,1,3,3> (each odd element duplicated into the lane
; below it), and the two results are combined with fadd. The expected output
; for every CPU has a register form and a memory form of movshdup followed by
; (v)addps; the order of the two movshdup instructions differs between CPUs,
; and the atom output ends with an extra movaps copy into %xmm0.
626 define <4 x float> @test_movshdup(<4 x float> %a0, <4 x float> *%a1) {
627 ; GENERIC-LABEL: test_movshdup:
629 ; GENERIC-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
630 ; GENERIC-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
631 ; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
632 ; GENERIC-NEXT: retq # sched: [1:1.00]
634 ; ATOM-LABEL: test_movshdup:
636 ; ATOM-NEXT: movshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [1:1.00]
637 ; ATOM-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
638 ; ATOM-NEXT: addps %xmm0, %xmm1 # sched: [5:5.00]
639 ; ATOM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
640 ; ATOM-NEXT: retq # sched: [79:39.50]
642 ; SLM-LABEL: test_movshdup:
644 ; SLM-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
645 ; SLM-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [3:1.00]
646 ; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
647 ; SLM-NEXT: retq # sched: [4:1.00]
649 ; SANDY-LABEL: test_movshdup:
651 ; SANDY-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
652 ; SANDY-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50]
653 ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
654 ; SANDY-NEXT: retq # sched: [1:1.00]
656 ; HASWELL-LABEL: test_movshdup:
658 ; HASWELL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
659 ; HASWELL-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [1:0.50]
660 ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
661 ; HASWELL-NEXT: retq # sched: [2:1.00]
663 ; BROADWELL-LABEL: test_movshdup:
665 ; BROADWELL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
666 ; BROADWELL-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [5:0.50]
667 ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
668 ; BROADWELL-NEXT: retq # sched: [7:1.00]
670 ; SKYLAKE-LABEL: test_movshdup:
672 ; SKYLAKE-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
673 ; SKYLAKE-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50]
674 ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
675 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
677 ; SKX-LABEL: test_movshdup:
679 ; SKX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
680 ; SKX-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50]
681 ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
682 ; SKX-NEXT: retq # sched: [7:1.00]
684 ; BTVER2-LABEL: test_movshdup:
686 ; BTVER2-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [5:1.00]
687 ; BTVER2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:0.50]
688 ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
689 ; BTVER2-NEXT: retq # sched: [4:1.00]
691 ; ZNVER1-LABEL: test_movshdup:
693 ; ZNVER1-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [8:0.50]
694 ; ZNVER1-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:0.50]
695 ; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
696 ; ZNVER1-NEXT: retq # sched: [1:0.50]
697 %1 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
698 %2 = load <4 x float>, <4 x float> *%a1, align 16
699 %3 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
700 %4 = fadd <4 x float> %1, %3
; test_movsldup: scheduling-annotation test for (v)movsldup, written with plain
; IR rather than an intrinsic. Both %a0 and the vector loaded from %a1 are
; shuffled with mask <0,0,2,2> (each even element duplicated into the lane
; above it), and the two results are combined with fadd. The expected output
; for every CPU has a register form and a memory form of movsldup followed by
; (v)addps; the order of the two movsldup instructions differs between CPUs,
; and the atom output ends with an extra movaps copy into %xmm0.
704 define <4 x float> @test_movsldup(<4 x float> %a0, <4 x float> *%a1) {
705 ; GENERIC-LABEL: test_movsldup:
707 ; GENERIC-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
708 ; GENERIC-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
709 ; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
710 ; GENERIC-NEXT: retq # sched: [1:1.00]
712 ; ATOM-LABEL: test_movsldup:
714 ; ATOM-NEXT: movsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [1:1.00]
715 ; ATOM-NEXT: movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
716 ; ATOM-NEXT: addps %xmm0, %xmm1 # sched: [5:5.00]
717 ; ATOM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
718 ; ATOM-NEXT: retq # sched: [79:39.50]
720 ; SLM-LABEL: test_movsldup:
722 ; SLM-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
723 ; SLM-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [3:1.00]
724 ; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
725 ; SLM-NEXT: retq # sched: [4:1.00]
727 ; SANDY-LABEL: test_movsldup:
729 ; SANDY-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
730 ; SANDY-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50]
731 ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
732 ; SANDY-NEXT: retq # sched: [1:1.00]
734 ; HASWELL-LABEL: test_movsldup:
736 ; HASWELL-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
737 ; HASWELL-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [1:0.50]
738 ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
739 ; HASWELL-NEXT: retq # sched: [2:1.00]
741 ; BROADWELL-LABEL: test_movsldup:
743 ; BROADWELL-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
744 ; BROADWELL-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [5:0.50]
745 ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
746 ; BROADWELL-NEXT: retq # sched: [7:1.00]
748 ; SKYLAKE-LABEL: test_movsldup:
750 ; SKYLAKE-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
751 ; SKYLAKE-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50]
752 ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
753 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
755 ; SKX-LABEL: test_movsldup:
757 ; SKX-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
758 ; SKX-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50]
759 ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
760 ; SKX-NEXT: retq # sched: [7:1.00]
762 ; BTVER2-LABEL: test_movsldup:
764 ; BTVER2-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [5:1.00]
765 ; BTVER2-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:0.50]
766 ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
767 ; BTVER2-NEXT: retq # sched: [4:1.00]
769 ; ZNVER1-LABEL: test_movsldup:
771 ; ZNVER1-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [8:0.50]
772 ; ZNVER1-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:0.50]
773 ; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
774 ; ZNVER1-NEXT: retq # sched: [1:0.50]
775 %1 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
776 %2 = load <4 x float>, <4 x float> *%a1, align 16
777 %3 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
778 %4 = fadd <4 x float> %1, %3
; test_mwait: scheduling-annotation test for the SSE3 mwait intrinsic.
; The IR is a single tail call passing %a0 and %a1 through. Every CPU's
; expected output copies %edi into %ecx and %esi into %eax before the mwait
; instruction, then returns.
; NOTE(review): the znver1 expectation for mwait records `[100:?]`, which
; looks like a placeholder for a missing scheduling entry -- confirm.
782 define void @test_mwait(i32 %a0, i32 %a1) {
783 ; GENERIC-LABEL: test_mwait:
785 ; GENERIC-NEXT: movl %edi, %ecx # sched: [1:0.33]
786 ; GENERIC-NEXT: movl %esi, %eax # sched: [1:0.33]
787 ; GENERIC-NEXT: mwait # sched: [100:0.33]
788 ; GENERIC-NEXT: retq # sched: [1:1.00]
790 ; ATOM-LABEL: test_mwait:
792 ; ATOM-NEXT: movl %edi, %ecx # sched: [1:0.50]
793 ; ATOM-NEXT: movl %esi, %eax # sched: [1:0.50]
794 ; ATOM-NEXT: mwait # sched: [46:23.00]
795 ; ATOM-NEXT: retq # sched: [79:39.50]
797 ; SLM-LABEL: test_mwait:
799 ; SLM-NEXT: movl %edi, %ecx # sched: [1:0.50]
800 ; SLM-NEXT: movl %esi, %eax # sched: [1:0.50]
801 ; SLM-NEXT: mwait # sched: [100:1.00]
802 ; SLM-NEXT: retq # sched: [4:1.00]
804 ; SANDY-LABEL: test_mwait:
806 ; SANDY-NEXT: movl %edi, %ecx # sched: [1:0.33]
807 ; SANDY-NEXT: movl %esi, %eax # sched: [1:0.33]
808 ; SANDY-NEXT: mwait # sched: [100:0.33]
809 ; SANDY-NEXT: retq # sched: [1:1.00]
811 ; HASWELL-LABEL: test_mwait:
813 ; HASWELL-NEXT: movl %edi, %ecx # sched: [1:0.25]
814 ; HASWELL-NEXT: movl %esi, %eax # sched: [1:0.25]
815 ; HASWELL-NEXT: mwait # sched: [20:2.50]
816 ; HASWELL-NEXT: retq # sched: [2:1.00]
818 ; BROADWELL-LABEL: test_mwait:
820 ; BROADWELL-NEXT: movl %edi, %ecx # sched: [1:0.25]
821 ; BROADWELL-NEXT: movl %esi, %eax # sched: [1:0.25]
822 ; BROADWELL-NEXT: mwait # sched: [100:0.25]
823 ; BROADWELL-NEXT: retq # sched: [7:1.00]
825 ; SKYLAKE-LABEL: test_mwait:
827 ; SKYLAKE-NEXT: movl %edi, %ecx # sched: [1:0.25]
828 ; SKYLAKE-NEXT: movl %esi, %eax # sched: [1:0.25]
829 ; SKYLAKE-NEXT: mwait # sched: [20:2.50]
830 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
832 ; SKX-LABEL: test_mwait:
834 ; SKX-NEXT: movl %edi, %ecx # sched: [1:0.25]
835 ; SKX-NEXT: movl %esi, %eax # sched: [1:0.25]
836 ; SKX-NEXT: mwait # sched: [20:2.50]
837 ; SKX-NEXT: retq # sched: [7:1.00]
839 ; BTVER2-LABEL: test_mwait:
841 ; BTVER2-NEXT: movl %edi, %ecx # sched: [1:0.17]
842 ; BTVER2-NEXT: movl %esi, %eax # sched: [1:0.17]
843 ; BTVER2-NEXT: mwait # sched: [100:0.17]
844 ; BTVER2-NEXT: retq # sched: [4:1.00]
846 ; ZNVER1-LABEL: test_mwait:
848 ; ZNVER1-NEXT: movl %edi, %ecx # sched: [1:0.25]
849 ; ZNVER1-NEXT: movl %esi, %eax # sched: [1:0.25]
850 ; ZNVER1-NEXT: mwait # sched: [100:?]
851 ; ZNVER1-NEXT: retq # sched: [1:0.50]
852 tail call void @llvm.x86.sse3.mwait(i32 %a0, i32 %a1)
; Declaration of the intrinsic called by test_mwait.
855 declare void @llvm.x86.sse3.mwait(i32, i32)