1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
10 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
11 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
12 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
; test_addps: packed single-precision add on <4 x float>. The IR performs one
; fadd of the two register arguments and a second fadd whose operand is loaded
; from %a2, so every CPU group below pins both the register-register and the
; memory-operand form of addps/vaddps, together with the "# sched" annotation
; expected from the -print-schedule runs.
14 define <4 x float> @test_addps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
15 ; GENERIC-LABEL: test_addps:
17 ; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
18 ; GENERIC-NEXT: addps (%rdi), %xmm0 # sched: [9:1.00]
19 ; GENERIC-NEXT: retq # sched: [1:1.00]
21 ; ATOM-LABEL: test_addps:
23 ; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00]
24 ; ATOM-NEXT: addps (%rdi), %xmm0 # sched: [5:5.00]
25 ; ATOM-NEXT: retq # sched: [79:39.50]
27 ; SLM-LABEL: test_addps:
29 ; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
30 ; SLM-NEXT: addps (%rdi), %xmm0 # sched: [6:1.00]
31 ; SLM-NEXT: retq # sched: [4:1.00]
33 ; SANDY-LABEL: test_addps:
35 ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
36 ; SANDY-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
37 ; SANDY-NEXT: retq # sched: [1:1.00]
39 ; HASWELL-LABEL: test_addps:
41 ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
42 ; HASWELL-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
43 ; HASWELL-NEXT: retq # sched: [2:1.00]
45 ; BROADWELL-LABEL: test_addps:
47 ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
48 ; BROADWELL-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
49 ; BROADWELL-NEXT: retq # sched: [7:1.00]
51 ; SKYLAKE-LABEL: test_addps:
53 ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
54 ; SKYLAKE-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
55 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
57 ; SKX-LABEL: test_addps:
59 ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
60 ; SKX-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
61 ; SKX-NEXT: retq # sched: [7:1.00]
63 ; BTVER2-LABEL: test_addps:
65 ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
66 ; BTVER2-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
67 ; BTVER2-NEXT: retq # sched: [4:1.00]
69 ; ZNVER1-LABEL: test_addps:
71 ; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
72 ; ZNVER1-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
73 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-register add first, then an add fed by a 16-byte-aligned vector load.
74 %1 = fadd <4 x float> %a0, %a1
75 %2 = load <4 x float>, <4 x float> *%a2, align 16
76 %3 = fadd <4 x float> %1, %2
; test_addss: scalar single-precision add. One fadd uses the two float
; arguments and a second fadd uses a float loaded from %a2, so each CPU group
; below pins the register-register and memory-operand forms of addss/vaddss
; along with the "# sched" annotation expected from the -print-schedule runs.
80 define float @test_addss(float %a0, float %a1, float *%a2) {
81 ; GENERIC-LABEL: test_addss:
83 ; GENERIC-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
84 ; GENERIC-NEXT: addss (%rdi), %xmm0 # sched: [9:1.00]
85 ; GENERIC-NEXT: retq # sched: [1:1.00]
87 ; ATOM-LABEL: test_addss:
89 ; ATOM-NEXT: addss %xmm1, %xmm0 # sched: [5:5.00]
90 ; ATOM-NEXT: addss (%rdi), %xmm0 # sched: [5:5.00]
91 ; ATOM-NEXT: retq # sched: [79:39.50]
93 ; SLM-LABEL: test_addss:
95 ; SLM-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
96 ; SLM-NEXT: addss (%rdi), %xmm0 # sched: [6:1.00]
97 ; SLM-NEXT: retq # sched: [4:1.00]
99 ; SANDY-LABEL: test_addss:
101 ; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
102 ; SANDY-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
103 ; SANDY-NEXT: retq # sched: [1:1.00]
105 ; HASWELL-LABEL: test_addss:
107 ; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
108 ; HASWELL-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
109 ; HASWELL-NEXT: retq # sched: [2:1.00]
111 ; BROADWELL-LABEL: test_addss:
113 ; BROADWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
114 ; BROADWELL-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
115 ; BROADWELL-NEXT: retq # sched: [7:1.00]
117 ; SKYLAKE-LABEL: test_addss:
119 ; SKYLAKE-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
120 ; SKYLAKE-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
121 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
123 ; SKX-LABEL: test_addss:
125 ; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
126 ; SKX-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
127 ; SKX-NEXT: retq # sched: [7:1.00]
129 ; BTVER2-LABEL: test_addss:
131 ; BTVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
132 ; BTVER2-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
133 ; BTVER2-NEXT: retq # sched: [4:1.00]
135 ; ZNVER1-LABEL: test_addss:
137 ; ZNVER1-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
138 ; ZNVER1-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
139 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-register add first, then an add fed by a 4-byte-aligned scalar load.
140 %1 = fadd float %a0, %a1
141 %2 = load float, float *%a2, align 4
142 %3 = fadd float %1, %2
; test_andps: bitwise AND of <4 x float> values. The floats are bitcast to
; <4 x i32>, ANDed register-register, then ANDed with a vector loaded from %a2,
; and the result is bitcast back to <4 x float>. Each CPU group below pins the
; andps/vandps register and memory forms plus their "# sched" annotation; the
; Atom group additionally expects four nop instructions before retq.
146 define <4 x float> @test_andps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
147 ; GENERIC-LABEL: test_andps:
149 ; GENERIC-NEXT: andps %xmm1, %xmm0 # sched: [1:1.00]
150 ; GENERIC-NEXT: andps (%rdi), %xmm0 # sched: [7:1.00]
151 ; GENERIC-NEXT: retq # sched: [1:1.00]
153 ; ATOM-LABEL: test_andps:
155 ; ATOM-NEXT: andps %xmm1, %xmm0 # sched: [1:0.50]
156 ; ATOM-NEXT: andps (%rdi), %xmm0 # sched: [1:1.00]
157 ; ATOM-NEXT: nop # sched: [1:0.50]
158 ; ATOM-NEXT: nop # sched: [1:0.50]
159 ; ATOM-NEXT: nop # sched: [1:0.50]
160 ; ATOM-NEXT: nop # sched: [1:0.50]
161 ; ATOM-NEXT: retq # sched: [79:39.50]
163 ; SLM-LABEL: test_andps:
165 ; SLM-NEXT: andps %xmm1, %xmm0 # sched: [1:0.50]
166 ; SLM-NEXT: andps (%rdi), %xmm0 # sched: [4:1.00]
167 ; SLM-NEXT: retq # sched: [4:1.00]
169 ; SANDY-LABEL: test_andps:
171 ; SANDY-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
172 ; SANDY-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
173 ; SANDY-NEXT: retq # sched: [1:1.00]
175 ; HASWELL-LABEL: test_andps:
177 ; HASWELL-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
178 ; HASWELL-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
179 ; HASWELL-NEXT: retq # sched: [2:1.00]
181 ; BROADWELL-LABEL: test_andps:
183 ; BROADWELL-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
184 ; BROADWELL-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
185 ; BROADWELL-NEXT: retq # sched: [7:1.00]
187 ; SKYLAKE-LABEL: test_andps:
189 ; SKYLAKE-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
190 ; SKYLAKE-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
191 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
193 ; SKX-LABEL: test_andps:
195 ; SKX-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
196 ; SKX-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
197 ; SKX-NEXT: retq # sched: [7:1.00]
199 ; BTVER2-LABEL: test_andps:
201 ; BTVER2-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
202 ; BTVER2-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
203 ; BTVER2-NEXT: retq # sched: [4:1.00]
205 ; ZNVER1-LABEL: test_andps:
207 ; ZNVER1-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
208 ; ZNVER1-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
209 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; The AND is expressed on the integer view of the vectors; the bitcasts only
; reinterpret the bits between <4 x float> and <4 x i32>.
210 %1 = bitcast <4 x float> %a0 to <4 x i32>
211 %2 = bitcast <4 x float> %a1 to <4 x i32>
212 %3 = and <4 x i32> %1, %2
213 %4 = load <4 x float>, <4 x float> *%a2, align 16
214 %5 = bitcast <4 x float> %4 to <4 x i32>
215 %6 = and <4 x i32> %3, %5
216 %7 = bitcast <4 x i32> %6 to <4 x float>
; test_andnotps: bitwise AND-NOT of <4 x float> values. The IR writes each
; AND-NOT as an xor with all-ones (bitwise NOT) followed by an and, first on the
; two register arguments and then against a vector loaded from %a2. Each CPU
; group below expects these to appear as andnps/vandnps in register and memory
; form with their "# sched" annotation; the Atom group additionally expects
; four nop instructions before retq.
220 define <4 x float> @test_andnotps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
221 ; GENERIC-LABEL: test_andnotps:
223 ; GENERIC-NEXT: andnps %xmm1, %xmm0 # sched: [1:1.00]
224 ; GENERIC-NEXT: andnps (%rdi), %xmm0 # sched: [7:1.00]
225 ; GENERIC-NEXT: retq # sched: [1:1.00]
227 ; ATOM-LABEL: test_andnotps:
229 ; ATOM-NEXT: andnps %xmm1, %xmm0 # sched: [1:0.50]
230 ; ATOM-NEXT: andnps (%rdi), %xmm0 # sched: [1:1.00]
231 ; ATOM-NEXT: nop # sched: [1:0.50]
232 ; ATOM-NEXT: nop # sched: [1:0.50]
233 ; ATOM-NEXT: nop # sched: [1:0.50]
234 ; ATOM-NEXT: nop # sched: [1:0.50]
235 ; ATOM-NEXT: retq # sched: [79:39.50]
237 ; SLM-LABEL: test_andnotps:
239 ; SLM-NEXT: andnps %xmm1, %xmm0 # sched: [1:0.50]
240 ; SLM-NEXT: andnps (%rdi), %xmm0 # sched: [4:1.00]
241 ; SLM-NEXT: retq # sched: [4:1.00]
243 ; SANDY-LABEL: test_andnotps:
245 ; SANDY-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
246 ; SANDY-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
247 ; SANDY-NEXT: retq # sched: [1:1.00]
249 ; HASWELL-LABEL: test_andnotps:
251 ; HASWELL-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
252 ; HASWELL-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
253 ; HASWELL-NEXT: retq # sched: [2:1.00]
255 ; BROADWELL-LABEL: test_andnotps:
257 ; BROADWELL-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
258 ; BROADWELL-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
259 ; BROADWELL-NEXT: retq # sched: [7:1.00]
261 ; SKYLAKE-LABEL: test_andnotps:
263 ; SKYLAKE-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
264 ; SKYLAKE-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
265 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
267 ; SKX-LABEL: test_andnotps:
269 ; SKX-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
270 ; SKX-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
271 ; SKX-NEXT: retq # sched: [7:1.00]
273 ; BTVER2-LABEL: test_andnotps:
275 ; BTVER2-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
276 ; BTVER2-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
277 ; BTVER2-NEXT: retq # sched: [4:1.00]
279 ; ZNVER1-LABEL: test_andnotps:
281 ; ZNVER1-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
282 ; ZNVER1-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
283 ; ZNVER1-NEXT: retq # sched: [1:0.50]
284 %1 = bitcast <4 x float> %a0 to <4 x i32>
285 %2 = bitcast <4 x float> %a1 to <4 x i32>
; First AND-NOT: (~%a0) & %a1, both operands coming from registers.
286 %3 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1>
287 %4 = and <4 x i32> %3, %2
288 %5 = load <4 x float>, <4 x float> *%a2, align 16
289 %6 = bitcast <4 x float> %5 to <4 x i32>
; Second AND-NOT: (~previous result) & loaded vector, giving the memory form.
290 %7 = xor <4 x i32> %4, <i32 -1, i32 -1, i32 -1, i32 -1>
291 %8 = and <4 x i32> %6, %7
292 %9 = bitcast <4 x i32> %8 to <4 x float>
; test_cmpps: packed single-precision ordered-equal compare. %a0 is compared
; (fcmp oeq) against %a1 and against a vector loaded from %a2; the two <4 x i1>
; masks are ORed, sign-extended to <4 x i32> and bitcast to <4 x float>.
; Most CPU groups below expect cmpeqps/vcmpeqps in register and memory form
; followed by orps/vorps. The skx group instead expects compares into mask
; registers (%k0/%k1), a korw, and a vpmovm2d; that vpmovm2d line is recorded
; without a "# sched" annotation.
296 define <4 x float> @test_cmpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
297 ; GENERIC-LABEL: test_cmpps:
299 ; GENERIC-NEXT: cmpeqps %xmm0, %xmm1 # sched: [3:1.00]
300 ; GENERIC-NEXT: cmpeqps (%rdi), %xmm0 # sched: [9:1.00]
301 ; GENERIC-NEXT: orps %xmm1, %xmm0 # sched: [1:1.00]
302 ; GENERIC-NEXT: retq # sched: [1:1.00]
304 ; ATOM-LABEL: test_cmpps:
306 ; ATOM-NEXT: cmpeqps %xmm0, %xmm1 # sched: [5:5.00]
307 ; ATOM-NEXT: cmpeqps (%rdi), %xmm0 # sched: [5:5.00]
308 ; ATOM-NEXT: orps %xmm1, %xmm0 # sched: [1:0.50]
309 ; ATOM-NEXT: retq # sched: [79:39.50]
311 ; SLM-LABEL: test_cmpps:
313 ; SLM-NEXT: cmpeqps %xmm0, %xmm1 # sched: [3:1.00]
314 ; SLM-NEXT: cmpeqps (%rdi), %xmm0 # sched: [6:1.00]
315 ; SLM-NEXT: orps %xmm1, %xmm0 # sched: [1:0.50]
316 ; SLM-NEXT: retq # sched: [4:1.00]
318 ; SANDY-LABEL: test_cmpps:
320 ; SANDY-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
321 ; SANDY-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
322 ; SANDY-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
323 ; SANDY-NEXT: retq # sched: [1:1.00]
325 ; HASWELL-LABEL: test_cmpps:
327 ; HASWELL-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
328 ; HASWELL-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
329 ; HASWELL-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
330 ; HASWELL-NEXT: retq # sched: [2:1.00]
332 ; BROADWELL-LABEL: test_cmpps:
334 ; BROADWELL-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
335 ; BROADWELL-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
336 ; BROADWELL-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
337 ; BROADWELL-NEXT: retq # sched: [7:1.00]
339 ; SKYLAKE-LABEL: test_cmpps:
341 ; SKYLAKE-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [4:0.33]
342 ; SKYLAKE-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
343 ; SKYLAKE-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
344 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
346 ; SKX-LABEL: test_cmpps:
348 ; SKX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 # sched: [3:1.00]
349 ; SKX-NEXT: vcmpeqps (%rdi), %xmm0, %k1 # sched: [9:1.00]
350 ; SKX-NEXT: korw %k1, %k0, %k0 # sched: [1:1.00]
351 ; SKX-NEXT: vpmovm2d %k0, %xmm0
352 ; SKX-NEXT: retq # sched: [7:1.00]
354 ; BTVER2-LABEL: test_cmpps:
356 ; BTVER2-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
357 ; BTVER2-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
358 ; BTVER2-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
359 ; BTVER2-NEXT: retq # sched: [4:1.00]
361 ; ZNVER1-LABEL: test_cmpps:
363 ; ZNVER1-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
364 ; ZNVER1-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
365 ; ZNVER1-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.25]
366 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Two compares against %a0 (register operand, then loaded operand), merged
; with an or on the i1 masks before widening back to a full vector.
367 %1 = fcmp oeq <4 x float> %a0, %a1
368 %2 = load <4 x float>, <4 x float> *%a2, align 16
369 %3 = fcmp oeq <4 x float> %a0, %2
370 %4 = or <4 x i1> %1, %3
371 %5 = sext <4 x i1> %4 to <4 x i32>
372 %6 = bitcast <4 x i32> %5 to <4 x float>
; test_cmpss: scalar single-precision compare through the
; @llvm.x86.sse.cmp.ss intrinsic. The scalar arguments are placed in lane 0 of
; otherwise-undef <4 x float> vectors; the intrinsic is called once on the two
; register values and once on the previous result and a float loaded from %a2,
; both times with immediate 0. Each CPU group below expects that immediate to
; appear as cmpeqss/vcmpeqss in register and memory form, with the "# sched"
; annotation from the -print-schedule runs.
376 define float @test_cmpss(float %a0, float %a1, float *%a2) {
377 ; GENERIC-LABEL: test_cmpss:
379 ; GENERIC-NEXT: cmpeqss %xmm1, %xmm0 # sched: [3:1.00]
380 ; GENERIC-NEXT: cmpeqss (%rdi), %xmm0 # sched: [7:1.00]
381 ; GENERIC-NEXT: retq # sched: [1:1.00]
383 ; ATOM-LABEL: test_cmpss:
385 ; ATOM-NEXT: cmpeqss %xmm1, %xmm0 # sched: [5:5.00]
386 ; ATOM-NEXT: cmpeqss (%rdi), %xmm0 # sched: [5:5.00]
387 ; ATOM-NEXT: retq # sched: [79:39.50]
389 ; SLM-LABEL: test_cmpss:
391 ; SLM-NEXT: cmpeqss %xmm1, %xmm0 # sched: [3:1.00]
392 ; SLM-NEXT: cmpeqss (%rdi), %xmm0 # sched: [6:1.00]
393 ; SLM-NEXT: retq # sched: [4:1.00]
395 ; SANDY-LABEL: test_cmpss:
397 ; SANDY-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
398 ; SANDY-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
399 ; SANDY-NEXT: retq # sched: [1:1.00]
401 ; HASWELL-LABEL: test_cmpss:
403 ; HASWELL-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
404 ; HASWELL-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
405 ; HASWELL-NEXT: retq # sched: [2:1.00]
407 ; BROADWELL-LABEL: test_cmpss:
409 ; BROADWELL-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
410 ; BROADWELL-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
411 ; BROADWELL-NEXT: retq # sched: [7:1.00]
413 ; SKYLAKE-LABEL: test_cmpss:
415 ; SKYLAKE-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
416 ; SKYLAKE-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
417 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
419 ; SKX-LABEL: test_cmpss:
421 ; SKX-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
422 ; SKX-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
423 ; SKX-NEXT: retq # sched: [7:1.00]
425 ; BTVER2-LABEL: test_cmpss:
427 ; BTVER2-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
428 ; BTVER2-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
429 ; BTVER2-NEXT: retq # sched: [4:1.00]
431 ; ZNVER1-LABEL: test_cmpss:
433 ; ZNVER1-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
434 ; ZNVER1-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
435 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Wrap the scalars into lane 0 of vectors, as the intrinsic takes <4 x float>.
436 %1 = insertelement <4 x float> undef, float %a0, i32 0
437 %2 = insertelement <4 x float> undef, float %a1, i32 0
438 %3 = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %1, <4 x float> %2, i8 0)
; Second compare takes its right-hand operand from memory.
439 %4 = load float, float *%a2, align 4
440 %5 = insertelement <4 x float> undef, float %4, i32 0
441 %6 = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %3, <4 x float> %5, i8 0)
; Lane 0 of the intrinsic result is the scalar value of interest.
442 %7 = extractelement <4 x float> %6, i32 0
445 declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone
; test_comiss: scalar compare-and-set-flags via the @llvm.x86.sse.comieq.ss
; intrinsic, called once with %a0 and %a1 in registers and once with %a0 and a
; vector loaded from %a2. Each CPU group below expects comiss/vcomiss in
; register and memory form, each followed by setnp/sete/andb to materialise the
; result, then an orb combining the two results and a movzbl into %eax, all
; with the "# sched" annotation from the -print-schedule runs.
447 define i32 @test_comiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
448 ; GENERIC-LABEL: test_comiss:
450 ; GENERIC-NEXT: comiss %xmm1, %xmm0 # sched: [3:1.00]
451 ; GENERIC-NEXT: setnp %al # sched: [1:0.50]
452 ; GENERIC-NEXT: sete %cl # sched: [1:0.50]
453 ; GENERIC-NEXT: andb %al, %cl # sched: [1:0.33]
454 ; GENERIC-NEXT: comiss (%rdi), %xmm0 # sched: [7:1.00]
455 ; GENERIC-NEXT: setnp %al # sched: [1:0.50]
456 ; GENERIC-NEXT: sete %dl # sched: [1:0.50]
457 ; GENERIC-NEXT: andb %al, %dl # sched: [1:0.33]
458 ; GENERIC-NEXT: orb %cl, %dl # sched: [1:0.33]
459 ; GENERIC-NEXT: movzbl %dl, %eax # sched: [1:0.33]
460 ; GENERIC-NEXT: retq # sched: [1:1.00]
462 ; ATOM-LABEL: test_comiss:
464 ; ATOM-NEXT: comiss %xmm1, %xmm0 # sched: [9:4.50]
465 ; ATOM-NEXT: setnp %al # sched: [1:0.50]
466 ; ATOM-NEXT: sete %cl # sched: [1:0.50]
467 ; ATOM-NEXT: andb %al, %cl # sched: [1:0.50]
468 ; ATOM-NEXT: comiss (%rdi), %xmm0 # sched: [10:5.00]
469 ; ATOM-NEXT: setnp %al # sched: [1:0.50]
470 ; ATOM-NEXT: sete %dl # sched: [1:0.50]
471 ; ATOM-NEXT: andb %al, %dl # sched: [1:0.50]
472 ; ATOM-NEXT: orb %cl, %dl # sched: [1:0.50]
473 ; ATOM-NEXT: movzbl %dl, %eax # sched: [1:1.00]
474 ; ATOM-NEXT: retq # sched: [79:39.50]
476 ; SLM-LABEL: test_comiss:
478 ; SLM-NEXT: comiss %xmm1, %xmm0 # sched: [3:1.00]
479 ; SLM-NEXT: setnp %al # sched: [1:0.50]
480 ; SLM-NEXT: sete %cl # sched: [1:0.50]
481 ; SLM-NEXT: andb %al, %cl # sched: [1:0.50]
482 ; SLM-NEXT: comiss (%rdi), %xmm0 # sched: [6:1.00]
483 ; SLM-NEXT: setnp %al # sched: [1:0.50]
484 ; SLM-NEXT: sete %dl # sched: [1:0.50]
485 ; SLM-NEXT: andb %al, %dl # sched: [1:0.50]
486 ; SLM-NEXT: orb %cl, %dl # sched: [1:0.50]
487 ; SLM-NEXT: movzbl %dl, %eax # sched: [1:0.50]
488 ; SLM-NEXT: retq # sched: [4:1.00]
490 ; SANDY-LABEL: test_comiss:
492 ; SANDY-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00]
493 ; SANDY-NEXT: setnp %al # sched: [1:0.50]
494 ; SANDY-NEXT: sete %cl # sched: [1:0.50]
495 ; SANDY-NEXT: andb %al, %cl # sched: [1:0.33]
496 ; SANDY-NEXT: vcomiss (%rdi), %xmm0 # sched: [7:1.00]
497 ; SANDY-NEXT: setnp %al # sched: [1:0.50]
498 ; SANDY-NEXT: sete %dl # sched: [1:0.50]
499 ; SANDY-NEXT: andb %al, %dl # sched: [1:0.33]
500 ; SANDY-NEXT: orb %cl, %dl # sched: [1:0.33]
501 ; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33]
502 ; SANDY-NEXT: retq # sched: [1:1.00]
504 ; HASWELL-LABEL: test_comiss:
506 ; HASWELL-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00]
507 ; HASWELL-NEXT: setnp %al # sched: [1:0.50]
508 ; HASWELL-NEXT: sete %cl # sched: [1:0.50]
509 ; HASWELL-NEXT: andb %al, %cl # sched: [1:0.25]
510 ; HASWELL-NEXT: vcomiss (%rdi), %xmm0 # sched: [7:1.00]
511 ; HASWELL-NEXT: setnp %al # sched: [1:0.50]
512 ; HASWELL-NEXT: sete %dl # sched: [1:0.50]
513 ; HASWELL-NEXT: andb %al, %dl # sched: [1:0.25]
514 ; HASWELL-NEXT: orb %cl, %dl # sched: [1:0.25]
515 ; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25]
516 ; HASWELL-NEXT: retq # sched: [2:1.00]
518 ; BROADWELL-LABEL: test_comiss:
520 ; BROADWELL-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00]
521 ; BROADWELL-NEXT: setnp %al # sched: [1:0.50]
522 ; BROADWELL-NEXT: sete %cl # sched: [1:0.50]
523 ; BROADWELL-NEXT: andb %al, %cl # sched: [1:0.25]
524 ; BROADWELL-NEXT: vcomiss (%rdi), %xmm0 # sched: [8:1.00]
525 ; BROADWELL-NEXT: setnp %al # sched: [1:0.50]
526 ; BROADWELL-NEXT: sete %dl # sched: [1:0.50]
527 ; BROADWELL-NEXT: andb %al, %dl # sched: [1:0.25]
528 ; BROADWELL-NEXT: orb %cl, %dl # sched: [1:0.25]
529 ; BROADWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25]
530 ; BROADWELL-NEXT: retq # sched: [7:1.00]
532 ; SKYLAKE-LABEL: test_comiss:
534 ; SKYLAKE-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00]
535 ; SKYLAKE-NEXT: setnp %al # sched: [1:0.50]
536 ; SKYLAKE-NEXT: sete %cl # sched: [1:0.50]
537 ; SKYLAKE-NEXT: andb %al, %cl # sched: [1:0.25]
538 ; SKYLAKE-NEXT: vcomiss (%rdi), %xmm0 # sched: [8:1.00]
539 ; SKYLAKE-NEXT: setnp %al # sched: [1:0.50]
540 ; SKYLAKE-NEXT: sete %dl # sched: [1:0.50]
541 ; SKYLAKE-NEXT: andb %al, %dl # sched: [1:0.25]
542 ; SKYLAKE-NEXT: orb %cl, %dl # sched: [1:0.25]
543 ; SKYLAKE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
544 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
546 ; SKX-LABEL: test_comiss:
548 ; SKX-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00]
549 ; SKX-NEXT: setnp %al # sched: [1:0.50]
550 ; SKX-NEXT: sete %cl # sched: [1:0.50]
551 ; SKX-NEXT: andb %al, %cl # sched: [1:0.25]
552 ; SKX-NEXT: vcomiss (%rdi), %xmm0 # sched: [8:1.00]
553 ; SKX-NEXT: setnp %al # sched: [1:0.50]
554 ; SKX-NEXT: sete %dl # sched: [1:0.50]
555 ; SKX-NEXT: andb %al, %dl # sched: [1:0.25]
556 ; SKX-NEXT: orb %cl, %dl # sched: [1:0.25]
557 ; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25]
558 ; SKX-NEXT: retq # sched: [7:1.00]
560 ; BTVER2-LABEL: test_comiss:
562 ; BTVER2-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00]
563 ; BTVER2-NEXT: setnp %al # sched: [1:0.50]
564 ; BTVER2-NEXT: sete %cl # sched: [1:0.50]
565 ; BTVER2-NEXT: andb %al, %cl # sched: [1:0.50]
566 ; BTVER2-NEXT: vcomiss (%rdi), %xmm0 # sched: [8:1.00]
567 ; BTVER2-NEXT: setnp %al # sched: [1:0.50]
568 ; BTVER2-NEXT: sete %dl # sched: [1:0.50]
569 ; BTVER2-NEXT: andb %al, %dl # sched: [1:0.50]
570 ; BTVER2-NEXT: orb %cl, %dl # sched: [1:0.50]
571 ; BTVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50]
572 ; BTVER2-NEXT: retq # sched: [4:1.00]
574 ; ZNVER1-LABEL: test_comiss:
576 ; ZNVER1-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00]
577 ; ZNVER1-NEXT: setnp %al # sched: [1:0.25]
578 ; ZNVER1-NEXT: sete %cl # sched: [1:0.25]
579 ; ZNVER1-NEXT: andb %al, %cl # sched: [1:0.25]
580 ; ZNVER1-NEXT: vcomiss (%rdi), %xmm0 # sched: [10:1.00]
581 ; ZNVER1-NEXT: setnp %al # sched: [1:0.25]
582 ; ZNVER1-NEXT: sete %dl # sched: [1:0.25]
583 ; ZNVER1-NEXT: andb %al, %dl # sched: [1:0.25]
584 ; ZNVER1-NEXT: orb %cl, %dl # sched: [1:0.25]
585 ; ZNVER1-NEXT: movzbl %dl, %eax # sched: [1:0.25]
586 ; ZNVER1-NEXT: retq # sched: [1:0.50]
587 %1 = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1)
; NOTE(review): this <4 x float> load uses align 4 rather than 16; presumably
; deliberate so the load can be folded into comiss as a memory operand (the
; expected output above shows "comiss (%rdi), %xmm0") - confirm before changing.
588 %2 = load <4 x float>, <4 x float> *%a2, align 4
589 %3 = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %2)
593 declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone
; test_cvtsi2ss: signed 32-bit integer to float conversion. The IR converts
; (sitofp) the i32 argument and an i32 loaded from %a1, then adds the two
; floats. Each CPU group below expects cvtsi2ssl/vcvtsi2ssl in register and
; memory form followed by addss/vaddss, with the "# sched" annotation from the
; -print-schedule runs. The order of the two conversions differs between
; groups (the atom and slm groups list the memory form first).
595 define float @test_cvtsi2ss(i32 %a0, i32 *%a1) {
596 ; GENERIC-LABEL: test_cvtsi2ss:
598 ; GENERIC-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [5:2.00]
599 ; GENERIC-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [10:1.00]
600 ; GENERIC-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
601 ; GENERIC-NEXT: retq # sched: [1:1.00]
603 ; ATOM-LABEL: test_cvtsi2ss:
605 ; ATOM-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [7:3.50]
606 ; ATOM-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [6:3.00]
607 ; ATOM-NEXT: addss %xmm1, %xmm0 # sched: [5:5.00]
608 ; ATOM-NEXT: retq # sched: [79:39.50]
610 ; SLM-LABEL: test_cvtsi2ss:
612 ; SLM-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [7:1.00]
613 ; SLM-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [4:0.50]
614 ; SLM-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
615 ; SLM-NEXT: retq # sched: [4:1.00]
617 ; SANDY-LABEL: test_cvtsi2ss:
619 ; SANDY-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:2.00]
620 ; SANDY-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [10:1.00]
621 ; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
622 ; SANDY-NEXT: retq # sched: [1:1.00]
624 ; HASWELL-LABEL: test_cvtsi2ss:
626 ; HASWELL-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [4:1.00]
627 ; HASWELL-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
628 ; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
629 ; HASWELL-NEXT: retq # sched: [2:1.00]
631 ; BROADWELL-LABEL: test_cvtsi2ss:
633 ; BROADWELL-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [4:1.00]
634 ; BROADWELL-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
635 ; BROADWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
636 ; BROADWELL-NEXT: retq # sched: [7:1.00]
638 ; SKYLAKE-LABEL: test_cvtsi2ss:
640 ; SKYLAKE-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:1.00]
641 ; SKYLAKE-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
642 ; SKYLAKE-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
643 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
645 ; SKX-LABEL: test_cvtsi2ss:
647 ; SKX-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:1.00]
648 ; SKX-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
649 ; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
650 ; SKX-NEXT: retq # sched: [7:1.00]
652 ; BTVER2-LABEL: test_cvtsi2ss:
654 ; BTVER2-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [3:1.00]
655 ; BTVER2-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
656 ; BTVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
657 ; BTVER2-NEXT: retq # sched: [4:1.00]
659 ; ZNVER1-LABEL: test_cvtsi2ss:
661 ; ZNVER1-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:1.00]
662 ; ZNVER1-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [12:1.00]
663 ; ZNVER1-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
664 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Convert the register argument and the loaded value, then sum the two floats
; so both conversions feed the result.
665 %1 = sitofp i32 %a0 to float
666 %2 = load i32, i32 *%a1, align 4
667 %3 = sitofp i32 %2 to float
668 %4 = fadd float %1, %3
; test_cvtsi2ssq: signed 64-bit integer to float conversion. The IR converts
; (sitofp) the i64 argument and an i64 loaded from %a1, then adds the two
; floats. Each CPU group below expects cvtsi2ssq/vcvtsi2ssq in register and
; memory form followed by addss/vaddss, with the "# sched" annotation from the
; -print-schedule runs. The order of the two conversions differs between
; groups (the atom and slm groups list the memory form first).
672 define float @test_cvtsi2ssq(i64 %a0, i64 *%a1) {
673 ; GENERIC-LABEL: test_cvtsi2ssq:
675 ; GENERIC-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [5:2.00]
676 ; GENERIC-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [10:1.00]
677 ; GENERIC-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
678 ; GENERIC-NEXT: retq # sched: [1:1.00]
680 ; ATOM-LABEL: test_cvtsi2ssq:
682 ; ATOM-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [7:3.50]
683 ; ATOM-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [6:3.00]
684 ; ATOM-NEXT: addss %xmm1, %xmm0 # sched: [5:5.00]
685 ; ATOM-NEXT: retq # sched: [79:39.50]
687 ; SLM-LABEL: test_cvtsi2ssq:
689 ; SLM-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [7:1.00]
690 ; SLM-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [4:0.50]
691 ; SLM-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
692 ; SLM-NEXT: retq # sched: [4:1.00]
694 ; SANDY-LABEL: test_cvtsi2ssq:
696 ; SANDY-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00]
697 ; SANDY-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [10:1.00]
698 ; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
699 ; SANDY-NEXT: retq # sched: [1:1.00]
701 ; HASWELL-LABEL: test_cvtsi2ssq:
703 ; HASWELL-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00]
704 ; HASWELL-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
705 ; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
706 ; HASWELL-NEXT: retq # sched: [2:1.00]
708 ; BROADWELL-LABEL: test_cvtsi2ssq:
710 ; BROADWELL-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00]
711 ; BROADWELL-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
712 ; BROADWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
713 ; BROADWELL-NEXT: retq # sched: [7:1.00]
715 ; SKYLAKE-LABEL: test_cvtsi2ssq:
717 ; SKYLAKE-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [6:2.00]
718 ; SKYLAKE-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
719 ; SKYLAKE-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
720 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
722 ; SKX-LABEL: test_cvtsi2ssq:
724 ; SKX-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [6:2.00]
725 ; SKX-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
726 ; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
727 ; SKX-NEXT: retq # sched: [7:1.00]
729 ; BTVER2-LABEL: test_cvtsi2ssq:
731 ; BTVER2-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [3:1.00]
732 ; BTVER2-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [8:1.00]
733 ; BTVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
734 ; BTVER2-NEXT: retq # sched: [4:1.00]
736 ; ZNVER1-LABEL: test_cvtsi2ssq:
738 ; ZNVER1-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:1.00]
739 ; ZNVER1-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [12:1.00]
740 ; ZNVER1-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
741 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Convert the register argument and the loaded value, then sum the two floats
; so both conversions feed the result.
742 %1 = sitofp i64 %a0 to float
743 %2 = load i64, i64 *%a1, align 8
744 %3 = sitofp i64 %2 to float
745 %4 = fadd float %1, %3
; test_cvtss2si: float to 32-bit integer conversion through the
; @llvm.x86.sse.cvtss2si intrinsic. The intrinsic is called on a vector whose
; lane 0 holds the float argument and on a vector whose lane 0 holds a float
; loaded from %a1. Each CPU group below expects cvtss2si/vcvtss2si in register
; and memory form followed by an addl of the two results, with the "# sched"
; annotation from the -print-schedule runs. The order of the two conversions
; differs between groups.
749 define i32 @test_cvtss2si(float %a0, float *%a1) {
750 ; GENERIC-LABEL: test_cvtss2si:
752 ; GENERIC-NEXT: cvtss2si %xmm0, %ecx # sched: [5:1.00]
753 ; GENERIC-NEXT: cvtss2si (%rdi), %eax # sched: [9:1.00]
754 ; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
755 ; GENERIC-NEXT: retq # sched: [1:1.00]
757 ; ATOM-LABEL: test_cvtss2si:
759 ; ATOM-NEXT: cvtss2si (%rdi), %eax # sched: [9:4.50]
760 ; ATOM-NEXT: cvtss2si %xmm0, %ecx # sched: [8:4.00]
761 ; ATOM-NEXT: addl %ecx, %eax # sched: [1:0.50]
762 ; ATOM-NEXT: retq # sched: [79:39.50]
764 ; SLM-LABEL: test_cvtss2si:
766 ; SLM-NEXT: cvtss2si (%rdi), %eax # sched: [7:1.00]
767 ; SLM-NEXT: cvtss2si %xmm0, %ecx # sched: [4:0.50]
768 ; SLM-NEXT: addl %ecx, %eax # sched: [1:0.50]
769 ; SLM-NEXT: retq # sched: [4:1.00]
771 ; SANDY-LABEL: test_cvtss2si:
773 ; SANDY-NEXT: vcvtss2si %xmm0, %ecx # sched: [5:1.00]
774 ; SANDY-NEXT: vcvtss2si (%rdi), %eax # sched: [10:1.00]
775 ; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33]
776 ; SANDY-NEXT: retq # sched: [1:1.00]
778 ; HASWELL-LABEL: test_cvtss2si:
780 ; HASWELL-NEXT: vcvtss2si %xmm0, %ecx # sched: [4:1.00]
781 ; HASWELL-NEXT: vcvtss2si (%rdi), %eax # sched: [4:1.00]
782 ; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
783 ; HASWELL-NEXT: retq # sched: [2:1.00]
785 ; BROADWELL-LABEL: test_cvtss2si:
787 ; BROADWELL-NEXT: vcvtss2si %xmm0, %ecx # sched: [4:1.00]
788 ; BROADWELL-NEXT: vcvtss2si (%rdi), %eax # sched: [9:1.00]
789 ; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
790 ; BROADWELL-NEXT: retq # sched: [7:1.00]
792 ; SKYLAKE-LABEL: test_cvtss2si:
794 ; SKYLAKE-NEXT: vcvtss2si %xmm0, %ecx # sched: [6:1.00]
795 ; SKYLAKE-NEXT: vcvtss2si (%rdi), %eax # sched: [11:1.00]
796 ; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25]
797 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
799 ; SKX-LABEL: test_cvtss2si:
801 ; SKX-NEXT: vcvtss2si %xmm0, %ecx # sched: [6:1.00]
802 ; SKX-NEXT: vcvtss2si (%rdi), %eax # sched: [11:1.00]
803 ; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25]
804 ; SKX-NEXT: retq # sched: [7:1.00]
806 ; BTVER2-LABEL: test_cvtss2si:
808 ; BTVER2-NEXT: vcvtss2si (%rdi), %eax # sched: [8:1.00]
809 ; BTVER2-NEXT: vcvtss2si %xmm0, %ecx # sched: [3:1.00]
810 ; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
811 ; BTVER2-NEXT: retq # sched: [4:1.00]
813 ; ZNVER1-LABEL: test_cvtss2si:
815 ; ZNVER1-NEXT: vcvtss2si (%rdi), %eax # sched: [12:1.00]
816 ; ZNVER1-NEXT: vcvtss2si %xmm0, %ecx # sched: [5:1.00]
817 ; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25]
818 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; The intrinsic takes a <4 x float>, so each scalar is inserted into lane 0 of
; an otherwise-undef vector before the call.
819 %1 = insertelement <4 x float> undef, float %a0, i32 0
820 %2 = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %1)
; Same conversion with the scalar coming from memory.
821 %3 = load float, float *%a1, align 4
822 %4 = insertelement <4 x float> undef, float %3, i32 0
823 %5 = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %4)
827 declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone
; Scheduling test for the 64-bit result form of cvtss2si. The intrinsic
; @llvm.x86.sse.cvtss2si64 is called once on a vector built from %a0 and once
; on a vector built from the float loaded through %a1. The assertions expect a
; register-source and a memory-source (v)cvtss2si into 64-bit registers, an
; addq of the two results, and the per-CPU "# sched" annotations emitted by
; -print-schedule.
829 define i64 @test_cvtss2siq(float %a0, float *%a1) {
830 ; GENERIC-LABEL: test_cvtss2siq:
832 ; GENERIC-NEXT: cvtss2si %xmm0, %rcx # sched: [5:1.00]
833 ; GENERIC-NEXT: cvtss2si (%rdi), %rax # sched: [9:1.00]
834 ; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
835 ; GENERIC-NEXT: retq # sched: [1:1.00]
837 ; ATOM-LABEL: test_cvtss2siq:
839 ; ATOM-NEXT: cvtss2si (%rdi), %rax # sched: [10:5.00]
840 ; ATOM-NEXT: cvtss2si %xmm0, %rcx # sched: [9:4.50]
841 ; ATOM-NEXT: addq %rcx, %rax # sched: [1:0.50]
842 ; ATOM-NEXT: retq # sched: [79:39.50]
844 ; SLM-LABEL: test_cvtss2siq:
846 ; SLM-NEXT: cvtss2si (%rdi), %rax # sched: [7:1.00]
847 ; SLM-NEXT: cvtss2si %xmm0, %rcx # sched: [4:0.50]
848 ; SLM-NEXT: addq %rcx, %rax # sched: [1:0.50]
849 ; SLM-NEXT: retq # sched: [4:1.00]
851 ; SANDY-LABEL: test_cvtss2siq:
853 ; SANDY-NEXT: vcvtss2si %xmm0, %rcx # sched: [5:1.00]
854 ; SANDY-NEXT: vcvtss2si (%rdi), %rax # sched: [10:1.00]
855 ; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33]
856 ; SANDY-NEXT: retq # sched: [1:1.00]
858 ; HASWELL-LABEL: test_cvtss2siq:
860 ; HASWELL-NEXT: vcvtss2si %xmm0, %rcx # sched: [4:1.00]
861 ; HASWELL-NEXT: vcvtss2si (%rdi), %rax # sched: [4:1.00]
862 ; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
863 ; HASWELL-NEXT: retq # sched: [2:1.00]
865 ; BROADWELL-LABEL: test_cvtss2siq:
867 ; BROADWELL-NEXT: vcvtss2si %xmm0, %rcx # sched: [4:1.00]
868 ; BROADWELL-NEXT: vcvtss2si (%rdi), %rax # sched: [9:1.00]
869 ; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
870 ; BROADWELL-NEXT: retq # sched: [7:1.00]
872 ; SKYLAKE-LABEL: test_cvtss2siq:
874 ; SKYLAKE-NEXT: vcvtss2si %xmm0, %rcx # sched: [6:1.00]
875 ; SKYLAKE-NEXT: vcvtss2si (%rdi), %rax # sched: [11:1.00]
876 ; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25]
877 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
879 ; SKX-LABEL: test_cvtss2siq:
881 ; SKX-NEXT: vcvtss2si %xmm0, %rcx # sched: [6:1.00]
882 ; SKX-NEXT: vcvtss2si (%rdi), %rax # sched: [11:1.00]
883 ; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25]
884 ; SKX-NEXT: retq # sched: [7:1.00]
886 ; BTVER2-LABEL: test_cvtss2siq:
888 ; BTVER2-NEXT: vcvtss2si (%rdi), %rax # sched: [8:1.00]
889 ; BTVER2-NEXT: vcvtss2si %xmm0, %rcx # sched: [3:1.00]
890 ; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
891 ; BTVER2-NEXT: retq # sched: [4:1.00]
893 ; ZNVER1-LABEL: test_cvtss2siq:
895 ; ZNVER1-NEXT: vcvtss2si (%rdi), %rax # sched: [12:1.00]
896 ; ZNVER1-NEXT: vcvtss2si %xmm0, %rcx # sched: [5:1.00]
897 ; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25]
898 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-source conversion: %a0 is placed in lane 0 of an otherwise undef vector.
899 %1 = insertelement <4 x float> undef, float %a0, i32 0
900 %2 = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %1)
; Memory-source conversion: the scalar is loaded through %a1 before conversion.
901 %3 = load float, float *%a1, align 4
902 %4 = insertelement <4 x float> undef, float %3, i32 0
903 %5 = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %4)
907 declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone
; Scheduling test for the truncating float-to-i32 conversion. Plain fptosi is
; applied to %a0 and to the float loaded through %a1. The assertions expect a
; register-source and a memory-source (v)cvttss2si into 32-bit registers, an
; addl of the two results, and the per-CPU "# sched" annotations emitted by
; -print-schedule.
909 define i32 @test_cvttss2si(float %a0, float *%a1) {
910 ; GENERIC-LABEL: test_cvttss2si:
912 ; GENERIC-NEXT: cvttss2si %xmm0, %ecx # sched: [5:1.00]
913 ; GENERIC-NEXT: cvttss2si (%rdi), %eax # sched: [9:1.00]
914 ; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
915 ; GENERIC-NEXT: retq # sched: [1:1.00]
917 ; ATOM-LABEL: test_cvttss2si:
919 ; ATOM-NEXT: cvttss2si (%rdi), %eax # sched: [9:4.50]
920 ; ATOM-NEXT: cvttss2si %xmm0, %ecx # sched: [8:4.00]
921 ; ATOM-NEXT: addl %ecx, %eax # sched: [1:0.50]
922 ; ATOM-NEXT: retq # sched: [79:39.50]
924 ; SLM-LABEL: test_cvttss2si:
926 ; SLM-NEXT: cvttss2si (%rdi), %eax # sched: [7:1.00]
927 ; SLM-NEXT: cvttss2si %xmm0, %ecx # sched: [4:0.50]
928 ; SLM-NEXT: addl %ecx, %eax # sched: [1:0.50]
929 ; SLM-NEXT: retq # sched: [4:1.00]
931 ; SANDY-LABEL: test_cvttss2si:
933 ; SANDY-NEXT: vcvttss2si %xmm0, %ecx # sched: [5:1.00]
934 ; SANDY-NEXT: vcvttss2si (%rdi), %eax # sched: [10:1.00]
935 ; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33]
936 ; SANDY-NEXT: retq # sched: [1:1.00]
938 ; HASWELL-LABEL: test_cvttss2si:
940 ; HASWELL-NEXT: vcvttss2si %xmm0, %ecx # sched: [4:1.00]
941 ; HASWELL-NEXT: vcvttss2si (%rdi), %eax # sched: [4:1.00]
942 ; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
943 ; HASWELL-NEXT: retq # sched: [2:1.00]
945 ; BROADWELL-LABEL: test_cvttss2si:
947 ; BROADWELL-NEXT: vcvttss2si %xmm0, %ecx # sched: [4:1.00]
948 ; BROADWELL-NEXT: vcvttss2si (%rdi), %eax # sched: [9:1.00]
949 ; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
950 ; BROADWELL-NEXT: retq # sched: [7:1.00]
952 ; SKYLAKE-LABEL: test_cvttss2si:
954 ; SKYLAKE-NEXT: vcvttss2si %xmm0, %ecx # sched: [7:1.00]
955 ; SKYLAKE-NEXT: vcvttss2si (%rdi), %eax # sched: [11:1.00]
956 ; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25]
957 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
959 ; SKX-LABEL: test_cvttss2si:
961 ; SKX-NEXT: vcvttss2si %xmm0, %ecx # sched: [7:1.00]
962 ; SKX-NEXT: vcvttss2si (%rdi), %eax # sched: [11:1.00]
963 ; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25]
964 ; SKX-NEXT: retq # sched: [7:1.00]
966 ; BTVER2-LABEL: test_cvttss2si:
968 ; BTVER2-NEXT: vcvttss2si (%rdi), %eax # sched: [8:1.00]
969 ; BTVER2-NEXT: vcvttss2si %xmm0, %ecx # sched: [3:1.00]
970 ; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
971 ; BTVER2-NEXT: retq # sched: [4:1.00]
973 ; ZNVER1-LABEL: test_cvttss2si:
975 ; ZNVER1-NEXT: vcvttss2si (%rdi), %eax # sched: [12:1.00]
976 ; ZNVER1-NEXT: vcvttss2si %xmm0, %ecx # sched: [5:1.00]
977 ; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25]
978 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-source conversion of the argument.
979 %1 = fptosi float %a0 to i32
; Memory-source conversion: the scalar is loaded through %a1 before conversion.
980 %2 = load float, float *%a1, align 4
981 %3 = fptosi float %2 to i32
; Scheduling test for the truncating float-to-i64 conversion. Plain fptosi is
; applied to %a0 and to the float loaded through %a1. The assertions expect a
; register-source and a memory-source (v)cvttss2si into 64-bit registers, an
; addq of the two results, and the per-CPU "# sched" annotations emitted by
; -print-schedule.
986 define i64 @test_cvttss2siq(float %a0, float *%a1) {
987 ; GENERIC-LABEL: test_cvttss2siq:
989 ; GENERIC-NEXT: cvttss2si %xmm0, %rcx # sched: [5:1.00]
990 ; GENERIC-NEXT: cvttss2si (%rdi), %rax # sched: [9:1.00]
991 ; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
992 ; GENERIC-NEXT: retq # sched: [1:1.00]
994 ; ATOM-LABEL: test_cvttss2siq:
996 ; ATOM-NEXT: cvttss2si (%rdi), %rax # sched: [10:5.00]
997 ; ATOM-NEXT: cvttss2si %xmm0, %rcx # sched: [9:4.50]
998 ; ATOM-NEXT: addq %rcx, %rax # sched: [1:0.50]
999 ; ATOM-NEXT: retq # sched: [79:39.50]
1001 ; SLM-LABEL: test_cvttss2siq:
1003 ; SLM-NEXT: cvttss2si (%rdi), %rax # sched: [7:1.00]
1004 ; SLM-NEXT: cvttss2si %xmm0, %rcx # sched: [4:0.50]
1005 ; SLM-NEXT: addq %rcx, %rax # sched: [1:0.50]
1006 ; SLM-NEXT: retq # sched: [4:1.00]
1008 ; SANDY-LABEL: test_cvttss2siq:
1010 ; SANDY-NEXT: vcvttss2si %xmm0, %rcx # sched: [5:1.00]
1011 ; SANDY-NEXT: vcvttss2si (%rdi), %rax # sched: [10:1.00]
1012 ; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33]
1013 ; SANDY-NEXT: retq # sched: [1:1.00]
1015 ; HASWELL-LABEL: test_cvttss2siq:
1017 ; HASWELL-NEXT: vcvttss2si %xmm0, %rcx # sched: [4:1.00]
1018 ; HASWELL-NEXT: vcvttss2si (%rdi), %rax # sched: [4:1.00]
1019 ; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
1020 ; HASWELL-NEXT: retq # sched: [2:1.00]
1022 ; BROADWELL-LABEL: test_cvttss2siq:
1023 ; BROADWELL: # BB#0:
1024 ; BROADWELL-NEXT: vcvttss2si %xmm0, %rcx # sched: [4:1.00]
1025 ; BROADWELL-NEXT: vcvttss2si (%rdi), %rax # sched: [9:1.00]
1026 ; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
1027 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1029 ; SKYLAKE-LABEL: test_cvttss2siq:
1031 ; SKYLAKE-NEXT: vcvttss2si %xmm0, %rcx # sched: [7:1.00]
1032 ; SKYLAKE-NEXT: vcvttss2si (%rdi), %rax # sched: [11:1.00]
1033 ; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25]
1034 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1036 ; SKX-LABEL: test_cvttss2siq:
1038 ; SKX-NEXT: vcvttss2si %xmm0, %rcx # sched: [7:1.00]
1039 ; SKX-NEXT: vcvttss2si (%rdi), %rax # sched: [11:1.00]
1040 ; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25]
1041 ; SKX-NEXT: retq # sched: [7:1.00]
1043 ; BTVER2-LABEL: test_cvttss2siq:
1045 ; BTVER2-NEXT: vcvttss2si (%rdi), %rax # sched: [8:1.00]
1046 ; BTVER2-NEXT: vcvttss2si %xmm0, %rcx # sched: [3:1.00]
1047 ; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
1048 ; BTVER2-NEXT: retq # sched: [4:1.00]
1050 ; ZNVER1-LABEL: test_cvttss2siq:
1052 ; ZNVER1-NEXT: vcvttss2si (%rdi), %rax # sched: [12:1.00]
1053 ; ZNVER1-NEXT: vcvttss2si %xmm0, %rcx # sched: [5:1.00]
1054 ; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25]
1055 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-source conversion of the argument.
1056 %1 = fptosi float %a0 to i64
; Memory-source conversion: the scalar is loaded through %a1 before conversion.
1057 %2 = load float, float *%a1, align 4
1058 %3 = fptosi float %2 to i64
; Scheduling test for packed single-precision division. %a0 is divided by %a1,
; and that quotient is divided by the vector loaded (align 16) through %a2.
; The assertions expect a register-source and a memory-source (v)divps plus
; the per-CPU "# sched" annotations emitted by -print-schedule.
1063 define <4 x float> @test_divps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
1064 ; GENERIC-LABEL: test_divps:
1066 ; GENERIC-NEXT: divps %xmm1, %xmm0 # sched: [14:1.00]
1067 ; GENERIC-NEXT: divps (%rdi), %xmm0 # sched: [20:1.00]
1068 ; GENERIC-NEXT: retq # sched: [1:1.00]
1070 ; ATOM-LABEL: test_divps:
1072 ; ATOM-NEXT: divps %xmm1, %xmm0 # sched: [70:35.00]
1073 ; ATOM-NEXT: divps (%rdi), %xmm0 # sched: [125:62.50]
1074 ; ATOM-NEXT: retq # sched: [79:39.50]
1076 ; SLM-LABEL: test_divps:
1078 ; SLM-NEXT: divps %xmm1, %xmm0 # sched: [34:34.00]
1079 ; SLM-NEXT: divps (%rdi), %xmm0 # sched: [37:34.00]
1080 ; SLM-NEXT: retq # sched: [4:1.00]
1082 ; SANDY-LABEL: test_divps:
1084 ; SANDY-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [14:1.00]
1085 ; SANDY-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [20:1.00]
1086 ; SANDY-NEXT: retq # sched: [1:1.00]
1088 ; HASWELL-LABEL: test_divps:
1090 ; HASWELL-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [13:1.00]
1091 ; HASWELL-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
1092 ; HASWELL-NEXT: retq # sched: [2:1.00]
1094 ; BROADWELL-LABEL: test_divps:
1095 ; BROADWELL: # BB#0:
1096 ; BROADWELL-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [11:1.00]
1097 ; BROADWELL-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
1098 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1100 ; SKYLAKE-LABEL: test_divps:
1102 ; SKYLAKE-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [11:1.00]
1103 ; SKYLAKE-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [17:1.00]
1104 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1106 ; SKX-LABEL: test_divps:
1108 ; SKX-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [11:1.00]
1109 ; SKX-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [17:1.00]
1110 ; SKX-NEXT: retq # sched: [7:1.00]
1112 ; BTVER2-LABEL: test_divps:
1114 ; BTVER2-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [19:19.00]
1115 ; BTVER2-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [24:19.00]
1116 ; BTVER2-NEXT: retq # sched: [4:1.00]
1118 ; ZNVER1-LABEL: test_divps:
1120 ; ZNVER1-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [15:1.00]
1121 ; ZNVER1-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [22:1.00]
1122 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-source divide, then a divide whose divisor comes from memory.
1123 %1 = fdiv <4 x float> %a0, %a1
1124 %2 = load <4 x float>, <4 x float> *%a2, align 16
1125 %3 = fdiv <4 x float> %1, %2
; Scheduling test for scalar single-precision division. %a0 is divided by %a1,
; and that quotient is divided by the float loaded through %a2. The assertions
; expect a register-source and a memory-source (v)divss plus the per-CPU
; "# sched" annotations emitted by -print-schedule.
1129 define float @test_divss(float %a0, float %a1, float *%a2) {
1130 ; GENERIC-LABEL: test_divss:
1132 ; GENERIC-NEXT: divss %xmm1, %xmm0 # sched: [14:1.00]
1133 ; GENERIC-NEXT: divss (%rdi), %xmm0 # sched: [20:1.00]
1134 ; GENERIC-NEXT: retq # sched: [1:1.00]
1136 ; ATOM-LABEL: test_divss:
1138 ; ATOM-NEXT: divss %xmm1, %xmm0 # sched: [34:17.00]
1139 ; ATOM-NEXT: divss (%rdi), %xmm0 # sched: [62:31.00]
1140 ; ATOM-NEXT: retq # sched: [79:39.50]
1142 ; SLM-LABEL: test_divss:
1144 ; SLM-NEXT: divss %xmm1, %xmm0 # sched: [34:34.00]
1145 ; SLM-NEXT: divss (%rdi), %xmm0 # sched: [37:34.00]
1146 ; SLM-NEXT: retq # sched: [4:1.00]
1148 ; SANDY-LABEL: test_divss:
1150 ; SANDY-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [14:1.00]
1151 ; SANDY-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [20:1.00]
1152 ; SANDY-NEXT: retq # sched: [1:1.00]
1154 ; HASWELL-LABEL: test_divss:
1156 ; HASWELL-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [13:1.00]
1157 ; HASWELL-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
1158 ; HASWELL-NEXT: retq # sched: [2:1.00]
1160 ; BROADWELL-LABEL: test_divss:
1161 ; BROADWELL: # BB#0:
1162 ; BROADWELL-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [11:1.00]
1163 ; BROADWELL-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
1164 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1166 ; SKYLAKE-LABEL: test_divss:
1168 ; SKYLAKE-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [11:1.00]
1169 ; SKYLAKE-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
1170 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1172 ; SKX-LABEL: test_divss:
1174 ; SKX-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [11:1.00]
1175 ; SKX-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
1176 ; SKX-NEXT: retq # sched: [7:1.00]
1178 ; BTVER2-LABEL: test_divss:
1180 ; BTVER2-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [19:19.00]
1181 ; BTVER2-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [24:19.00]
1182 ; BTVER2-NEXT: retq # sched: [4:1.00]
1184 ; ZNVER1-LABEL: test_divss:
1186 ; ZNVER1-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [15:1.00]
1187 ; ZNVER1-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [22:1.00]
1188 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-source divide, then a divide whose divisor comes from memory.
1189 %1 = fdiv float %a0, %a1
1190 %2 = load float, float *%a2, align 4
1191 %3 = fdiv float %1, %2
; Scheduling test for ldmxcsr. %a0 is stored to a stack slot and the slot's
; address is passed to @llvm.x86.sse.ldmxcsr. The assertions expect a movl of
; %edi to the stack followed by (v)ldmxcsr from that slot; the stack offset is
; matched with a regex rather than a fixed number. The "# sched" annotations
; come from -print-schedule.
1195 define void @test_ldmxcsr(i32 %a0) {
1196 ; GENERIC-LABEL: test_ldmxcsr:
1198 ; GENERIC-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [5:1.00]
1199 ; GENERIC-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
1200 ; GENERIC-NEXT: retq # sched: [1:1.00]
1202 ; ATOM-LABEL: test_ldmxcsr:
1204 ; ATOM-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
1205 ; ATOM-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:2.50]
1206 ; ATOM-NEXT: retq # sched: [79:39.50]
1208 ; SLM-LABEL: test_ldmxcsr:
1210 ; SLM-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
1211 ; SLM-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [3:1.00]
1212 ; SLM-NEXT: retq # sched: [4:1.00]
1214 ; SANDY-LABEL: test_ldmxcsr:
1216 ; SANDY-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [5:1.00]
1217 ; SANDY-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
1218 ; SANDY-NEXT: retq # sched: [1:1.00]
1220 ; HASWELL-LABEL: test_ldmxcsr:
1222 ; HASWELL-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
1223 ; HASWELL-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
1224 ; HASWELL-NEXT: retq # sched: [2:1.00]
1226 ; BROADWELL-LABEL: test_ldmxcsr:
1227 ; BROADWELL: # BB#0:
1228 ; BROADWELL-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
1229 ; BROADWELL-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00]
1230 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1232 ; SKYLAKE-LABEL: test_ldmxcsr:
1234 ; SKYLAKE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
1235 ; SKYLAKE-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00]
1236 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1238 ; SKX-LABEL: test_ldmxcsr:
1240 ; SKX-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
1241 ; SKX-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00]
1242 ; SKX-NEXT: retq # sched: [7:1.00]
1244 ; BTVER2-LABEL: test_ldmxcsr:
1246 ; BTVER2-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
1247 ; BTVER2-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
1248 ; BTVER2-NEXT: retq # sched: [4:1.00]
1250 ; ZNVER1-LABEL: test_ldmxcsr:
1252 ; ZNVER1-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:0.50]
1253 ; ZNVER1-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [100:?]
1254 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Spill the argument to a stack slot; the intrinsic takes the slot's address as i8*.
1255 %1 = alloca i32, align 4
1256 %2 = bitcast i32* %1 to i8*
1257 store i32 %a0, i32* %1
1258 call void @llvm.x86.sse.ldmxcsr(i8* %2)
; NOTE(review): this declaration is marked `readnone`, yet test_ldmxcsr relies
; on the call being kept so that (v)ldmxcsr is emitted from a stack slot.
; Confirm the attribute is intentional rather than copied from the pure
; intrinsics declared elsewhere in this file.
1261 declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind readnone
; Scheduling test for packed single-precision maximum. @llvm.x86.sse.max.ps is
; applied to %a0 and %a1, then to that result and the vector loaded (align 16)
; through %a2. The assertions expect a register-source and a memory-source
; (v)maxps plus the per-CPU "# sched" annotations emitted by -print-schedule.
1263 define <4 x float> @test_maxps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
1264 ; GENERIC-LABEL: test_maxps:
1266 ; GENERIC-NEXT: maxps %xmm1, %xmm0 # sched: [3:1.00]
1267 ; GENERIC-NEXT: maxps (%rdi), %xmm0 # sched: [9:1.00]
1268 ; GENERIC-NEXT: retq # sched: [1:1.00]
1270 ; ATOM-LABEL: test_maxps:
1272 ; ATOM-NEXT: maxps %xmm1, %xmm0 # sched: [5:5.00]
1273 ; ATOM-NEXT: maxps (%rdi), %xmm0 # sched: [5:5.00]
1274 ; ATOM-NEXT: retq # sched: [79:39.50]
1276 ; SLM-LABEL: test_maxps:
1278 ; SLM-NEXT: maxps %xmm1, %xmm0 # sched: [3:1.00]
1279 ; SLM-NEXT: maxps (%rdi), %xmm0 # sched: [6:1.00]
1280 ; SLM-NEXT: retq # sched: [4:1.00]
1282 ; SANDY-LABEL: test_maxps:
1284 ; SANDY-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1285 ; SANDY-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
1286 ; SANDY-NEXT: retq # sched: [1:1.00]
1288 ; HASWELL-LABEL: test_maxps:
1290 ; HASWELL-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1291 ; HASWELL-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
1292 ; HASWELL-NEXT: retq # sched: [2:1.00]
1294 ; BROADWELL-LABEL: test_maxps:
1295 ; BROADWELL: # BB#0:
1296 ; BROADWELL-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1297 ; BROADWELL-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
1298 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1300 ; SKYLAKE-LABEL: test_maxps:
1302 ; SKYLAKE-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
1303 ; SKYLAKE-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
1304 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1306 ; SKX-LABEL: test_maxps:
1308 ; SKX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
1309 ; SKX-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
1310 ; SKX-NEXT: retq # sched: [7:1.00]
1312 ; BTVER2-LABEL: test_maxps:
1314 ; BTVER2-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1315 ; BTVER2-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
1316 ; BTVER2-NEXT: retq # sched: [4:1.00]
1318 ; ZNVER1-LABEL: test_maxps:
1320 ; ZNVER1-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1321 ; ZNVER1-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
1322 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-source max, then a max whose second operand comes from memory.
1323 %1 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
1324 %2 = load <4 x float>, <4 x float> *%a2, align 16
1325 %3 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %1, <4 x float> %2)
1328 declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
; Scheduling test for scalar single-precision maximum. @llvm.x86.sse.max.ss is
; applied to %a0 and %a1, then to that result and the vector loaded (align 16)
; through %a2. The assertions expect a register-source and a memory-source
; (v)maxss plus the per-CPU "# sched" annotations emitted by -print-schedule.
1330 define <4 x float> @test_maxss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
1331 ; GENERIC-LABEL: test_maxss:
1333 ; GENERIC-NEXT: maxss %xmm1, %xmm0 # sched: [3:1.00]
1334 ; GENERIC-NEXT: maxss (%rdi), %xmm0 # sched: [9:1.00]
1335 ; GENERIC-NEXT: retq # sched: [1:1.00]
1337 ; ATOM-LABEL: test_maxss:
1339 ; ATOM-NEXT: maxss %xmm1, %xmm0 # sched: [5:5.00]
1340 ; ATOM-NEXT: maxss (%rdi), %xmm0 # sched: [5:5.00]
1341 ; ATOM-NEXT: retq # sched: [79:39.50]
1343 ; SLM-LABEL: test_maxss:
1345 ; SLM-NEXT: maxss %xmm1, %xmm0 # sched: [3:1.00]
1346 ; SLM-NEXT: maxss (%rdi), %xmm0 # sched: [6:1.00]
1347 ; SLM-NEXT: retq # sched: [4:1.00]
1349 ; SANDY-LABEL: test_maxss:
1351 ; SANDY-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1352 ; SANDY-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
1353 ; SANDY-NEXT: retq # sched: [1:1.00]
1355 ; HASWELL-LABEL: test_maxss:
1357 ; HASWELL-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1358 ; HASWELL-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
1359 ; HASWELL-NEXT: retq # sched: [2:1.00]
1361 ; BROADWELL-LABEL: test_maxss:
1362 ; BROADWELL: # BB#0:
1363 ; BROADWELL-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1364 ; BROADWELL-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
1365 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1367 ; SKYLAKE-LABEL: test_maxss:
1369 ; SKYLAKE-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
1370 ; SKYLAKE-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
1371 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1373 ; SKX-LABEL: test_maxss:
1375 ; SKX-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
1376 ; SKX-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
1377 ; SKX-NEXT: retq # sched: [7:1.00]
1379 ; BTVER2-LABEL: test_maxss:
1381 ; BTVER2-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1382 ; BTVER2-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
1383 ; BTVER2-NEXT: retq # sched: [4:1.00]
1385 ; ZNVER1-LABEL: test_maxss:
1387 ; ZNVER1-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1388 ; ZNVER1-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
1389 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-source max, then a max whose second operand comes from memory.
1390 %1 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1)
1391 %2 = load <4 x float>, <4 x float> *%a2, align 16
1392 %3 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %1, <4 x float> %2)
1395 declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
; Scheduling test for packed single-precision minimum. @llvm.x86.sse.min.ps is
; applied to %a0 and %a1, then to that result and the vector loaded (align 16)
; through %a2. The assertions expect a register-source and a memory-source
; (v)minps plus the per-CPU "# sched" annotations emitted by -print-schedule.
1397 define <4 x float> @test_minps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
1398 ; GENERIC-LABEL: test_minps:
1400 ; GENERIC-NEXT: minps %xmm1, %xmm0 # sched: [3:1.00]
1401 ; GENERIC-NEXT: minps (%rdi), %xmm0 # sched: [9:1.00]
1402 ; GENERIC-NEXT: retq # sched: [1:1.00]
1404 ; ATOM-LABEL: test_minps:
1406 ; ATOM-NEXT: minps %xmm1, %xmm0 # sched: [5:5.00]
1407 ; ATOM-NEXT: minps (%rdi), %xmm0 # sched: [5:5.00]
1408 ; ATOM-NEXT: retq # sched: [79:39.50]
1410 ; SLM-LABEL: test_minps:
1412 ; SLM-NEXT: minps %xmm1, %xmm0 # sched: [3:1.00]
1413 ; SLM-NEXT: minps (%rdi), %xmm0 # sched: [6:1.00]
1414 ; SLM-NEXT: retq # sched: [4:1.00]
1416 ; SANDY-LABEL: test_minps:
1418 ; SANDY-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1419 ; SANDY-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
1420 ; SANDY-NEXT: retq # sched: [1:1.00]
1422 ; HASWELL-LABEL: test_minps:
1424 ; HASWELL-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1425 ; HASWELL-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
1426 ; HASWELL-NEXT: retq # sched: [2:1.00]
1428 ; BROADWELL-LABEL: test_minps:
1429 ; BROADWELL: # BB#0:
1430 ; BROADWELL-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1431 ; BROADWELL-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
1432 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1434 ; SKYLAKE-LABEL: test_minps:
1436 ; SKYLAKE-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
1437 ; SKYLAKE-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
1438 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1440 ; SKX-LABEL: test_minps:
1442 ; SKX-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
1443 ; SKX-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
1444 ; SKX-NEXT: retq # sched: [7:1.00]
1446 ; BTVER2-LABEL: test_minps:
1448 ; BTVER2-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1449 ; BTVER2-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
1450 ; BTVER2-NEXT: retq # sched: [4:1.00]
1452 ; ZNVER1-LABEL: test_minps:
1454 ; ZNVER1-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1455 ; ZNVER1-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
1456 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-source min, then a min whose second operand comes from memory.
1457 %1 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1)
1458 %2 = load <4 x float>, <4 x float> *%a2, align 16
1459 %3 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %1, <4 x float> %2)
1462 declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
; Scheduling test for scalar single-precision minimum. @llvm.x86.sse.min.ss is
; applied to %a0 and %a1, then to that result and the vector loaded (align 16)
; through %a2. The assertions expect a register-source and a memory-source
; (v)minss plus the per-CPU "# sched" annotations emitted by -print-schedule.
1464 define <4 x float> @test_minss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
1465 ; GENERIC-LABEL: test_minss:
1467 ; GENERIC-NEXT: minss %xmm1, %xmm0 # sched: [3:1.00]
1468 ; GENERIC-NEXT: minss (%rdi), %xmm0 # sched: [9:1.00]
1469 ; GENERIC-NEXT: retq # sched: [1:1.00]
1471 ; ATOM-LABEL: test_minss:
1473 ; ATOM-NEXT: minss %xmm1, %xmm0 # sched: [5:5.00]
1474 ; ATOM-NEXT: minss (%rdi), %xmm0 # sched: [5:5.00]
1475 ; ATOM-NEXT: retq # sched: [79:39.50]
1477 ; SLM-LABEL: test_minss:
1479 ; SLM-NEXT: minss %xmm1, %xmm0 # sched: [3:1.00]
1480 ; SLM-NEXT: minss (%rdi), %xmm0 # sched: [6:1.00]
1481 ; SLM-NEXT: retq # sched: [4:1.00]
1483 ; SANDY-LABEL: test_minss:
1485 ; SANDY-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1486 ; SANDY-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
1487 ; SANDY-NEXT: retq # sched: [1:1.00]
1489 ; HASWELL-LABEL: test_minss:
1491 ; HASWELL-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1492 ; HASWELL-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
1493 ; HASWELL-NEXT: retq # sched: [2:1.00]
1495 ; BROADWELL-LABEL: test_minss:
1496 ; BROADWELL: # BB#0:
1497 ; BROADWELL-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1498 ; BROADWELL-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
1499 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1501 ; SKYLAKE-LABEL: test_minss:
1503 ; SKYLAKE-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
1504 ; SKYLAKE-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
1505 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1507 ; SKX-LABEL: test_minss:
1509 ; SKX-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
1510 ; SKX-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
1511 ; SKX-NEXT: retq # sched: [7:1.00]
1513 ; BTVER2-LABEL: test_minss:
1515 ; BTVER2-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1516 ; BTVER2-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
1517 ; BTVER2-NEXT: retq # sched: [4:1.00]
1519 ; ZNVER1-LABEL: test_minss:
1521 ; ZNVER1-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1522 ; ZNVER1-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
1523 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-source min, then a min whose second operand comes from memory.
1524 %1 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1)
1525 %2 = load <4 x float>, <4 x float> *%a2, align 16
1526 %3 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %1, <4 x float> %2)
1529 declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone
; Scheduling test for aligned packed-single moves. A vector is loaded (align
; 16) through %a0, added to itself, and stored (align 16) through %a1. The
; assertions expect a (v)movaps load, a (v)addps, and a (v)movaps store, plus
; the per-CPU "# sched" annotations emitted by -print-schedule.
1531 define void @test_movaps(<4 x float> *%a0, <4 x float> *%a1) {
1532 ; GENERIC-LABEL: test_movaps:
1534 ; GENERIC-NEXT: movaps (%rdi), %xmm0 # sched: [6:0.50]
1535 ; GENERIC-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
1536 ; GENERIC-NEXT: movaps %xmm0, (%rsi) # sched: [5:1.00]
1537 ; GENERIC-NEXT: retq # sched: [1:1.00]
1539 ; ATOM-LABEL: test_movaps:
1541 ; ATOM-NEXT: movaps (%rdi), %xmm0 # sched: [1:1.00]
1542 ; ATOM-NEXT: addps %xmm0, %xmm0 # sched: [5:5.00]
1543 ; ATOM-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00]
1544 ; ATOM-NEXT: retq # sched: [79:39.50]
1546 ; SLM-LABEL: test_movaps:
1548 ; SLM-NEXT: movaps (%rdi), %xmm0 # sched: [3:1.00]
1549 ; SLM-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
1550 ; SLM-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00]
1551 ; SLM-NEXT: retq # sched: [4:1.00]
1553 ; SANDY-LABEL: test_movaps:
1555 ; SANDY-NEXT: vmovaps (%rdi), %xmm0 # sched: [6:0.50]
1556 ; SANDY-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
1557 ; SANDY-NEXT: vmovaps %xmm0, (%rsi) # sched: [5:1.00]
1558 ; SANDY-NEXT: retq # sched: [1:1.00]
1560 ; HASWELL-LABEL: test_movaps:
1562 ; HASWELL-NEXT: vmovaps (%rdi), %xmm0 # sched: [1:0.50]
1563 ; HASWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
1564 ; HASWELL-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00]
1565 ; HASWELL-NEXT: retq # sched: [2:1.00]
1567 ; BROADWELL-LABEL: test_movaps:
1568 ; BROADWELL: # BB#0:
1569 ; BROADWELL-NEXT: vmovaps (%rdi), %xmm0 # sched: [5:0.50]
1570 ; BROADWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
1571 ; BROADWELL-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00]
1572 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1574 ; SKYLAKE-LABEL: test_movaps:
1576 ; SKYLAKE-NEXT: vmovaps (%rdi), %xmm0 # sched: [6:0.50]
1577 ; SKYLAKE-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
1578 ; SKYLAKE-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00]
1579 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1581 ; SKX-LABEL: test_movaps:
1583 ; SKX-NEXT: vmovaps (%rdi), %xmm0 # sched: [6:0.50]
1584 ; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.33]
1585 ; SKX-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00]
1586 ; SKX-NEXT: retq # sched: [7:1.00]
1588 ; BTVER2-LABEL: test_movaps:
1590 ; BTVER2-NEXT: vmovaps (%rdi), %xmm0 # sched: [5:1.00]
1591 ; BTVER2-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
1592 ; BTVER2-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00]
1593 ; BTVER2-NEXT: retq # sched: [4:1.00]
1595 ; ZNVER1-LABEL: test_movaps:
1597 ; ZNVER1-NEXT: vmovaps (%rdi), %xmm0 # sched: [8:0.50]
1598 ; ZNVER1-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
1599 ; ZNVER1-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:0.50]
1600 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Aligned load, self-add, aligned store.
; NOTE(review): the fadd presumably exists to keep the load/store in the
; floating-point domain so movaps is selected — confirm.
1601 %1 = load <4 x float>, <4 x float> *%a0, align 16
1602 %2 = fadd <4 x float> %1, %1
1603 store <4 x float> %2, <4 x float> *%a1, align 16
; Scheduling test for the movhlps shuffle pattern. The shufflevector mask
; <6, 7, 2, 3> selects the upper two lanes of %a1 followed by the upper two
; lanes of %a0. The non-VEX expectations use movhlps, while the v-prefixed
; expectations use vunpckhpd. The Atom expectation also contains six nops
; before retq. The "# sched" annotations come from -print-schedule.
1609 define <4 x float> @test_movhlps(<4 x float> %a0, <4 x float> %a1) {
1610 ; GENERIC-LABEL: test_movhlps:
1612 ; GENERIC-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
1613 ; GENERIC-NEXT: retq # sched: [1:1.00]
1615 ; ATOM-LABEL: test_movhlps:
1617 ; ATOM-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
1618 ; ATOM-NEXT: nop # sched: [1:0.50]
1619 ; ATOM-NEXT: nop # sched: [1:0.50]
1620 ; ATOM-NEXT: nop # sched: [1:0.50]
1621 ; ATOM-NEXT: nop # sched: [1:0.50]
1622 ; ATOM-NEXT: nop # sched: [1:0.50]
1623 ; ATOM-NEXT: nop # sched: [1:0.50]
1624 ; ATOM-NEXT: retq # sched: [79:39.50]
1626 ; SLM-LABEL: test_movhlps:
1628 ; SLM-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
1629 ; SLM-NEXT: retq # sched: [4:1.00]
1631 ; SANDY-LABEL: test_movhlps:
1633 ; SANDY-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
1634 ; SANDY-NEXT: retq # sched: [1:1.00]
1636 ; HASWELL-LABEL: test_movhlps:
1638 ; HASWELL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
1639 ; HASWELL-NEXT: retq # sched: [2:1.00]
1641 ; BROADWELL-LABEL: test_movhlps:
1642 ; BROADWELL: # BB#0:
1643 ; BROADWELL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
1644 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1646 ; SKYLAKE-LABEL: test_movhlps:
1648 ; SKYLAKE-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
1649 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1651 ; SKX-LABEL: test_movhlps:
1653 ; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
1654 ; SKX-NEXT: retq # sched: [7:1.00]
1656 ; BTVER2-LABEL: test_movhlps:
1658 ; BTVER2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:0.50]
1659 ; BTVER2-NEXT: retq # sched: [4:1.00]
1661 ; ZNVER1-LABEL: test_movhlps:
1663 ; ZNVER1-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:0.50]
1664 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Mask indices 4-7 address %a1, so <6, 7, 2, 3> is {a1[2], a1[3], a0[2], a0[3]}.
1665 %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
; Scheduling test for the movhps load/store patterns. Two floats are loaded
; through %a2 (bitcast from x86_mmx* to <2 x float>*) and placed in the upper
; half of %a1. The result is added to %a0, and the upper two lanes of the sum
; are stored back through the same pointer. The assertions expect a (v)movhpd
; load and a (v)addps. The upper-half store appears as movhlps + movlps in the
; generic and Atom expectations, and as (v)pextrq $1 in the others. The
; "# sched" annotations come from -print-schedule.
1671 define void @test_movhps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) {
1672 ; GENERIC-LABEL: test_movhps:
1674 ; GENERIC-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
1675 ; GENERIC-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
1676 ; GENERIC-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
1677 ; GENERIC-NEXT: movlps %xmm1, (%rdi) # sched: [5:1.00]
1678 ; GENERIC-NEXT: retq # sched: [1:1.00]
1680 ; ATOM-LABEL: test_movhps:
1682 ; ATOM-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00]
1683 ; ATOM-NEXT: addps %xmm0, %xmm1 # sched: [5:5.00]
1684 ; ATOM-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
1685 ; ATOM-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
1686 ; ATOM-NEXT: retq # sched: [79:39.50]
1688 ; SLM-LABEL: test_movhps:
1690 ; SLM-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [4:1.00]
1691 ; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
1692 ; SLM-NEXT: pextrq $1, %xmm1, (%rdi) # sched: [4:2.00]
1693 ; SLM-NEXT: retq # sched: [4:1.00]
1695 ; SANDY-LABEL: test_movhps:
1697 ; SANDY-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
1698 ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1699 ; SANDY-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00]
1700 ; SANDY-NEXT: retq # sched: [1:1.00]
1702 ; HASWELL-LABEL: test_movhps:
1704 ; HASWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00]
1705 ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1706 ; HASWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [1:1.00]
1707 ; HASWELL-NEXT: retq # sched: [2:1.00]
1709 ; BROADWELL-LABEL: test_movhps:
1710 ; BROADWELL: # BB#0:
1711 ; BROADWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
1712 ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1713 ; BROADWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
1714 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1716 ; SKYLAKE-LABEL: test_movhps:
1718 ; SKYLAKE-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
1719 ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
1720 ; SKYLAKE-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
1721 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1723 ; SKX-LABEL: test_movhps:
1725 ; SKX-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
1726 ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
1727 ; SKX-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
1728 ; SKX-NEXT: retq # sched: [7:1.00]
1730 ; BTVER2-LABEL: test_movhps:
1732 ; BTVER2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
1733 ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1734 ; BTVER2-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [6:1.00]
1735 ; BTVER2-NEXT: retq # sched: [4:1.00]
1737 ; ZNVER1-LABEL: test_movhps:
1739 ; ZNVER1-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50]
1740 ; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1741 ; ZNVER1-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [8:1.00]
1742 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Reinterpret the pointer and load two floats (8-byte aligned).
1743 %1 = bitcast x86_mmx* %a2 to <2 x float>*
1744 %2 = load <2 x float>, <2 x float> *%1, align 8
; Widen to four lanes; lanes 2 and 3 come from the undef operand.
1745 %3 = shufflevector <2 x float> %2, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Keep the lower half of %a1 and put the two loaded floats in the upper half.
1746 %4 = shufflevector <4 x float> %a1, <4 x float> %3, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
1747 %5 = fadd <4 x float> %a0, %4
; Extract the upper two lanes of the sum and store them through the same pointer.
1748 %6 = shufflevector <4 x float> %5, <4 x float> undef, <2 x i32> <i32 2, i32 3>
1749 store <2 x float> %6, <2 x float>* %1
; test_movlhps: builds a vector from the low two lanes of %a0 and the low two
; lanes of %a1 (shuffle mask <0,1,4,5>), then adds %a1 to it. The assertions
; expect a movlhps/vmovlhps followed by addps/vaddps, each with the scheduling
; annotation printed by llc -print-schedule for the -mcpu of that prefix.
; The assertions are autogenerated (utils/update_llc_test_checks.py); regenerate
; them instead of editing by hand.
1755 define <4 x float> @test_movlhps(<4 x float> %a0, <4 x float> %a1) {
1756 ; GENERIC-LABEL: test_movlhps:
1758 ; GENERIC-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
1759 ; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
1760 ; GENERIC-NEXT: retq # sched: [1:1.00]
1762 ; ATOM-LABEL: test_movlhps:
1764 ; ATOM-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
1765 ; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00]
1766 ; ATOM-NEXT: retq # sched: [79:39.50]
1768 ; SLM-LABEL: test_movlhps:
1770 ; SLM-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
1771 ; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
1772 ; SLM-NEXT: retq # sched: [4:1.00]
1774 ; SANDY-LABEL: test_movlhps:
1776 ; SANDY-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
1777 ; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
1778 ; SANDY-NEXT: retq # sched: [1:1.00]
1780 ; HASWELL-LABEL: test_movlhps:
1782 ; HASWELL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
1783 ; HASWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
1784 ; HASWELL-NEXT: retq # sched: [2:1.00]
1786 ; BROADWELL-LABEL: test_movlhps:
1787 ; BROADWELL: # BB#0:
1788 ; BROADWELL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
1789 ; BROADWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
1790 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1792 ; SKYLAKE-LABEL: test_movlhps:
1794 ; SKYLAKE-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
1795 ; SKYLAKE-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
1796 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1798 ; SKX-LABEL: test_movlhps:
1800 ; SKX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
1801 ; SKX-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
1802 ; SKX-NEXT: retq # sched: [7:1.00]
1804 ; BTVER2-LABEL: test_movlhps:
1806 ; BTVER2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
1807 ; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
1808 ; BTVER2-NEXT: retq # sched: [4:1.00]
1810 ; ZNVER1-LABEL: test_movlhps:
1812 ; ZNVER1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
1813 ; ZNVER1-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
1814 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Mask indices 0-1 pick lanes from %a0, indices 4-5 pick lanes 0-1 of %a1.
1815 %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
1816 %2 = fadd <4 x float> %a1, %1
; test_movlps: loads two floats through %a2, places them in the low half of
; %a1 (the high half of %a1 is kept), adds %a0, and stores the low two lanes of
; the sum back through the same pointer. The assertions expect a
; movlpd/vmovlpd load, an addps/vaddps, and a movlps/vmovlps store, each with
; the scheduling annotation printed by llc -print-schedule.
; The assertions are autogenerated (utils/update_llc_test_checks.py); regenerate
; them instead of editing by hand.
1820 define void @test_movlps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) {
1821 ; GENERIC-LABEL: test_movlps:
1823 ; GENERIC-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
1824 ; GENERIC-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
1825 ; GENERIC-NEXT: movlps %xmm1, (%rdi) # sched: [5:1.00]
1826 ; GENERIC-NEXT: retq # sched: [1:1.00]
1828 ; ATOM-LABEL: test_movlps:
1830 ; ATOM-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [1:1.00]
1831 ; ATOM-NEXT: addps %xmm0, %xmm1 # sched: [5:5.00]
1832 ; ATOM-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
1833 ; ATOM-NEXT: retq # sched: [79:39.50]
1835 ; SLM-LABEL: test_movlps:
1837 ; SLM-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [4:1.00]
1838 ; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
1839 ; SLM-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
1840 ; SLM-NEXT: retq # sched: [4:1.00]
1842 ; SANDY-LABEL: test_movlps:
1844 ; SANDY-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
1845 ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1846 ; SANDY-NEXT: vmovlps %xmm0, (%rdi) # sched: [5:1.00]
1847 ; SANDY-NEXT: retq # sched: [1:1.00]
1849 ; HASWELL-LABEL: test_movlps:
1851 ; HASWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [1:1.00]
1852 ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1853 ; HASWELL-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
1854 ; HASWELL-NEXT: retq # sched: [2:1.00]
1856 ; BROADWELL-LABEL: test_movlps:
1857 ; BROADWELL: # BB#0:
1858 ; BROADWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
1859 ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1860 ; BROADWELL-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
1861 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1863 ; SKYLAKE-LABEL: test_movlps:
1865 ; SKYLAKE-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
1866 ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
1867 ; SKYLAKE-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
1868 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1870 ; SKX-LABEL: test_movlps:
1872 ; SKX-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
1873 ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
1874 ; SKX-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
1875 ; SKX-NEXT: retq # sched: [7:1.00]
1877 ; BTVER2-LABEL: test_movlps:
1879 ; BTVER2-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
1880 ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1881 ; BTVER2-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
1882 ; BTVER2-NEXT: retq # sched: [4:1.00]
1884 ; ZNVER1-LABEL: test_movlps:
1886 ; ZNVER1-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50]
1887 ; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
1888 ; ZNVER1-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:0.50]
1889 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; The x86_mmx pointer is only used as an 8-byte-aligned <2 x float> location.
1890 %1 = bitcast x86_mmx* %a2 to <2 x float>*
1891 %2 = load <2 x float>, <2 x float> *%1, align 8
; Widen the loaded pair to four lanes (lanes 2-3 come from undef), then take
; lanes 0-1 from it (indices 4,5) and lanes 2-3 from %a1.
1892 %3 = shufflevector <2 x float> %2, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1893 %4 = shufflevector <4 x float> %a1, <4 x float> %3, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
1894 %5 = fadd <4 x float> %a0, %4
; Only the low two lanes of the sum are written back.
1895 %6 = shufflevector <4 x float> %5, <4 x float> undef, <2 x i32> <i32 0, i32 1>
1896 store <2 x float> %6, <2 x float>* %1
; test_movmskps: calls the llvm.x86.sse.movmsk.ps intrinsic on %a0. The
; assertions expect a single movmskps/vmovmskps from %xmm0 into %eax, with the
; scheduling annotation printed by llc -print-schedule. The Atom assertions
; additionally expect two nops before the retq.
; The assertions are autogenerated (utils/update_llc_test_checks.py); regenerate
; them instead of editing by hand.
1900 define i32 @test_movmskps(<4 x float> %a0) {
1901 ; GENERIC-LABEL: test_movmskps:
1903 ; GENERIC-NEXT: movmskps %xmm0, %eax # sched: [2:1.00]
1904 ; GENERIC-NEXT: retq # sched: [1:1.00]
1906 ; ATOM-LABEL: test_movmskps:
1908 ; ATOM-NEXT: movmskps %xmm0, %eax # sched: [3:3.00]
1909 ; ATOM-NEXT: nop # sched: [1:0.50]
1910 ; ATOM-NEXT: nop # sched: [1:0.50]
1911 ; ATOM-NEXT: retq # sched: [79:39.50]
1913 ; SLM-LABEL: test_movmskps:
1915 ; SLM-NEXT: movmskps %xmm0, %eax # sched: [1:0.50]
1916 ; SLM-NEXT: retq # sched: [4:1.00]
1918 ; SANDY-LABEL: test_movmskps:
1920 ; SANDY-NEXT: vmovmskps %xmm0, %eax # sched: [2:1.00]
1921 ; SANDY-NEXT: retq # sched: [1:1.00]
1923 ; HASWELL-LABEL: test_movmskps:
1925 ; HASWELL-NEXT: vmovmskps %xmm0, %eax # sched: [3:1.00]
1926 ; HASWELL-NEXT: retq # sched: [2:1.00]
1928 ; BROADWELL-LABEL: test_movmskps:
1929 ; BROADWELL: # BB#0:
1930 ; BROADWELL-NEXT: vmovmskps %xmm0, %eax # sched: [3:1.00]
1931 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1933 ; SKYLAKE-LABEL: test_movmskps:
1935 ; SKYLAKE-NEXT: vmovmskps %xmm0, %eax # sched: [2:1.00]
1936 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1938 ; SKX-LABEL: test_movmskps:
1940 ; SKX-NEXT: vmovmskps %xmm0, %eax # sched: [2:1.00]
1941 ; SKX-NEXT: retq # sched: [7:1.00]
1943 ; BTVER2-LABEL: test_movmskps:
1945 ; BTVER2-NEXT: vmovmskps %xmm0, %eax # sched: [3:1.00]
1946 ; BTVER2-NEXT: retq # sched: [4:1.00]
1948 ; ZNVER1-LABEL: test_movmskps:
1950 ; ZNVER1-NEXT: vmovmskps %xmm0, %eax # sched: [1:1.00]
1951 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1952 %1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
; Declaration of the SSE movmsk.ps intrinsic called by test_movmskps; it takes
; a <4 x float> and returns an i32.
1955 declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
; test_movntps: stores %a0 through %a1 with 16-byte alignment and !nontemporal
; metadata. The assertions expect a single movntps/vmovntps to (%rdi), with the
; scheduling annotation printed by llc -print-schedule. The Atom assertions
; additionally expect six nops before the retq.
; NOTE(review): the !0 metadata node is defined outside this excerpt.
; The assertions are autogenerated (utils/update_llc_test_checks.py); regenerate
; them instead of editing by hand.
1957 define void @test_movntps(<4 x float> %a0, <4 x float> *%a1) {
1958 ; GENERIC-LABEL: test_movntps:
1960 ; GENERIC-NEXT: movntps %xmm0, (%rdi) # sched: [5:1.00]
1961 ; GENERIC-NEXT: retq # sched: [1:1.00]
1963 ; ATOM-LABEL: test_movntps:
1965 ; ATOM-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00]
1966 ; ATOM-NEXT: nop # sched: [1:0.50]
1967 ; ATOM-NEXT: nop # sched: [1:0.50]
1968 ; ATOM-NEXT: nop # sched: [1:0.50]
1969 ; ATOM-NEXT: nop # sched: [1:0.50]
1970 ; ATOM-NEXT: nop # sched: [1:0.50]
1971 ; ATOM-NEXT: nop # sched: [1:0.50]
1972 ; ATOM-NEXT: retq # sched: [79:39.50]
1974 ; SLM-LABEL: test_movntps:
1976 ; SLM-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00]
1977 ; SLM-NEXT: retq # sched: [4:1.00]
1979 ; SANDY-LABEL: test_movntps:
1981 ; SANDY-NEXT: vmovntps %xmm0, (%rdi) # sched: [5:1.00]
1982 ; SANDY-NEXT: retq # sched: [1:1.00]
1984 ; HASWELL-LABEL: test_movntps:
1986 ; HASWELL-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00]
1987 ; HASWELL-NEXT: retq # sched: [2:1.00]
1989 ; BROADWELL-LABEL: test_movntps:
1990 ; BROADWELL: # BB#0:
1991 ; BROADWELL-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00]
1992 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1994 ; SKYLAKE-LABEL: test_movntps:
1996 ; SKYLAKE-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00]
1997 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1999 ; SKX-LABEL: test_movntps:
2001 ; SKX-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00]
2002 ; SKX-NEXT: retq # sched: [7:1.00]
2004 ; BTVER2-LABEL: test_movntps:
2006 ; BTVER2-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00]
2007 ; BTVER2-NEXT: retq # sched: [4:1.00]
2009 ; ZNVER1-LABEL: test_movntps:
2011 ; ZNVER1-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:0.50]
2012 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2013 store <4 x float> %a0, <4 x float> *%a1, align 16, !nontemporal !0
; test_movss_mem: loads a float from %a0 (align 1), adds it to itself, and
; stores the result to %a1 (align 1). The assertions expect a movss/vmovss
; load, an addss/vaddss, and a movss/vmovss store, each with the scheduling
; annotation printed by llc -print-schedule.
; The assertions are autogenerated (utils/update_llc_test_checks.py); regenerate
; them instead of editing by hand.
2017 define void @test_movss_mem(float* %a0, float* %a1) {
2018 ; GENERIC-LABEL: test_movss_mem:
2020 ; GENERIC-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
2021 ; GENERIC-NEXT: addss %xmm0, %xmm0 # sched: [3:1.00]
2022 ; GENERIC-NEXT: movss %xmm0, (%rsi) # sched: [5:1.00]
2023 ; GENERIC-NEXT: retq # sched: [1:1.00]
2025 ; ATOM-LABEL: test_movss_mem:
2027 ; ATOM-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [1:1.00]
2028 ; ATOM-NEXT: addss %xmm0, %xmm0 # sched: [5:5.00]
2029 ; ATOM-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00]
2030 ; ATOM-NEXT: retq # sched: [79:39.50]
2032 ; SLM-LABEL: test_movss_mem:
2034 ; SLM-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [3:1.00]
2035 ; SLM-NEXT: addss %xmm0, %xmm0 # sched: [3:1.00]
2036 ; SLM-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00]
2037 ; SLM-NEXT: retq # sched: [4:1.00]
2039 ; SANDY-LABEL: test_movss_mem:
2041 ; SANDY-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
2042 ; SANDY-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
2043 ; SANDY-NEXT: vmovss %xmm0, (%rsi) # sched: [5:1.00]
2044 ; SANDY-NEXT: retq # sched: [1:1.00]
2046 ; HASWELL-LABEL: test_movss_mem:
2048 ; HASWELL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [1:0.50]
2049 ; HASWELL-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
2050 ; HASWELL-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00]
2051 ; HASWELL-NEXT: retq # sched: [2:1.00]
2053 ; BROADWELL-LABEL: test_movss_mem:
2054 ; BROADWELL: # BB#0:
2055 ; BROADWELL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
2056 ; BROADWELL-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
2057 ; BROADWELL-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00]
2058 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2060 ; SKYLAKE-LABEL: test_movss_mem:
2062 ; SKYLAKE-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
2063 ; SKYLAKE-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
2064 ; SKYLAKE-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00]
2065 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2067 ; SKX-LABEL: test_movss_mem:
2069 ; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
2070 ; SKX-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [4:0.33]
2071 ; SKX-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00]
2072 ; SKX-NEXT: retq # sched: [7:1.00]
2074 ; BTVER2-LABEL: test_movss_mem:
2076 ; BTVER2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:1.00]
2077 ; BTVER2-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
2078 ; BTVER2-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00]
2079 ; BTVER2-NEXT: retq # sched: [4:1.00]
2081 ; ZNVER1-LABEL: test_movss_mem:
2083 ; ZNVER1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [8:0.50]
2084 ; ZNVER1-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
2085 ; ZNVER1-NEXT: vmovss %xmm0, (%rsi) # sched: [1:0.50]
2086 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2087 %1 = load float, float* %a0, align 1
2088 %2 = fadd float %1, %1
2089 store float %2, float *%a1, align 1
; test_movss_reg: shuffles lane 0 of %a1 into lane 0 of %a0 (mask <4,1,2,3>),
; keeping lanes 1-3 of %a0. The expected instruction differs per CPU: the
; generic, Atom and SKX assertions expect movss/vmovss, while the remaining
; prefixes expect blendps/vblendps. Each carries the scheduling annotation
; printed by llc -print-schedule; the Atom assertions also expect six nops.
; The assertions are autogenerated (utils/update_llc_test_checks.py); regenerate
; them instead of editing by hand.
2093 define <4 x float> @test_movss_reg(<4 x float> %a0, <4 x float> %a1) {
2094 ; GENERIC-LABEL: test_movss_reg:
2096 ; GENERIC-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00]
2097 ; GENERIC-NEXT: retq # sched: [1:1.00]
2099 ; ATOM-LABEL: test_movss_reg:
2101 ; ATOM-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
2102 ; ATOM-NEXT: nop # sched: [1:0.50]
2103 ; ATOM-NEXT: nop # sched: [1:0.50]
2104 ; ATOM-NEXT: nop # sched: [1:0.50]
2105 ; ATOM-NEXT: nop # sched: [1:0.50]
2106 ; ATOM-NEXT: nop # sched: [1:0.50]
2107 ; ATOM-NEXT: nop # sched: [1:0.50]
2108 ; ATOM-NEXT: retq # sched: [79:39.50]
2110 ; SLM-LABEL: test_movss_reg:
2112 ; SLM-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00]
2113 ; SLM-NEXT: retq # sched: [4:1.00]
2115 ; SANDY-LABEL: test_movss_reg:
2117 ; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
2118 ; SANDY-NEXT: retq # sched: [1:1.00]
2120 ; HASWELL-LABEL: test_movss_reg:
2122 ; HASWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33]
2123 ; HASWELL-NEXT: retq # sched: [2:1.00]
2125 ; BROADWELL-LABEL: test_movss_reg:
2126 ; BROADWELL: # BB#0:
2127 ; BROADWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33]
2128 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2130 ; SKYLAKE-LABEL: test_movss_reg:
2132 ; SKYLAKE-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33]
2133 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2135 ; SKX-LABEL: test_movss_reg:
2137 ; SKX-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00]
2138 ; SKX-NEXT: retq # sched: [7:1.00]
2140 ; BTVER2-LABEL: test_movss_reg:
2142 ; BTVER2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
2143 ; BTVER2-NEXT: retq # sched: [4:1.00]
2145 ; ZNVER1-LABEL: test_movss_reg:
2147 ; ZNVER1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
2148 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Index 4 selects lane 0 of the second operand (%a1).
2149 %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
; test_movups: loads a <4 x float> from %a0 with align 1, adds it to itself,
; and stores the result to %a1 with align 1. The assertions expect a
; movups/vmovups load, an addps/vaddps, and a movups/vmovups store, each with
; the scheduling annotation printed by llc -print-schedule.
; The assertions are autogenerated (utils/update_llc_test_checks.py); regenerate
; them instead of editing by hand.
2153 define void @test_movups(<4 x float> *%a0, <4 x float> *%a1) {
2154 ; GENERIC-LABEL: test_movups:
2156 ; GENERIC-NEXT: movups (%rdi), %xmm0 # sched: [6:0.50]
2157 ; GENERIC-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
2158 ; GENERIC-NEXT: movups %xmm0, (%rsi) # sched: [5:1.00]
2159 ; GENERIC-NEXT: retq # sched: [1:1.00]
2161 ; ATOM-LABEL: test_movups:
2163 ; ATOM-NEXT: movups (%rdi), %xmm0 # sched: [3:1.50]
2164 ; ATOM-NEXT: addps %xmm0, %xmm0 # sched: [5:5.00]
2165 ; ATOM-NEXT: movups %xmm0, (%rsi) # sched: [2:1.00]
2166 ; ATOM-NEXT: retq # sched: [79:39.50]
2168 ; SLM-LABEL: test_movups:
2170 ; SLM-NEXT: movups (%rdi), %xmm0 # sched: [3:1.00]
2171 ; SLM-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
2172 ; SLM-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00]
2173 ; SLM-NEXT: retq # sched: [4:1.00]
2175 ; SANDY-LABEL: test_movups:
2177 ; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50]
2178 ; SANDY-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
2179 ; SANDY-NEXT: vmovups %xmm0, (%rsi) # sched: [5:1.00]
2180 ; SANDY-NEXT: retq # sched: [1:1.00]
2182 ; HASWELL-LABEL: test_movups:
2184 ; HASWELL-NEXT: vmovups (%rdi), %xmm0 # sched: [1:0.50]
2185 ; HASWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
2186 ; HASWELL-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
2187 ; HASWELL-NEXT: retq # sched: [2:1.00]
2189 ; BROADWELL-LABEL: test_movups:
2190 ; BROADWELL: # BB#0:
2191 ; BROADWELL-NEXT: vmovups (%rdi), %xmm0 # sched: [5:0.50]
2192 ; BROADWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
2193 ; BROADWELL-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
2194 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2196 ; SKYLAKE-LABEL: test_movups:
2198 ; SKYLAKE-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50]
2199 ; SKYLAKE-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
2200 ; SKYLAKE-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
2201 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2203 ; SKX-LABEL: test_movups:
2205 ; SKX-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50]
2206 ; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.33]
2207 ; SKX-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
2208 ; SKX-NEXT: retq # sched: [7:1.00]
2210 ; BTVER2-LABEL: test_movups:
2212 ; BTVER2-NEXT: vmovups (%rdi), %xmm0 # sched: [5:1.00]
2213 ; BTVER2-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
2214 ; BTVER2-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
2215 ; BTVER2-NEXT: retq # sched: [4:1.00]
2217 ; ZNVER1-LABEL: test_movups:
2219 ; ZNVER1-NEXT: vmovups (%rdi), %xmm0 # sched: [8:0.50]
2220 ; ZNVER1-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
2221 ; ZNVER1-NEXT: vmovups %xmm0, (%rsi) # sched: [1:0.50]
2222 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2223 %1 = load <4 x float>, <4 x float> *%a0, align 1
2224 %2 = fadd <4 x float> %1, %1
2225 store <4 x float> %2, <4 x float> *%a1, align 1
; test_mulps: multiplies %a0 by %a1, then multiplies the product by a
; <4 x float> loaded from %a2 (align 16). The assertions expect the
; register-register form of mulps/vmulps followed by the memory-operand form,
; each with the scheduling annotation printed by llc -print-schedule.
; The assertions are autogenerated (utils/update_llc_test_checks.py); regenerate
; them instead of editing by hand.
2229 define <4 x float> @test_mulps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
2230 ; GENERIC-LABEL: test_mulps:
2232 ; GENERIC-NEXT: mulps %xmm1, %xmm0 # sched: [5:1.00]
2233 ; GENERIC-NEXT: mulps (%rdi), %xmm0 # sched: [11:1.00]
2234 ; GENERIC-NEXT: retq # sched: [1:1.00]
2236 ; ATOM-LABEL: test_mulps:
2238 ; ATOM-NEXT: mulps %xmm1, %xmm0 # sched: [5:5.00]
2239 ; ATOM-NEXT: mulps (%rdi), %xmm0 # sched: [10:5.00]
2240 ; ATOM-NEXT: retq # sched: [79:39.50]
2242 ; SLM-LABEL: test_mulps:
2244 ; SLM-NEXT: mulps %xmm1, %xmm0 # sched: [5:2.00]
2245 ; SLM-NEXT: mulps (%rdi), %xmm0 # sched: [8:2.00]
2246 ; SLM-NEXT: retq # sched: [4:1.00]
2248 ; SANDY-LABEL: test_mulps:
2250 ; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
2251 ; SANDY-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
2252 ; SANDY-NEXT: retq # sched: [1:1.00]
2254 ; HASWELL-LABEL: test_mulps:
2256 ; HASWELL-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
2257 ; HASWELL-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
2258 ; HASWELL-NEXT: retq # sched: [2:1.00]
2260 ; BROADWELL-LABEL: test_mulps:
2261 ; BROADWELL: # BB#0:
2262 ; BROADWELL-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
2263 ; BROADWELL-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
2264 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2266 ; SKYLAKE-LABEL: test_mulps:
2268 ; SKYLAKE-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
2269 ; SKYLAKE-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
2270 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2272 ; SKX-LABEL: test_mulps:
2274 ; SKX-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
2275 ; SKX-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
2276 ; SKX-NEXT: retq # sched: [7:1.00]
2278 ; BTVER2-LABEL: test_mulps:
2280 ; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
2281 ; BTVER2-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
2282 ; BTVER2-NEXT: retq # sched: [4:1.00]
2284 ; ZNVER1-LABEL: test_mulps:
2286 ; ZNVER1-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
2287 ; ZNVER1-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
2288 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2289 %1 = fmul <4 x float> %a0, %a1
2290 %2 = load <4 x float>, <4 x float> *%a2, align 16
2291 %3 = fmul <4 x float> %1, %2
; test_mulss: multiplies the scalars %a0 and %a1, then multiplies the product
; by a float loaded from %a2 (align 4). The assertions expect the
; register-register form of mulss/vmulss followed by the memory-operand form,
; each with the scheduling annotation printed by llc -print-schedule.
; The assertions are autogenerated (utils/update_llc_test_checks.py); regenerate
; them instead of editing by hand.
2295 define float @test_mulss(float %a0, float %a1, float *%a2) {
2296 ; GENERIC-LABEL: test_mulss:
2298 ; GENERIC-NEXT: mulss %xmm1, %xmm0 # sched: [5:1.00]
2299 ; GENERIC-NEXT: mulss (%rdi), %xmm0 # sched: [11:1.00]
2300 ; GENERIC-NEXT: retq # sched: [1:1.00]
2302 ; ATOM-LABEL: test_mulss:
2304 ; ATOM-NEXT: mulss %xmm1, %xmm0 # sched: [4:4.00]
2305 ; ATOM-NEXT: mulss (%rdi), %xmm0 # sched: [5:5.00]
2306 ; ATOM-NEXT: retq # sched: [79:39.50]
2308 ; SLM-LABEL: test_mulss:
2310 ; SLM-NEXT: mulss %xmm1, %xmm0 # sched: [5:2.00]
2311 ; SLM-NEXT: mulss (%rdi), %xmm0 # sched: [8:2.00]
2312 ; SLM-NEXT: retq # sched: [4:1.00]
2314 ; SANDY-LABEL: test_mulss:
2316 ; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
2317 ; SANDY-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
2318 ; SANDY-NEXT: retq # sched: [1:1.00]
2320 ; HASWELL-LABEL: test_mulss:
2322 ; HASWELL-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
2323 ; HASWELL-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [5:0.50]
2324 ; HASWELL-NEXT: retq # sched: [2:1.00]
2326 ; BROADWELL-LABEL: test_mulss:
2327 ; BROADWELL: # BB#0:
2328 ; BROADWELL-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
2329 ; BROADWELL-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
2330 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2332 ; SKYLAKE-LABEL: test_mulss:
2334 ; SKYLAKE-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
2335 ; SKYLAKE-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
2336 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2338 ; SKX-LABEL: test_mulss:
2340 ; SKX-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
2341 ; SKX-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
2342 ; SKX-NEXT: retq # sched: [7:1.00]
2344 ; BTVER2-LABEL: test_mulss:
2346 ; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
2347 ; BTVER2-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
2348 ; BTVER2-NEXT: retq # sched: [4:1.00]
2350 ; ZNVER1-LABEL: test_mulss:
2352 ; ZNVER1-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
2353 ; ZNVER1-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
2354 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2355 %1 = fmul float %a0, %a1
2356 %2 = load float, float *%a2, align 4
2357 %3 = fmul float %1, %2
; test_orps: ORs %a0 with %a1 and then with a vector loaded from %a2
; (align 16). The float vectors are bitcast to <4 x i32> for the integer 'or'
; and the result is bitcast back to <4 x float>. The assertions expect the
; register-register form of orps/vorps followed by the memory-operand form,
; each with the scheduling annotation printed by llc -print-schedule; the Atom
; assertions also expect four nops before the retq.
; The assertions are autogenerated (utils/update_llc_test_checks.py); regenerate
; them instead of editing by hand.
2361 define <4 x float> @test_orps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
2362 ; GENERIC-LABEL: test_orps:
2364 ; GENERIC-NEXT: orps %xmm1, %xmm0 # sched: [1:1.00]
2365 ; GENERIC-NEXT: orps (%rdi), %xmm0 # sched: [7:1.00]
2366 ; GENERIC-NEXT: retq # sched: [1:1.00]
2368 ; ATOM-LABEL: test_orps:
2370 ; ATOM-NEXT: orps %xmm1, %xmm0 # sched: [1:0.50]
2371 ; ATOM-NEXT: orps (%rdi), %xmm0 # sched: [1:1.00]
2372 ; ATOM-NEXT: nop # sched: [1:0.50]
2373 ; ATOM-NEXT: nop # sched: [1:0.50]
2374 ; ATOM-NEXT: nop # sched: [1:0.50]
2375 ; ATOM-NEXT: nop # sched: [1:0.50]
2376 ; ATOM-NEXT: retq # sched: [79:39.50]
2378 ; SLM-LABEL: test_orps:
2380 ; SLM-NEXT: orps %xmm1, %xmm0 # sched: [1:0.50]
2381 ; SLM-NEXT: orps (%rdi), %xmm0 # sched: [4:1.00]
2382 ; SLM-NEXT: retq # sched: [4:1.00]
2384 ; SANDY-LABEL: test_orps:
2386 ; SANDY-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
2387 ; SANDY-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
2388 ; SANDY-NEXT: retq # sched: [1:1.00]
2390 ; HASWELL-LABEL: test_orps:
2392 ; HASWELL-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
2393 ; HASWELL-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
2394 ; HASWELL-NEXT: retq # sched: [2:1.00]
2396 ; BROADWELL-LABEL: test_orps:
2397 ; BROADWELL: # BB#0:
2398 ; BROADWELL-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
2399 ; BROADWELL-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
2400 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2402 ; SKYLAKE-LABEL: test_orps:
2404 ; SKYLAKE-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
2405 ; SKYLAKE-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
2406 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2408 ; SKX-LABEL: test_orps:
2410 ; SKX-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
2411 ; SKX-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
2412 ; SKX-NEXT: retq # sched: [7:1.00]
2414 ; BTVER2-LABEL: test_orps:
2416 ; BTVER2-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2417 ; BTVER2-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
2418 ; BTVER2-NEXT: retq # sched: [4:1.00]
2420 ; ZNVER1-LABEL: test_orps:
2422 ; ZNVER1-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
2423 ; ZNVER1-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
2424 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; 'or' is an integer operation, hence the bitcasts around it.
2425 %1 = bitcast <4 x float> %a0 to <4 x i32>
2426 %2 = bitcast <4 x float> %a1 to <4 x i32>
2427 %3 = or <4 x i32> %1, %2
2428 %4 = load <4 x float>, <4 x float> *%a2, align 16
2429 %5 = bitcast <4 x float> %4 to <4 x i32>
2430 %6 = or <4 x i32> %3, %5
2431 %7 = bitcast <4 x i32> %6 to <4 x float>
; test_prefetchnta: calls llvm.prefetch on %a0 with the constant operands
; (0, 0, 1). The assertions expect a single prefetchnta (%rdi) for every CPU,
; with the scheduling annotation printed by llc -print-schedule; the Atom
; assertions also expect six nops before the retq.
; The assertions are autogenerated (utils/update_llc_test_checks.py); regenerate
; them instead of editing by hand.
2435 define void @test_prefetchnta(i8* %a0) {
2436 ; GENERIC-LABEL: test_prefetchnta:
2438 ; GENERIC-NEXT: prefetchnta (%rdi) # sched: [5:0.50]
2439 ; GENERIC-NEXT: retq # sched: [1:1.00]
2441 ; ATOM-LABEL: test_prefetchnta:
2443 ; ATOM-NEXT: prefetchnta (%rdi) # sched: [1:1.00]
2444 ; ATOM-NEXT: nop # sched: [1:0.50]
2445 ; ATOM-NEXT: nop # sched: [1:0.50]
2446 ; ATOM-NEXT: nop # sched: [1:0.50]
2447 ; ATOM-NEXT: nop # sched: [1:0.50]
2448 ; ATOM-NEXT: nop # sched: [1:0.50]
2449 ; ATOM-NEXT: nop # sched: [1:0.50]
2450 ; ATOM-NEXT: retq # sched: [79:39.50]
2452 ; SLM-LABEL: test_prefetchnta:
2454 ; SLM-NEXT: prefetchnta (%rdi) # sched: [3:1.00]
2455 ; SLM-NEXT: retq # sched: [4:1.00]
2457 ; SANDY-LABEL: test_prefetchnta:
2459 ; SANDY-NEXT: prefetchnta (%rdi) # sched: [5:0.50]
2460 ; SANDY-NEXT: retq # sched: [1:1.00]
2462 ; HASWELL-LABEL: test_prefetchnta:
2464 ; HASWELL-NEXT: prefetchnta (%rdi) # sched: [1:0.50]
2465 ; HASWELL-NEXT: retq # sched: [2:1.00]
2467 ; BROADWELL-LABEL: test_prefetchnta:
2468 ; BROADWELL: # BB#0:
2469 ; BROADWELL-NEXT: prefetchnta (%rdi) # sched: [5:0.50]
2470 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2472 ; SKYLAKE-LABEL: test_prefetchnta:
2474 ; SKYLAKE-NEXT: prefetchnta (%rdi) # sched: [5:0.50]
2475 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2477 ; SKX-LABEL: test_prefetchnta:
2479 ; SKX-NEXT: prefetchnta (%rdi) # sched: [5:0.50]
2480 ; SKX-NEXT: retq # sched: [7:1.00]
2482 ; BTVER2-LABEL: test_prefetchnta:
2484 ; BTVER2-NEXT: prefetchnta (%rdi) # sched: [5:1.00]
2485 ; BTVER2-NEXT: retq # sched: [4:1.00]
2487 ; ZNVER1-LABEL: test_prefetchnta:
2489 ; ZNVER1-NEXT: prefetchnta (%rdi) # sched: [8:0.50]
2490 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Per the LLVM LangRef the operands after the address are rw, locality and
; cache type: here rw=0 (read), locality=0 (none), cache type=1 (data).
2491 call void @llvm.prefetch(i8* %a0, i32 0, i32 0, i32 1)
; Declaration of the generic prefetch intrinsic called by test_prefetchnta.
; Per the LLVM LangRef the operands are address, rw, locality and cache type.
2494 declare void @llvm.prefetch(i8* nocapture, i32, i32, i32) nounwind readnone
; test_rcpps: applies the llvm.x86.sse.rcp.ps intrinsic to %a0 and to a vector
; loaded from %a1 (align 16), then adds the two results. The assertions expect
; one register-form and one memory-form rcpps/vrcpps plus an addps/vaddps, each
; with the scheduling annotation printed by llc -print-schedule. The order of
; the two rcpps instructions differs between CPUs, and the Atom and SLM
; assertions end with an extra movaps into %xmm0.
; The assertions are autogenerated (utils/update_llc_test_checks.py); regenerate
; them instead of editing by hand.
2496 define <4 x float> @test_rcpps(<4 x float> %a0, <4 x float> *%a1) {
2497 ; GENERIC-LABEL: test_rcpps:
2499 ; GENERIC-NEXT: rcpps %xmm0, %xmm1 # sched: [5:1.00]
2500 ; GENERIC-NEXT: rcpps (%rdi), %xmm0 # sched: [11:1.00]
2501 ; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
2502 ; GENERIC-NEXT: retq # sched: [1:1.00]
2504 ; ATOM-LABEL: test_rcpps:
2506 ; ATOM-NEXT: rcpps (%rdi), %xmm1 # sched: [10:5.00]
2507 ; ATOM-NEXT: rcpps %xmm0, %xmm0 # sched: [9:4.50]
2508 ; ATOM-NEXT: addps %xmm0, %xmm1 # sched: [5:5.00]
2509 ; ATOM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
2510 ; ATOM-NEXT: retq # sched: [79:39.50]
2512 ; SLM-LABEL: test_rcpps:
2514 ; SLM-NEXT: rcpps (%rdi), %xmm1 # sched: [8:1.00]
2515 ; SLM-NEXT: rcpps %xmm0, %xmm0 # sched: [5:1.00]
2516 ; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
2517 ; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
2518 ; SLM-NEXT: retq # sched: [4:1.00]
2520 ; SANDY-LABEL: test_rcpps:
2522 ; SANDY-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:1.00]
2523 ; SANDY-NEXT: vrcpps (%rdi), %xmm1 # sched: [11:1.00]
2524 ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2525 ; SANDY-NEXT: retq # sched: [1:1.00]
2527 ; HASWELL-LABEL: test_rcpps:
2529 ; HASWELL-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:1.00]
2530 ; HASWELL-NEXT: vrcpps (%rdi), %xmm1 # sched: [5:1.00]
2531 ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2532 ; HASWELL-NEXT: retq # sched: [2:1.00]
2534 ; BROADWELL-LABEL: test_rcpps:
2535 ; BROADWELL: # BB#0:
2536 ; BROADWELL-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:1.00]
2537 ; BROADWELL-NEXT: vrcpps (%rdi), %xmm1 # sched: [10:1.00]
2538 ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2539 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2541 ; SKYLAKE-LABEL: test_rcpps:
2543 ; SKYLAKE-NEXT: vrcpps %xmm0, %xmm0 # sched: [4:1.00]
2544 ; SKYLAKE-NEXT: vrcpps (%rdi), %xmm1 # sched: [10:1.00]
2545 ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
2546 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2548 ; SKX-LABEL: test_rcpps:
2550 ; SKX-NEXT: vrcpps %xmm0, %xmm0 # sched: [4:1.00]
2551 ; SKX-NEXT: vrcpps (%rdi), %xmm1 # sched: [10:1.00]
2552 ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
2553 ; SKX-NEXT: retq # sched: [7:1.00]
2555 ; BTVER2-LABEL: test_rcpps:
2557 ; BTVER2-NEXT: vrcpps (%rdi), %xmm1 # sched: [7:1.00]
2558 ; BTVER2-NEXT: vrcpps %xmm0, %xmm0 # sched: [2:1.00]
2559 ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2560 ; BTVER2-NEXT: retq # sched: [4:1.00]
2562 ; ZNVER1-LABEL: test_rcpps:
2564 ; ZNVER1-NEXT: vrcpps (%rdi), %xmm1 # sched: [12:0.50]
2565 ; ZNVER1-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:0.50]
2566 ; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2567 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2568 %1 = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0)
2569 %2 = load <4 x float>, <4 x float> *%a1, align 16
2570 %3 = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %2)
2571 %4 = fadd <4 x float> %1, %3
; Declaration of the SSE rcp.ps intrinsic called by test_rcpps; it takes and
; returns a <4 x float>.
2574 declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone
; test_rcpss: inserts %a0, and separately a float loaded from %a1 (align 4),
; into lane 0 of an undef <4 x float>, applies the llvm.x86.sse.rcp.ss
; intrinsic to each vector, and adds the two results. The assertions expect a
; movss/vmovss load, two register-form rcpss/vrcpss, and an addps/vaddps, with
; the scheduling annotation printed by llc -print-schedule. The two Atom rcpss
; assertions carry no sched annotation.
; The assertions are autogenerated (utils/update_llc_test_checks.py); regenerate
; them instead of editing by hand.
2578 define <4 x float> @test_rcpss(float %a0, float *%a1) {
2579 ; GENERIC-LABEL: test_rcpss:
2581 ; GENERIC-NEXT: rcpss %xmm0, %xmm0 # sched: [5:1.00]
2582 ; GENERIC-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
2583 ; GENERIC-NEXT: rcpss %xmm1, %xmm1 # sched: [5:1.00]
2584 ; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
2585 ; GENERIC-NEXT: retq # sched: [1:1.00]
2587 ; ATOM-LABEL: test_rcpss:
2589 ; ATOM-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:1.00]
2590 ; ATOM-NEXT: rcpss %xmm0, %xmm0
2591 ; ATOM-NEXT: rcpss %xmm1, %xmm1
2592 ; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00]
2593 ; ATOM-NEXT: retq # sched: [79:39.50]
2595 ; SLM-LABEL: test_rcpss:
2597 ; SLM-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [3:1.00]
2598 ; SLM-NEXT: rcpss %xmm0, %xmm0 # sched: [8:1.00]
2599 ; SLM-NEXT: rcpss %xmm1, %xmm1 # sched: [8:1.00]
2600 ; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
2601 ; SLM-NEXT: retq # sched: [4:1.00]
2603 ; SANDY-LABEL: test_rcpss:
2605 ; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
2606 ; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
2607 ; SANDY-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
2608 ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2609 ; SANDY-NEXT: retq # sched: [1:1.00]
2611 ; HASWELL-LABEL: test_rcpss:
2613 ; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
2614 ; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:0.50]
2615 ; HASWELL-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
2616 ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2617 ; HASWELL-NEXT: retq # sched: [2:1.00]
2619 ; BROADWELL-LABEL: test_rcpss:
2620 ; BROADWELL: # BB#0:
2621 ; BROADWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
2622 ; BROADWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
2623 ; BROADWELL-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
2624 ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2625 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2627 ; SKYLAKE-LABEL: test_rcpss:
2629 ; SKYLAKE-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
2630 ; SKYLAKE-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
2631 ; SKYLAKE-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
2632 ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
2633 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2635 ; SKX-LABEL: test_rcpss:
2637 ; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
2638 ; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
2639 ; SKX-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
2640 ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
2641 ; SKX-NEXT: retq # sched: [7:1.00]
2643 ; BTVER2-LABEL: test_rcpss:
2645 ; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
2646 ; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [7:1.00]
2647 ; BTVER2-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [7:1.00]
2648 ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2649 ; BTVER2-NEXT: retq # sched: [4:1.00]
2651 ; ZNVER1-LABEL: test_rcpss:
2653 ; ZNVER1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [8:0.50]
2654 ; ZNVER1-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [12:0.50]
2655 ; ZNVER1-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [12:0.50]
2656 ; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2657 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; The intrinsic takes a vector, so each scalar is first placed in lane 0.
2658 %1 = insertelement <4 x float> undef, float %a0, i32 0
2659 %2 = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %1)
2660 %3 = load float, float *%a1, align 4
2661 %4 = insertelement <4 x float> undef, float %3, i32 0
2662 %5 = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %4)
2663 %6 = fadd <4 x float> %2, %5
2666 declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
; test_rsqrtps: applies the llvm.x86.sse.rsqrt.ps intrinsic to the register
; argument %a0 and to a vector loaded from %a1, then adds the two results.
; The per-CPU assertion lines that follow are autogenerated (see the note on
; the first line of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
; The "sched: [x:y]" suffix is the annotation printed by -print-schedule
; (presumably latency and reciprocal throughput -- confirm).
2668 define <4 x float> @test_rsqrtps(<4 x float> %a0, <4 x float> *%a1) {
2669 ; GENERIC-LABEL: test_rsqrtps:
2671 ; GENERIC-NEXT: rsqrtps %xmm0, %xmm1 # sched: [5:1.00]
2672 ; GENERIC-NEXT: rsqrtps (%rdi), %xmm0 # sched: [11:1.00]
2673 ; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
2674 ; GENERIC-NEXT: retq # sched: [1:1.00]
2676 ; ATOM-LABEL: test_rsqrtps:
2678 ; ATOM-NEXT: rsqrtps (%rdi), %xmm1 # sched: [10:5.00]
2679 ; ATOM-NEXT: rsqrtps %xmm0, %xmm0 # sched: [9:4.50]
2680 ; ATOM-NEXT: addps %xmm0, %xmm1 # sched: [5:5.00]
2681 ; ATOM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
2682 ; ATOM-NEXT: retq # sched: [79:39.50]
2684 ; SLM-LABEL: test_rsqrtps:
2686 ; SLM-NEXT: rsqrtps (%rdi), %xmm1 # sched: [8:1.00]
2687 ; SLM-NEXT: rsqrtps %xmm0, %xmm0 # sched: [5:1.00]
2688 ; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
2689 ; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
2690 ; SLM-NEXT: retq # sched: [4:1.00]
2692 ; SANDY-LABEL: test_rsqrtps:
2694 ; SANDY-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:1.00]
2695 ; SANDY-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [11:1.00]
2696 ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2697 ; SANDY-NEXT: retq # sched: [1:1.00]
2699 ; HASWELL-LABEL: test_rsqrtps:
2701 ; HASWELL-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:1.00]
2702 ; HASWELL-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [5:1.00]
2703 ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2704 ; HASWELL-NEXT: retq # sched: [2:1.00]
2706 ; BROADWELL-LABEL: test_rsqrtps:
2707 ; BROADWELL: # BB#0:
2708 ; BROADWELL-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:1.00]
2709 ; BROADWELL-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [10:1.00]
2710 ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2711 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2713 ; SKYLAKE-LABEL: test_rsqrtps:
2715 ; SKYLAKE-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [4:1.00]
2716 ; SKYLAKE-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [10:1.00]
2717 ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
2718 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2720 ; SKX-LABEL: test_rsqrtps:
2722 ; SKX-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [4:1.00]
2723 ; SKX-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [10:1.00]
2724 ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
2725 ; SKX-NEXT: retq # sched: [7:1.00]
2727 ; BTVER2-LABEL: test_rsqrtps:
2729 ; BTVER2-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [7:1.00]
2730 ; BTVER2-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [2:1.00]
2731 ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2732 ; BTVER2-NEXT: retq # sched: [4:1.00]
2734 ; ZNVER1-LABEL: test_rsqrtps:
2736 ; ZNVER1-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [12:0.50]
2737 ; ZNVER1-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:0.50]
2738 ; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2739 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-operand use of the intrinsic.
2740 %1 = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0)
; Loaded operand: every expectation above shows this load folded into the
; second (v)rsqrtps as a (%rdi) memory operand.
2741 %2 = load <4 x float>, <4 x float> *%a1, align 16
2742 %3 = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %2)
; Add the two intrinsic results (the (v)addps in the expectations).
2743 %4 = fadd <4 x float> %1, %3
; Declaration of the intrinsic called above.
2746 declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone
; test_rsqrtss: applies the llvm.x86.sse.rsqrt.ss intrinsic to two vectors
; whose lane 0 comes from the scalar argument %a0 and from a float loaded
; through %a1, then adds the two results.
; The per-CPU assertion lines that follow are autogenerated (see the note on
; the first line of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
; NOTE(review): the two Atom rsqrtss expectations carry no "sched" annotation,
; unlike every other instruction line here -- presumably the Atom model had no
; data for this instruction when the file was generated; confirm.
2750 define <4 x float> @test_rsqrtss(float %a0, float *%a1) {
2751 ; GENERIC-LABEL: test_rsqrtss:
2753 ; GENERIC-NEXT: rsqrtss %xmm0, %xmm0 # sched: [5:1.00]
2754 ; GENERIC-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
2755 ; GENERIC-NEXT: rsqrtss %xmm1, %xmm1 # sched: [5:1.00]
2756 ; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
2757 ; GENERIC-NEXT: retq # sched: [1:1.00]
2759 ; ATOM-LABEL: test_rsqrtss:
2761 ; ATOM-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:1.00]
2762 ; ATOM-NEXT: rsqrtss %xmm0, %xmm0
2763 ; ATOM-NEXT: rsqrtss %xmm1, %xmm1
2764 ; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00]
2765 ; ATOM-NEXT: retq # sched: [79:39.50]
2767 ; SLM-LABEL: test_rsqrtss:
2769 ; SLM-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [3:1.00]
2770 ; SLM-NEXT: rsqrtss %xmm0, %xmm0 # sched: [8:1.00]
2771 ; SLM-NEXT: rsqrtss %xmm1, %xmm1 # sched: [8:1.00]
2772 ; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
2773 ; SLM-NEXT: retq # sched: [4:1.00]
2775 ; SANDY-LABEL: test_rsqrtss:
2777 ; SANDY-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
2778 ; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
2779 ; SANDY-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
2780 ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2781 ; SANDY-NEXT: retq # sched: [1:1.00]
2783 ; HASWELL-LABEL: test_rsqrtss:
2785 ; HASWELL-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
2786 ; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:0.50]
2787 ; HASWELL-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
2788 ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2789 ; HASWELL-NEXT: retq # sched: [2:1.00]
2791 ; BROADWELL-LABEL: test_rsqrtss:
2792 ; BROADWELL: # BB#0:
2793 ; BROADWELL-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
2794 ; BROADWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
2795 ; BROADWELL-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
2796 ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2797 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2799 ; SKYLAKE-LABEL: test_rsqrtss:
2801 ; SKYLAKE-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
2802 ; SKYLAKE-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
2803 ; SKYLAKE-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
2804 ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
2805 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2807 ; SKX-LABEL: test_rsqrtss:
2809 ; SKX-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
2810 ; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
2811 ; SKX-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
2812 ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
2813 ; SKX-NEXT: retq # sched: [7:1.00]
2815 ; BTVER2-LABEL: test_rsqrtss:
2817 ; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
2818 ; BTVER2-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [7:1.00]
2819 ; BTVER2-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [7:1.00]
2820 ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2821 ; BTVER2-NEXT: retq # sched: [4:1.00]
2823 ; ZNVER1-LABEL: test_rsqrtss:
2825 ; ZNVER1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [8:0.50]
2826 ; ZNVER1-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:0.50]
2827 ; ZNVER1-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:0.50]
2828 ; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2829 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Place the scalar argument in lane 0 of an otherwise undef vector and apply
; the intrinsic to it.
2830 %1 = insertelement <4 x float> undef, float %a0, i32 0
2831 %2 = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %1)
; Same again for a float loaded through %a1. In every expectation above the
; load appears as a separate (v)movss and the (v)rsqrtss uses registers only.
2832 %3 = load float, float *%a1, align 4
2833 %4 = insertelement <4 x float> undef, float %3, i32 0
2834 %5 = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %4)
; Add the two intrinsic results (the (v)addps in the expectations).
2835 %6 = fadd <4 x float> %2, %5
; Declaration of the intrinsic called above.
2838 declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
; test_sfence: calls the llvm.x86.sse.sfence intrinsic; every expectation below
; shows it emitted as a single sfence instruction before retq (the Atom
; expectation additionally lists six nop instructions in between).
; The per-CPU assertion lines that follow are autogenerated (see the note on
; the first line of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
2840 define void @test_sfence() {
2841 ; GENERIC-LABEL: test_sfence:
2843 ; GENERIC-NEXT: sfence # sched: [1:1.00]
2844 ; GENERIC-NEXT: retq # sched: [1:1.00]
2846 ; ATOM-LABEL: test_sfence:
2848 ; ATOM-NEXT: sfence # sched: [1:1.00]
2849 ; ATOM-NEXT: nop # sched: [1:0.50]
2850 ; ATOM-NEXT: nop # sched: [1:0.50]
2851 ; ATOM-NEXT: nop # sched: [1:0.50]
2852 ; ATOM-NEXT: nop # sched: [1:0.50]
2853 ; ATOM-NEXT: nop # sched: [1:0.50]
2854 ; ATOM-NEXT: nop # sched: [1:0.50]
2855 ; ATOM-NEXT: retq # sched: [79:39.50]
2857 ; SLM-LABEL: test_sfence:
2859 ; SLM-NEXT: sfence # sched: [1:1.00]
2860 ; SLM-NEXT: retq # sched: [4:1.00]
2862 ; SANDY-LABEL: test_sfence:
2864 ; SANDY-NEXT: sfence # sched: [1:1.00]
2865 ; SANDY-NEXT: retq # sched: [1:1.00]
2867 ; HASWELL-LABEL: test_sfence:
2869 ; HASWELL-NEXT: sfence # sched: [1:0.33]
2870 ; HASWELL-NEXT: retq # sched: [2:1.00]
2872 ; BROADWELL-LABEL: test_sfence:
2873 ; BROADWELL: # BB#0:
2874 ; BROADWELL-NEXT: sfence # sched: [2:0.33]
2875 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2877 ; SKYLAKE-LABEL: test_sfence:
2879 ; SKYLAKE-NEXT: sfence # sched: [2:0.33]
2880 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2882 ; SKX-LABEL: test_sfence:
2884 ; SKX-NEXT: sfence # sched: [2:0.33]
2885 ; SKX-NEXT: retq # sched: [7:1.00]
2887 ; BTVER2-LABEL: test_sfence:
2889 ; BTVER2-NEXT: sfence # sched: [1:1.00]
2890 ; BTVER2-NEXT: retq # sched: [4:1.00]
2892 ; ZNVER1-LABEL: test_sfence:
2894 ; ZNVER1-NEXT: sfence # sched: [1:0.50]
2895 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; The only operation under test.
2896 call void @llvm.x86.sse.sfence()
; Declaration of the intrinsic called above.
; NOTE(review): readnone on a fence looks surprising; the expectations show the
; call is still emitted, so presumably the intrinsic's own attributes take
; precedence over the ones written here -- confirm.
2899 declare void @llvm.x86.sse.sfence() nounwind readnone
; test_shufps: performs two shufflevector operations, the first on the two
; register arguments and the second on that result and a vector loaded from
; %a2. The expectations below show a register-form (v)shufps followed by a
; (v)shufps whose second source is a memory operand.
; The per-CPU assertion lines that follow are autogenerated (see the note on
; the first line of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
2901 define <4 x float> @test_shufps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) nounwind {
2902 ; GENERIC-LABEL: test_shufps:
2904 ; GENERIC-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
2905 ; GENERIC-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [7:1.00]
2906 ; GENERIC-NEXT: retq # sched: [1:1.00]
2908 ; ATOM-LABEL: test_shufps:
2910 ; ATOM-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
2911 ; ATOM-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [1:1.00]
2912 ; ATOM-NEXT: nop # sched: [1:0.50]
2913 ; ATOM-NEXT: nop # sched: [1:0.50]
2914 ; ATOM-NEXT: nop # sched: [1:0.50]
2915 ; ATOM-NEXT: nop # sched: [1:0.50]
2916 ; ATOM-NEXT: retq # sched: [79:39.50]
2918 ; SLM-LABEL: test_shufps:
2920 ; SLM-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
2921 ; SLM-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [4:1.00]
2922 ; SLM-NEXT: retq # sched: [4:1.00]
2924 ; SANDY-LABEL: test_shufps:
2926 ; SANDY-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
2927 ; SANDY-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [7:1.00]
2928 ; SANDY-NEXT: retq # sched: [1:1.00]
2930 ; HASWELL-LABEL: test_shufps:
2932 ; HASWELL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
2933 ; HASWELL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [1:1.00]
2934 ; HASWELL-NEXT: retq # sched: [2:1.00]
2936 ; BROADWELL-LABEL: test_shufps:
2937 ; BROADWELL: # BB#0:
2938 ; BROADWELL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
2939 ; BROADWELL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [6:1.00]
2940 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2942 ; SKYLAKE-LABEL: test_shufps:
2944 ; SKYLAKE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
2945 ; SKYLAKE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [7:1.00]
2946 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2948 ; SKX-LABEL: test_shufps:
2950 ; SKX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
2951 ; SKX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [7:1.00]
2952 ; SKX-NEXT: retq # sched: [7:1.00]
2954 ; BTVER2-LABEL: test_shufps:
2956 ; BTVER2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:0.50]
2957 ; BTVER2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [6:1.00]
2958 ; BTVER2-NEXT: retq # sched: [4:1.00]
2960 ; ZNVER1-LABEL: test_shufps:
2962 ; ZNVER1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:0.50]
2963 ; ZNVER1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [8:0.50]
2964 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Mask <0,0,4,4>: indices 0-3 select from %a0 and 4-7 from %a1, matching the
; xmm0[0,0],xmm1[0,0] form in the expectations.
2965 %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 0, i32 4, i32 4>
; Second shuffle takes its second source from memory; the expectations show
; the load folded into the instruction as mem[0,0].
2966 %2 = load <4 x float>, <4 x float> *%a2, align 16
2967 %3 = shufflevector <4 x float> %1, <4 x float> %2, <4 x i32> <i32 0, i32 3, i32 4, i32 4>
; test_sqrtps: applies the llvm.x86.sse.sqrt.ps intrinsic to the register
; argument %a0 and to a vector loaded from %a1, then adds the two results.
; The per-CPU assertion lines that follow are autogenerated (see the note on
; the first line of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
2971 define <4 x float> @test_sqrtps(<4 x float> %a0, <4 x float> *%a1) {
2972 ; GENERIC-LABEL: test_sqrtps:
2974 ; GENERIC-NEXT: sqrtps %xmm0, %xmm1 # sched: [14:1.00]
2975 ; GENERIC-NEXT: sqrtps (%rdi), %xmm0 # sched: [20:1.00]
2976 ; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
2977 ; GENERIC-NEXT: retq # sched: [1:1.00]
2979 ; ATOM-LABEL: test_sqrtps:
2981 ; ATOM-NEXT: sqrtps %xmm0, %xmm1 # sched: [70:35.00]
2982 ; ATOM-NEXT: sqrtps (%rdi), %xmm0 # sched: [70:35.00]
2983 ; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00]
2984 ; ATOM-NEXT: retq # sched: [79:39.50]
2986 ; SLM-LABEL: test_sqrtps:
2988 ; SLM-NEXT: sqrtps (%rdi), %xmm1 # sched: [18:1.00]
2989 ; SLM-NEXT: sqrtps %xmm0, %xmm0 # sched: [15:1.00]
2990 ; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
2991 ; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
2992 ; SLM-NEXT: retq # sched: [4:1.00]
2994 ; SANDY-LABEL: test_sqrtps:
2996 ; SANDY-NEXT: vsqrtps %xmm0, %xmm0 # sched: [14:1.00]
2997 ; SANDY-NEXT: vsqrtps (%rdi), %xmm1 # sched: [20:1.00]
2998 ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2999 ; SANDY-NEXT: retq # sched: [1:1.00]
3001 ; HASWELL-LABEL: test_sqrtps:
3003 ; HASWELL-NEXT: vsqrtps %xmm0, %xmm0 # sched: [14:1.00]
3004 ; HASWELL-NEXT: vsqrtps (%rdi), %xmm1 # sched: [14:1.00]
3005 ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
3006 ; HASWELL-NEXT: retq # sched: [2:1.00]
3008 ; BROADWELL-LABEL: test_sqrtps:
3009 ; BROADWELL: # BB#0:
3010 ; BROADWELL-NEXT: vsqrtps %xmm0, %xmm0 # sched: [14:1.00]
3011 ; BROADWELL-NEXT: vsqrtps (%rdi), %xmm1 # sched: [19:1.00]
3012 ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
3013 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3015 ; SKYLAKE-LABEL: test_sqrtps:
3017 ; SKYLAKE-NEXT: vsqrtps %xmm0, %xmm0 # sched: [12:1.00]
3018 ; SKYLAKE-NEXT: vsqrtps (%rdi), %xmm1 # sched: [18:1.00]
3019 ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
3020 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3022 ; SKX-LABEL: test_sqrtps:
3024 ; SKX-NEXT: vsqrtps %xmm0, %xmm0 # sched: [12:1.00]
3025 ; SKX-NEXT: vsqrtps (%rdi), %xmm1 # sched: [18:1.00]
3026 ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
3027 ; SKX-NEXT: retq # sched: [7:1.00]
3029 ; BTVER2-LABEL: test_sqrtps:
3031 ; BTVER2-NEXT: vsqrtps (%rdi), %xmm1 # sched: [26:21.00]
3032 ; BTVER2-NEXT: vsqrtps %xmm0, %xmm0 # sched: [21:21.00]
3033 ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
3034 ; BTVER2-NEXT: retq # sched: [4:1.00]
3036 ; ZNVER1-LABEL: test_sqrtps:
3038 ; ZNVER1-NEXT: vsqrtps (%rdi), %xmm1 # sched: [27:1.00]
3039 ; ZNVER1-NEXT: vsqrtps %xmm0, %xmm0 # sched: [20:1.00]
3040 ; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
3041 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-operand use of the intrinsic.
3042 %1 = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0)
; Loaded operand: every expectation above shows this load folded into the
; second (v)sqrtps as a (%rdi) memory operand.
3043 %2 = load <4 x float>, <4 x float> *%a1, align 16
3044 %3 = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %2)
; Add the two intrinsic results (the (v)addps in the expectations).
3045 %4 = fadd <4 x float> %1, %3
; Declaration of the intrinsic called above.
3048 declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone
; test_sqrtss: applies the llvm.x86.sse.sqrt.ss intrinsic to the register
; argument %a0 and to a full vector loaded from %a1, then adds the two results.
; The per-CPU assertion lines that follow are autogenerated (see the note on
; the first line of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
; NOTE(review): the two Atom sqrtss expectations carry no "sched" annotation,
; and the Sandy Bridge / Ivy Bridge (SANDY prefix) vsqrtss lines show
; [114:1.00] while the other Intel targets here show 12 or 14 as the first
; number. Both are what the generator recorded; if either is wrong, the fix
; presumably belongs in the scheduling model, followed by regenerating this
; file -- confirm.
3052 define <4 x float> @test_sqrtss(<4 x float> %a0, <4 x float> *%a1) {
3053 ; GENERIC-LABEL: test_sqrtss:
3055 ; GENERIC-NEXT: sqrtss %xmm0, %xmm0 # sched: [14:1.00]
3056 ; GENERIC-NEXT: movaps (%rdi), %xmm1 # sched: [6:0.50]
3057 ; GENERIC-NEXT: sqrtss %xmm1, %xmm1 # sched: [14:1.00]
3058 ; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
3059 ; GENERIC-NEXT: retq # sched: [1:1.00]
3061 ; ATOM-LABEL: test_sqrtss:
3063 ; ATOM-NEXT: movaps (%rdi), %xmm1 # sched: [1:1.00]
3064 ; ATOM-NEXT: sqrtss %xmm0, %xmm0
3065 ; ATOM-NEXT: sqrtss %xmm1, %xmm1
3066 ; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00]
3067 ; ATOM-NEXT: retq # sched: [79:39.50]
3069 ; SLM-LABEL: test_sqrtss:
3071 ; SLM-NEXT: movaps (%rdi), %xmm1 # sched: [3:1.00]
3072 ; SLM-NEXT: sqrtss %xmm0, %xmm0 # sched: [18:1.00]
3073 ; SLM-NEXT: sqrtss %xmm1, %xmm1 # sched: [18:1.00]
3074 ; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
3075 ; SLM-NEXT: retq # sched: [4:1.00]
3077 ; SANDY-LABEL: test_sqrtss:
3079 ; SANDY-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [114:1.00]
3080 ; SANDY-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50]
3081 ; SANDY-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [114:1.00]
3082 ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
3083 ; SANDY-NEXT: retq # sched: [1:1.00]
3085 ; HASWELL-LABEL: test_sqrtss:
3087 ; HASWELL-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:1.00]
3088 ; HASWELL-NEXT: vmovaps (%rdi), %xmm1 # sched: [1:0.50]
3089 ; HASWELL-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [14:1.00]
3090 ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
3091 ; HASWELL-NEXT: retq # sched: [2:1.00]
3093 ; BROADWELL-LABEL: test_sqrtss:
3094 ; BROADWELL: # BB#0:
3095 ; BROADWELL-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:1.00]
3096 ; BROADWELL-NEXT: vmovaps (%rdi), %xmm1 # sched: [5:0.50]
3097 ; BROADWELL-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [14:1.00]
3098 ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
3099 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3101 ; SKYLAKE-LABEL: test_sqrtss:
3103 ; SKYLAKE-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [12:1.00]
3104 ; SKYLAKE-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50]
3105 ; SKYLAKE-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [12:1.00]
3106 ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
3107 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3109 ; SKX-LABEL: test_sqrtss:
3111 ; SKX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [12:1.00]
3112 ; SKX-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50]
3113 ; SKX-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [12:1.00]
3114 ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
3115 ; SKX-NEXT: retq # sched: [7:1.00]
3117 ; BTVER2-LABEL: test_sqrtss:
3119 ; BTVER2-NEXT: vmovaps (%rdi), %xmm1 # sched: [5:1.00]
3120 ; BTVER2-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [26:21.00]
3121 ; BTVER2-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [26:21.00]
3122 ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
3123 ; BTVER2-NEXT: retq # sched: [4:1.00]
3125 ; ZNVER1-LABEL: test_sqrtss:
3127 ; ZNVER1-NEXT: vmovaps (%rdi), %xmm1 # sched: [8:0.50]
3128 ; ZNVER1-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [27:1.00]
3129 ; ZNVER1-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [27:1.00]
3130 ; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
3131 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-operand use of the intrinsic.
3132 %1 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0)
; Full-vector load; in every expectation above it stays a separate (v)movaps
; and the second (v)sqrtss uses registers only.
3133 %2 = load <4 x float>, <4 x float> *%a1, align 16
3134 %3 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %2)
; Add the two intrinsic results (the (v)addps in the expectations).
3135 %4 = fadd <4 x float> %1, %3
; Declaration of the intrinsic called above.
3138 declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
; test_stmxcsr: calls the llvm.x86.sse.stmxcsr intrinsic on a stack slot and
; then loads an i32 back from that slot. The expectations below show a
; (v)stmxcsr to an %rsp-relative address followed by a movl from an
; %rsp-relative address into %eax; the offsets are matched by regex.
; The per-CPU assertion lines that follow are autogenerated (see the note on
; the first line of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
; NOTE(review): the ZNVER1 vstmxcsr line records "[100:?]", i.e. no numeric
; second field -- presumably a placeholder from the znver1 model; confirm.
3140 define i32 @test_stmxcsr() {
3141 ; GENERIC-LABEL: test_stmxcsr:
3143 ; GENERIC-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
3144 ; GENERIC-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
3145 ; GENERIC-NEXT: retq # sched: [1:1.00]
3147 ; ATOM-LABEL: test_stmxcsr:
3149 ; ATOM-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [15:7.50]
3150 ; ATOM-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [1:1.00]
3151 ; ATOM-NEXT: retq # sched: [79:39.50]
3153 ; SLM-LABEL: test_stmxcsr:
3155 ; SLM-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00]
3156 ; SLM-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [3:1.00]
3157 ; SLM-NEXT: retq # sched: [4:1.00]
3159 ; SANDY-LABEL: test_stmxcsr:
3161 ; SANDY-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
3162 ; SANDY-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
3163 ; SANDY-NEXT: retq # sched: [1:1.00]
3165 ; HASWELL-LABEL: test_stmxcsr:
3167 ; HASWELL-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00]
3168 ; HASWELL-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [1:0.50]
3169 ; HASWELL-NEXT: retq # sched: [2:1.00]
3171 ; BROADWELL-LABEL: test_stmxcsr:
3172 ; BROADWELL: # BB#0:
3173 ; BROADWELL-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
3174 ; BROADWELL-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
3175 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3177 ; SKYLAKE-LABEL: test_stmxcsr:
3179 ; SKYLAKE-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
3180 ; SKYLAKE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
3181 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3183 ; SKX-LABEL: test_stmxcsr:
3185 ; SKX-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
3186 ; SKX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
3187 ; SKX-NEXT: retq # sched: [7:1.00]
3189 ; BTVER2-LABEL: test_stmxcsr:
3191 ; BTVER2-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00]
3192 ; BTVER2-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:1.00]
3193 ; BTVER2-NEXT: retq # sched: [4:1.00]
3195 ; ZNVER1-LABEL: test_stmxcsr:
3197 ; ZNVER1-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [100:?]
3198 ; ZNVER1-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [8:0.50]
3199 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Stack slot that receives the value written by the intrinsic.
3200 %1 = alloca i32, align 4
; The intrinsic takes an i8*, so the i32* slot is cast first.
3201 %2 = bitcast i32* %1 to i8*
3202 call void @llvm.x86.sse.stmxcsr(i8* %2)
; Read the stored value back from the slot (the movl in the expectations).
3203 %3 = load i32, i32* %1, align 4
; Declaration of the intrinsic called above.
; NOTE(review): readnone on an intrinsic that is handed a pointer to write
; through looks surprising; the expectations show the store is still emitted,
; so presumably the intrinsic's own attributes take precedence -- confirm.
3206 declare void @llvm.x86.sse.stmxcsr(i8*) nounwind readnone
; test_subps: subtracts %a1 from %a0, then subtracts a vector loaded from %a2
; from that result. The expectations below show a register-form (v)subps
; followed by a (v)subps with a (%rdi) memory operand.
; The per-CPU assertion lines that follow are autogenerated (see the note on
; the first line of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
3208 define <4 x float> @test_subps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
3209 ; GENERIC-LABEL: test_subps:
3211 ; GENERIC-NEXT: subps %xmm1, %xmm0 # sched: [3:1.00]
3212 ; GENERIC-NEXT: subps (%rdi), %xmm0 # sched: [9:1.00]
3213 ; GENERIC-NEXT: retq # sched: [1:1.00]
3215 ; ATOM-LABEL: test_subps:
3217 ; ATOM-NEXT: subps %xmm1, %xmm0 # sched: [5:5.00]
3218 ; ATOM-NEXT: subps (%rdi), %xmm0 # sched: [5:5.00]
3219 ; ATOM-NEXT: retq # sched: [79:39.50]
3221 ; SLM-LABEL: test_subps:
3223 ; SLM-NEXT: subps %xmm1, %xmm0 # sched: [3:1.00]
3224 ; SLM-NEXT: subps (%rdi), %xmm0 # sched: [6:1.00]
3225 ; SLM-NEXT: retq # sched: [4:1.00]
3227 ; SANDY-LABEL: test_subps:
3229 ; SANDY-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
3230 ; SANDY-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
3231 ; SANDY-NEXT: retq # sched: [1:1.00]
3233 ; HASWELL-LABEL: test_subps:
3235 ; HASWELL-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
3236 ; HASWELL-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
3237 ; HASWELL-NEXT: retq # sched: [2:1.00]
3239 ; BROADWELL-LABEL: test_subps:
3240 ; BROADWELL: # BB#0:
3241 ; BROADWELL-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
3242 ; BROADWELL-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
3243 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3245 ; SKYLAKE-LABEL: test_subps:
3247 ; SKYLAKE-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
3248 ; SKYLAKE-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
3249 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3251 ; SKX-LABEL: test_subps:
3253 ; SKX-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
3254 ; SKX-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
3255 ; SKX-NEXT: retq # sched: [7:1.00]
3257 ; BTVER2-LABEL: test_subps:
3259 ; BTVER2-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
3260 ; BTVER2-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
3261 ; BTVER2-NEXT: retq # sched: [4:1.00]
3263 ; ZNVER1-LABEL: test_subps:
3265 ; ZNVER1-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
3266 ; ZNVER1-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
3267 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-register subtraction.
3268 %1 = fsub <4 x float> %a0, %a1
; Subtraction with a loaded right-hand operand; the expectations show the load
; folded into the instruction as a (%rdi) memory operand.
3269 %2 = load <4 x float>, <4 x float> *%a2, align 16
3270 %3 = fsub <4 x float> %1, %2
; test_subss: scalar counterpart of the packed subtraction test. Subtracts %a1
; from %a0, then subtracts a float loaded from %a2 from that result. The
; expectations below show a register-form (v)subss followed by a (v)subss with
; a (%rdi) memory operand.
; The per-CPU assertion lines that follow are autogenerated (see the note on
; the first line of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
3274 define float @test_subss(float %a0, float %a1, float *%a2) {
3275 ; GENERIC-LABEL: test_subss:
3277 ; GENERIC-NEXT: subss %xmm1, %xmm0 # sched: [3:1.00]
3278 ; GENERIC-NEXT: subss (%rdi), %xmm0 # sched: [9:1.00]
3279 ; GENERIC-NEXT: retq # sched: [1:1.00]
3281 ; ATOM-LABEL: test_subss:
3283 ; ATOM-NEXT: subss %xmm1, %xmm0 # sched: [5:5.00]
3284 ; ATOM-NEXT: subss (%rdi), %xmm0 # sched: [5:5.00]
3285 ; ATOM-NEXT: retq # sched: [79:39.50]
3287 ; SLM-LABEL: test_subss:
3289 ; SLM-NEXT: subss %xmm1, %xmm0 # sched: [3:1.00]
3290 ; SLM-NEXT: subss (%rdi), %xmm0 # sched: [6:1.00]
3291 ; SLM-NEXT: retq # sched: [4:1.00]
3293 ; SANDY-LABEL: test_subss:
3295 ; SANDY-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
3296 ; SANDY-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
3297 ; SANDY-NEXT: retq # sched: [1:1.00]
3299 ; HASWELL-LABEL: test_subss:
3301 ; HASWELL-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
3302 ; HASWELL-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [3:1.00]
3303 ; HASWELL-NEXT: retq # sched: [2:1.00]
3305 ; BROADWELL-LABEL: test_subss:
3306 ; BROADWELL: # BB#0:
3307 ; BROADWELL-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
3308 ; BROADWELL-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
3309 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3311 ; SKYLAKE-LABEL: test_subss:
3313 ; SKYLAKE-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
3314 ; SKYLAKE-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
3315 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3317 ; SKX-LABEL: test_subss:
3319 ; SKX-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
3320 ; SKX-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
3321 ; SKX-NEXT: retq # sched: [7:1.00]
3323 ; BTVER2-LABEL: test_subss:
3325 ; BTVER2-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
3326 ; BTVER2-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
3327 ; BTVER2-NEXT: retq # sched: [4:1.00]
3329 ; ZNVER1-LABEL: test_subss:
3331 ; ZNVER1-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
3332 ; ZNVER1-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
3333 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-register subtraction.
3334 %1 = fsub float %a0, %a1
; Subtraction with a loaded right-hand operand; the expectations show the load
; folded into the instruction as a (%rdi) memory operand.
3335 %2 = load float, float *%a2, align 4
3336 %3 = fsub float %1, %2
3340 define i32 @test_ucomiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
3341 ; GENERIC-LABEL: test_ucomiss:
3343 ; GENERIC-NEXT: ucomiss %xmm1, %xmm0 # sched: [3:1.00]
3344 ; GENERIC-NEXT: setnp %al # sched: [1:0.50]
3345 ; GENERIC-NEXT: sete %cl # sched: [1:0.50]
3346 ; GENERIC-NEXT: andb %al, %cl # sched: [1:0.33]
3347 ; GENERIC-NEXT: ucomiss (%rdi), %xmm0 # sched: [7:1.00]
3348 ; GENERIC-NEXT: setnp %al # sched: [1:0.50]
3349 ; GENERIC-NEXT: sete %dl # sched: [1:0.50]
3350 ; GENERIC-NEXT: andb %al, %dl # sched: [1:0.33]
3351 ; GENERIC-NEXT: orb %cl, %dl # sched: [1:0.33]
3352 ; GENERIC-NEXT: movzbl %dl, %eax # sched: [1:0.33]
3353 ; GENERIC-NEXT: retq # sched: [1:1.00]
3355 ; ATOM-LABEL: test_ucomiss:
3357 ; ATOM-NEXT: ucomiss %xmm1, %xmm0 # sched: [9:4.50]
3358 ; ATOM-NEXT: setnp %al # sched: [1:0.50]
3359 ; ATOM-NEXT: sete %cl # sched: [1:0.50]
3360 ; ATOM-NEXT: andb %al, %cl # sched: [1:0.50]
3361 ; ATOM-NEXT: ucomiss (%rdi), %xmm0 # sched: [10:5.00]
3362 ; ATOM-NEXT: setnp %al # sched: [1:0.50]
3363 ; ATOM-NEXT: sete %dl # sched: [1:0.50]
3364 ; ATOM-NEXT: andb %al, %dl # sched: [1:0.50]
3365 ; ATOM-NEXT: orb %cl, %dl # sched: [1:0.50]
3366 ; ATOM-NEXT: movzbl %dl, %eax # sched: [1:1.00]
3367 ; ATOM-NEXT: retq # sched: [79:39.50]
3369 ; SLM-LABEL: test_ucomiss:
3371 ; SLM-NEXT: ucomiss %xmm1, %xmm0 # sched: [3:1.00]
3372 ; SLM-NEXT: setnp %al # sched: [1:0.50]
3373 ; SLM-NEXT: sete %cl # sched: [1:0.50]
3374 ; SLM-NEXT: andb %al, %cl # sched: [1:0.50]
3375 ; SLM-NEXT: ucomiss (%rdi), %xmm0 # sched: [6:1.00]
3376 ; SLM-NEXT: setnp %al # sched: [1:0.50]
3377 ; SLM-NEXT: sete %dl # sched: [1:0.50]
3378 ; SLM-NEXT: andb %al, %dl # sched: [1:0.50]
3379 ; SLM-NEXT: orb %cl, %dl # sched: [1:0.50]
3380 ; SLM-NEXT: movzbl %dl, %eax # sched: [1:0.50]
3381 ; SLM-NEXT: retq # sched: [4:1.00]
3383 ; SANDY-LABEL: test_ucomiss:
3385 ; SANDY-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00]
3386 ; SANDY-NEXT: setnp %al # sched: [1:0.50]
3387 ; SANDY-NEXT: sete %cl # sched: [1:0.50]
3388 ; SANDY-NEXT: andb %al, %cl # sched: [1:0.33]
3389 ; SANDY-NEXT: vucomiss (%rdi), %xmm0 # sched: [7:1.00]
3390 ; SANDY-NEXT: setnp %al # sched: [1:0.50]
3391 ; SANDY-NEXT: sete %dl # sched: [1:0.50]
3392 ; SANDY-NEXT: andb %al, %dl # sched: [1:0.33]
3393 ; SANDY-NEXT: orb %cl, %dl # sched: [1:0.33]
3394 ; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33]
3395 ; SANDY-NEXT: retq # sched: [1:1.00]
3397 ; HASWELL-LABEL: test_ucomiss:
3399 ; HASWELL-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00]
3400 ; HASWELL-NEXT: setnp %al # sched: [1:0.50]
3401 ; HASWELL-NEXT: sete %cl # sched: [1:0.50]
3402 ; HASWELL-NEXT: andb %al, %cl # sched: [1:0.25]
3403 ; HASWELL-NEXT: vucomiss (%rdi), %xmm0 # sched: [7:1.00]
3404 ; HASWELL-NEXT: setnp %al # sched: [1:0.50]
3405 ; HASWELL-NEXT: sete %dl # sched: [1:0.50]
3406 ; HASWELL-NEXT: andb %al, %dl # sched: [1:0.25]
3407 ; HASWELL-NEXT: orb %cl, %dl # sched: [1:0.25]
3408 ; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25]
3409 ; HASWELL-NEXT: retq # sched: [2:1.00]
3411 ; BROADWELL-LABEL: test_ucomiss:
3412 ; BROADWELL: # BB#0:
3413 ; BROADWELL-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00]
3414 ; BROADWELL-NEXT: setnp %al # sched: [1:0.50]
3415 ; BROADWELL-NEXT: sete %cl # sched: [1:0.50]
3416 ; BROADWELL-NEXT: andb %al, %cl # sched: [1:0.25]
3417 ; BROADWELL-NEXT: vucomiss (%rdi), %xmm0 # sched: [8:1.00]
3418 ; BROADWELL-NEXT: setnp %al # sched: [1:0.50]
3419 ; BROADWELL-NEXT: sete %dl # sched: [1:0.50]
3420 ; BROADWELL-NEXT: andb %al, %dl # sched: [1:0.25]
3421 ; BROADWELL-NEXT: orb %cl, %dl # sched: [1:0.25]
3422 ; BROADWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25]
3423 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3425 ; SKYLAKE-LABEL: test_ucomiss:
3427 ; SKYLAKE-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00]
3428 ; SKYLAKE-NEXT: setnp %al # sched: [1:0.50]
3429 ; SKYLAKE-NEXT: sete %cl # sched: [1:0.50]
3430 ; SKYLAKE-NEXT: andb %al, %cl # sched: [1:0.25]
3431 ; SKYLAKE-NEXT: vucomiss (%rdi), %xmm0 # sched: [8:1.00]
3432 ; SKYLAKE-NEXT: setnp %al # sched: [1:0.50]
3433 ; SKYLAKE-NEXT: sete %dl # sched: [1:0.50]
3434 ; SKYLAKE-NEXT: andb %al, %dl # sched: [1:0.25]
3435 ; SKYLAKE-NEXT: orb %cl, %dl # sched: [1:0.25]
3436 ; SKYLAKE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
3437 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3439 ; SKX-LABEL: test_ucomiss:
3441 ; SKX-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00]
3442 ; SKX-NEXT: setnp %al # sched: [1:0.50]
3443 ; SKX-NEXT: sete %cl # sched: [1:0.50]
3444 ; SKX-NEXT: andb %al, %cl # sched: [1:0.25]
3445 ; SKX-NEXT: vucomiss (%rdi), %xmm0 # sched: [8:1.00]
3446 ; SKX-NEXT: setnp %al # sched: [1:0.50]
3447 ; SKX-NEXT: sete %dl # sched: [1:0.50]
3448 ; SKX-NEXT: andb %al, %dl # sched: [1:0.25]
3449 ; SKX-NEXT: orb %cl, %dl # sched: [1:0.25]
3450 ; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25]
3451 ; SKX-NEXT: retq # sched: [7:1.00]
3453 ; BTVER2-LABEL: test_ucomiss:
3455 ; BTVER2-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00]
3456 ; BTVER2-NEXT: setnp %al # sched: [1:0.50]
3457 ; BTVER2-NEXT: sete %cl # sched: [1:0.50]
3458 ; BTVER2-NEXT: andb %al, %cl # sched: [1:0.50]
3459 ; BTVER2-NEXT: vucomiss (%rdi), %xmm0 # sched: [8:1.00]
3460 ; BTVER2-NEXT: setnp %al # sched: [1:0.50]
3461 ; BTVER2-NEXT: sete %dl # sched: [1:0.50]
3462 ; BTVER2-NEXT: andb %al, %dl # sched: [1:0.50]
3463 ; BTVER2-NEXT: orb %cl, %dl # sched: [1:0.50]
3464 ; BTVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50]
3465 ; BTVER2-NEXT: retq # sched: [4:1.00]
3467 ; ZNVER1-LABEL: test_ucomiss:
3469 ; ZNVER1-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00]
3470 ; ZNVER1-NEXT: setnp %al # sched: [1:0.25]
3471 ; ZNVER1-NEXT: sete %cl # sched: [1:0.25]
3472 ; ZNVER1-NEXT: andb %al, %cl # sched: [1:0.25]
3473 ; ZNVER1-NEXT: vucomiss (%rdi), %xmm0 # sched: [10:1.00]
3474 ; ZNVER1-NEXT: setnp %al # sched: [1:0.25]
3475 ; ZNVER1-NEXT: sete %dl # sched: [1:0.25]
3476 ; ZNVER1-NEXT: andb %al, %dl # sched: [1:0.25]
3477 ; ZNVER1-NEXT: orb %cl, %dl # sched: [1:0.25]
3478 ; ZNVER1-NEXT: movzbl %dl, %eax # sched: [1:0.25]
3479 ; ZNVER1-NEXT: retq # sched: [1:0.50]
3480 %1 = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1)
3481 %2 = load <4 x float>, <4 x float> *%a2, align 4
3482 %3 = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %2)
; Declaration of the intrinsic called by the ucomiss test in this file; it
; takes two <4 x float> operands and yields an i32 result.
3486 declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone
; Schedule-annotation test for the high-half float unpack. The expectations
; below cover a register-register form followed by a load-folded form, once
; per check prefix from the RUN lines (sandybridge and ivybridge share one
; prefix). The "sched" text is the annotation printed under -print-schedule.
; The expectations are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
; NOTE(review): the Atom and Haswell expectations give the load-folded form
; the same [1:1.00] annotation as the register form, while the other models
; show a larger first value for the memory form - presumably this mirrors the
; scheduler models at generation time; confirm before relying on it.
3488 define <4 x float> @test_unpckhps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
3489 ; GENERIC-LABEL: test_unpckhps:
3491 ; GENERIC-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
3492 ; GENERIC-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
3493 ; GENERIC-NEXT: retq # sched: [1:1.00]
3495 ; ATOM-LABEL: test_unpckhps:
3497 ; ATOM-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
3498 ; ATOM-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [1:1.00]
3499 ; ATOM-NEXT: nop # sched: [1:0.50]
3500 ; ATOM-NEXT: nop # sched: [1:0.50]
3501 ; ATOM-NEXT: nop # sched: [1:0.50]
3502 ; ATOM-NEXT: nop # sched: [1:0.50]
3503 ; ATOM-NEXT: retq # sched: [79:39.50]
3505 ; SLM-LABEL: test_unpckhps:
3507 ; SLM-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
3508 ; SLM-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [4:1.00]
3509 ; SLM-NEXT: retq # sched: [4:1.00]
3511 ; SANDY-LABEL: test_unpckhps:
3513 ; SANDY-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
3514 ; SANDY-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
3515 ; SANDY-NEXT: retq # sched: [1:1.00]
3517 ; HASWELL-LABEL: test_unpckhps:
3519 ; HASWELL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
3520 ; HASWELL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [1:1.00]
3521 ; HASWELL-NEXT: retq # sched: [2:1.00]
3523 ; BROADWELL-LABEL: test_unpckhps:
3524 ; BROADWELL: # BB#0:
3525 ; BROADWELL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
3526 ; BROADWELL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00]
3527 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3529 ; SKYLAKE-LABEL: test_unpckhps:
3531 ; SKYLAKE-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
3532 ; SKYLAKE-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
3533 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3535 ; SKX-LABEL: test_unpckhps:
3537 ; SKX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
3538 ; SKX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
3539 ; SKX-NEXT: retq # sched: [7:1.00]
3541 ; BTVER2-LABEL: test_unpckhps:
3543 ; BTVER2-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
3544 ; BTVER2-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00]
3545 ; BTVER2-NEXT: retq # sched: [4:1.00]
3547 ; ZNVER1-LABEL: test_unpckhps:
3549 ; ZNVER1-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
3550 ; ZNVER1-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [8:0.50]
3551 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Mask <2, 6, 3, 7> interleaves elements 2 and 3 of the first operand with
; elements 2 and 3 of the second (indices 4-7 select from the second operand).
; The first shuffle takes both operands from arguments; the second takes its
; second operand from the load through %a2, which is what the "mem[...]"
; expectations above correspond to.
3552 %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
3553 %2 = load <4 x float>, <4 x float> *%a2, align 16
3554 %3 = shufflevector <4 x float> %1, <4 x float> %2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
; Schedule-annotation test for the low-half float unpack. The expectations
; below cover a register-register form followed by a load-folded form, once
; per check prefix from the RUN lines (sandybridge and ivybridge share one
; prefix). The "sched" text is the annotation printed under -print-schedule.
; The expectations are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
; NOTE(review): the Atom and Haswell expectations give the load-folded form
; the same [1:1.00] annotation as the register form, while the other models
; show a larger first value for the memory form - presumably this mirrors the
; scheduler models at generation time; confirm before relying on it.
3558 define <4 x float> @test_unpcklps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
3559 ; GENERIC-LABEL: test_unpcklps:
3561 ; GENERIC-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
3562 ; GENERIC-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
3563 ; GENERIC-NEXT: retq # sched: [1:1.00]
3565 ; ATOM-LABEL: test_unpcklps:
3567 ; ATOM-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
3568 ; ATOM-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [1:1.00]
3569 ; ATOM-NEXT: nop # sched: [1:0.50]
3570 ; ATOM-NEXT: nop # sched: [1:0.50]
3571 ; ATOM-NEXT: nop # sched: [1:0.50]
3572 ; ATOM-NEXT: nop # sched: [1:0.50]
3573 ; ATOM-NEXT: retq # sched: [79:39.50]
3575 ; SLM-LABEL: test_unpcklps:
3577 ; SLM-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
3578 ; SLM-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [4:1.00]
3579 ; SLM-NEXT: retq # sched: [4:1.00]
3581 ; SANDY-LABEL: test_unpcklps:
3583 ; SANDY-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
3584 ; SANDY-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
3585 ; SANDY-NEXT: retq # sched: [1:1.00]
3587 ; HASWELL-LABEL: test_unpcklps:
3589 ; HASWELL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
3590 ; HASWELL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [1:1.00]
3591 ; HASWELL-NEXT: retq # sched: [2:1.00]
3593 ; BROADWELL-LABEL: test_unpcklps:
3594 ; BROADWELL: # BB#0:
3595 ; BROADWELL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
3596 ; BROADWELL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [6:1.00]
3597 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3599 ; SKYLAKE-LABEL: test_unpcklps:
3601 ; SKYLAKE-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
3602 ; SKYLAKE-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
3603 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3605 ; SKX-LABEL: test_unpcklps:
3607 ; SKX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
3608 ; SKX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
3609 ; SKX-NEXT: retq # sched: [7:1.00]
3611 ; BTVER2-LABEL: test_unpcklps:
3613 ; BTVER2-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
3614 ; BTVER2-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [6:1.00]
3615 ; BTVER2-NEXT: retq # sched: [4:1.00]
3617 ; ZNVER1-LABEL: test_unpcklps:
3619 ; ZNVER1-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
3620 ; ZNVER1-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [8:0.50]
3621 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Mask <0, 4, 1, 5> interleaves elements 0 and 1 of the first operand with
; elements 0 and 1 of the second (indices 4-7 select from the second operand).
; The first shuffle takes both operands from arguments; the second takes its
; second operand from the load through %a2, which is what the "mem[...]"
; expectations above correspond to.
3622 %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
3623 %2 = load <4 x float>, <4 x float> *%a2, align 16
3624 %3 = shufflevector <4 x float> %1, <4 x float> %2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
3628 define <4 x float> @test_xorps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
3629 ; GENERIC-LABEL: test_xorps:
3631 ; GENERIC-NEXT: xorps %xmm1, %xmm0 # sched: [1:1.00]
3632 ; GENERIC-NEXT: xorps (%rdi), %xmm0 # sched: [7:1.00]
3633 ; GENERIC-NEXT: retq # sched: [1:1.00]
3635 ; ATOM-LABEL: test_xorps:
3637 ; ATOM-NEXT: xorps %xmm1, %xmm0 # sched: [1:0.50]
3638 ; ATOM-NEXT: xorps (%rdi), %xmm0 # sched: [1:1.00]
3639 ; ATOM-NEXT: nop # sched: [1:0.50]
3640 ; ATOM-NEXT: nop # sched: [1:0.50]
3641 ; ATOM-NEXT: nop # sched: [1:0.50]
3642 ; ATOM-NEXT: nop # sched: [1:0.50]
3643 ; ATOM-NEXT: retq # sched: [79:39.50]
3645 ; SLM-LABEL: test_xorps:
3647 ; SLM-NEXT: xorps %xmm1, %xmm0 # sched: [1:0.50]
3648 ; SLM-NEXT: xorps (%rdi), %xmm0 # sched: [4:1.00]
3649 ; SLM-NEXT: retq # sched: [4:1.00]
3651 ; SANDY-LABEL: test_xorps:
3653 ; SANDY-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
3654 ; SANDY-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
3655 ; SANDY-NEXT: retq # sched: [1:1.00]
3657 ; HASWELL-LABEL: test_xorps:
3659 ; HASWELL-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
3660 ; HASWELL-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
3661 ; HASWELL-NEXT: retq # sched: [2:1.00]
3663 ; BROADWELL-LABEL: test_xorps:
3664 ; BROADWELL: # BB#0:
3665 ; BROADWELL-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
3666 ; BROADWELL-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
3667 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3669 ; SKYLAKE-LABEL: test_xorps:
3671 ; SKYLAKE-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
3672 ; SKYLAKE-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
3673 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3675 ; SKX-LABEL: test_xorps:
3677 ; SKX-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
3678 ; SKX-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
3679 ; SKX-NEXT: retq # sched: [7:1.00]
3681 ; BTVER2-LABEL: test_xorps:
3683 ; BTVER2-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
3684 ; BTVER2-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
3685 ; BTVER2-NEXT: retq # sched: [4:1.00]
3687 ; ZNVER1-LABEL: test_xorps:
3689 ; ZNVER1-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
3690 ; ZNVER1-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
3691 ; ZNVER1-NEXT: retq # sched: [1:0.50]
3692 %1 = bitcast <4 x float> %a0 to <4 x i32>
3693 %2 = bitcast <4 x float> %a1 to <4 x i32>
3694 %3 = xor <4 x i32> %1, %2
3695 %4 = load <4 x float>, <4 x float> *%a2, align 16
3696 %5 = bitcast <4 x float> %4 to <4 x i32>
3697 %6 = xor <4 x i32> %3, %5
3698 %7 = bitcast <4 x i32> %6 to <4 x float>