1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
10 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
11 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
; test_blendpd: covers (v)blendpd with a register source and with a memory
; source. Both shufflevectors use mask <0, 3>, i.e. lane 0 from the first
; operand and lane 1 from the second; the second shuffle takes its lane 1 from
; the vector loaded through %a2. The fadd sits between the two shuffles.
; The per-CPU assertions below are autogenerated (see the NOTE at the top of
; the file) and pin both the instruction sequence and the `sched` annotations.
; The SKX assertions expect vmovsd/vmovapd here rather than vblendpd.
13 define <2 x double> @test_blendpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
14 ; GENERIC-LABEL: test_blendpd:
16 ; GENERIC-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50]
17 ; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
18 ; GENERIC-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
19 ; GENERIC-NEXT: retq # sched: [1:1.00]
21 ; SLM-LABEL: test_blendpd:
23 ; SLM-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:1.00]
24 ; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
25 ; SLM-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [4:1.00]
26 ; SLM-NEXT: retq # sched: [4:1.00]
28 ; SANDY-LABEL: test_blendpd:
30 ; SANDY-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50]
31 ; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
32 ; SANDY-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
33 ; SANDY-NEXT: retq # sched: [1:1.00]
35 ; HASWELL-LABEL: test_blendpd:
37 ; HASWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33]
38 ; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
39 ; HASWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [1:0.50]
40 ; HASWELL-NEXT: retq # sched: [2:1.00]
42 ; BROADWELL-LABEL: test_blendpd:
44 ; BROADWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33]
45 ; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
46 ; BROADWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [6:0.50]
47 ; BROADWELL-NEXT: retq # sched: [7:1.00]
49 ; SKYLAKE-LABEL: test_blendpd:
51 ; SKYLAKE-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33]
52 ; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
53 ; SKYLAKE-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
54 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
56 ; SKX-LABEL: test_blendpd:
58 ; SKX-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:1.00]
59 ; SKX-NEXT: vmovapd (%rdi), %xmm2 # sched: [6:0.50]
60 ; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
61 ; SKX-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm2[1] sched: [1:1.00]
62 ; SKX-NEXT: retq # sched: [7:1.00]
64 ; BTVER2-LABEL: test_blendpd:
66 ; BTVER2-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50]
67 ; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
68 ; BTVER2-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [6:1.00]
69 ; BTVER2-NEXT: retq # sched: [4:1.00]
71 ; ZNVER1-LABEL: test_blendpd:
73 ; ZNVER1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50]
74 ; ZNVER1-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
75 ; ZNVER1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [8:0.50]
76 ; ZNVER1-NEXT: retq # sched: [1:0.50]
77 %1 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 3>
78 %2 = load <2 x double>, <2 x double> *%a2, align 16
79 %3 = fadd <2 x double> %a1, %1
80 %4 = shufflevector <2 x double> %3, <2 x double> %2, <2 x i32> <i32 0, i32 3>
; test_blendps: covers (v)blendps with a register source and with a memory
; source. The first shufflevector (mask <0, 5, 6, 3>) takes lanes 1-2 from %a1;
; the second (mask <0, 5, 2, 3>) takes lane 1 from the vector loaded through
; %a2. The autogenerated per-CPU assertions pin the two blends and their
; `sched` annotations.
84 define <4 x float> @test_blendps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
85 ; GENERIC-LABEL: test_blendps:
87 ; GENERIC-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
88 ; GENERIC-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [7:0.50]
89 ; GENERIC-NEXT: retq # sched: [1:1.00]
91 ; SLM-LABEL: test_blendps:
93 ; SLM-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:1.00]
94 ; SLM-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [4:1.00]
95 ; SLM-NEXT: retq # sched: [4:1.00]
97 ; SANDY-LABEL: test_blendps:
99 ; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
100 ; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [7:0.50]
101 ; SANDY-NEXT: retq # sched: [1:1.00]
103 ; HASWELL-LABEL: test_blendps:
105 ; HASWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
106 ; HASWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [1:0.50]
107 ; HASWELL-NEXT: retq # sched: [2:1.00]
109 ; BROADWELL-LABEL: test_blendps:
111 ; BROADWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
112 ; BROADWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [6:0.50]
113 ; BROADWELL-NEXT: retq # sched: [7:1.00]
115 ; SKYLAKE-LABEL: test_blendps:
117 ; SKYLAKE-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
118 ; SKYLAKE-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [7:0.50]
119 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
121 ; SKX-LABEL: test_blendps:
123 ; SKX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
124 ; SKX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [7:0.50]
125 ; SKX-NEXT: retq # sched: [7:1.00]
127 ; BTVER2-LABEL: test_blendps:
129 ; BTVER2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
130 ; BTVER2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [6:1.00]
131 ; BTVER2-NEXT: retq # sched: [4:1.00]
133 ; ZNVER1-LABEL: test_blendps:
135 ; ZNVER1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
136 ; ZNVER1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [8:0.50]
137 ; ZNVER1-NEXT: retq # sched: [1:0.50]
138 %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
139 %2 = load <4 x float>, <4 x float> *%a2, align 16
140 %3 = shufflevector <4 x float> %1, <4 x float> %2, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
; test_blendvpd: calls the llvm.x86.sse41.blendvpd intrinsic twice with the
; same mask %a2, first with %a1 as the second source and then with a vector
; loaded through %a3, so both the register and the memory form are covered.
; The GENERIC and SLM assertions additionally expect register copies around
; the non-VEX blendvpd (the mask is moved into xmm0 first); the other
; configurations expect the four-operand vblendvpd with no copies.
144 define <2 x double> @test_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) {
145 ; GENERIC-LABEL: test_blendvpd:
147 ; GENERIC-NEXT: movapd %xmm0, %xmm3 # sched: [1:1.00]
148 ; GENERIC-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00]
149 ; GENERIC-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:1.00]
150 ; GENERIC-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:1.00]
151 ; GENERIC-NEXT: movapd %xmm3, %xmm0 # sched: [1:1.00]
152 ; GENERIC-NEXT: retq # sched: [1:1.00]
154 ; SLM-LABEL: test_blendvpd:
156 ; SLM-NEXT: movapd %xmm0, %xmm3 # sched: [1:1.00]
157 ; SLM-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00]
158 ; SLM-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [1:1.00]
159 ; SLM-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [4:1.00]
160 ; SLM-NEXT: movapd %xmm3, %xmm0 # sched: [1:1.00]
161 ; SLM-NEXT: retq # sched: [4:1.00]
163 ; SANDY-LABEL: test_blendvpd:
165 ; SANDY-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
166 ; SANDY-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
167 ; SANDY-NEXT: retq # sched: [1:1.00]
169 ; HASWELL-LABEL: test_blendvpd:
171 ; HASWELL-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
172 ; HASWELL-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:2.00]
173 ; HASWELL-NEXT: retq # sched: [2:1.00]
175 ; BROADWELL-LABEL: test_blendvpd:
177 ; BROADWELL-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
178 ; BROADWELL-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
179 ; BROADWELL-NEXT: retq # sched: [7:1.00]
181 ; SKYLAKE-LABEL: test_blendvpd:
183 ; SKYLAKE-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67]
184 ; SKYLAKE-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
185 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
187 ; SKX-LABEL: test_blendvpd:
189 ; SKX-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67]
190 ; SKX-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
191 ; SKX-NEXT: retq # sched: [7:1.00]
193 ; BTVER2-LABEL: test_blendvpd:
195 ; BTVER2-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
196 ; BTVER2-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
197 ; BTVER2-NEXT: retq # sched: [4:1.00]
199 ; ZNVER1-LABEL: test_blendvpd:
201 ; ZNVER1-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
202 ; ZNVER1-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
203 ; ZNVER1-NEXT: retq # sched: [1:0.50]
204 %1 = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
205 %2 = load <2 x double>, <2 x double> *%a3, align 16
206 %3 = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %1, <2 x double> %2, <2 x double> %a2)
209 declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
; test_blendvps: single-precision counterpart of the blendvpd test. It calls
; the llvm.x86.sse41.blendvps intrinsic twice with the same mask %a2, once
; with register sources and once with a vector loaded through %a3.
; The load here carries no explicit `align`, unlike the loads in the
; neighbouring tests; the assertions still expect it folded into the
; memory-operand blend.
; The GENERIC and SLM assertions expect extra movaps copies around the non-VEX
; blendvps (the mask is moved into xmm0 first).
211 define <4 x float> @test_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) {
212 ; GENERIC-LABEL: test_blendvps:
214 ; GENERIC-NEXT: movaps %xmm0, %xmm3 # sched: [1:1.00]
215 ; GENERIC-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00]
216 ; GENERIC-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [2:1.00]
217 ; GENERIC-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [8:1.00]
218 ; GENERIC-NEXT: movaps %xmm3, %xmm0 # sched: [1:1.00]
219 ; GENERIC-NEXT: retq # sched: [1:1.00]
221 ; SLM-LABEL: test_blendvps:
223 ; SLM-NEXT: movaps %xmm0, %xmm3 # sched: [1:1.00]
224 ; SLM-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00]
225 ; SLM-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [1:1.00]
226 ; SLM-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [4:1.00]
227 ; SLM-NEXT: movaps %xmm3, %xmm0 # sched: [1:1.00]
228 ; SLM-NEXT: retq # sched: [4:1.00]
230 ; SANDY-LABEL: test_blendvps:
232 ; SANDY-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
233 ; SANDY-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
234 ; SANDY-NEXT: retq # sched: [1:1.00]
236 ; HASWELL-LABEL: test_blendvps:
238 ; HASWELL-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
239 ; HASWELL-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:2.00]
240 ; HASWELL-NEXT: retq # sched: [2:1.00]
242 ; BROADWELL-LABEL: test_blendvps:
244 ; BROADWELL-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
245 ; BROADWELL-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
246 ; BROADWELL-NEXT: retq # sched: [7:1.00]
248 ; SKYLAKE-LABEL: test_blendvps:
250 ; SKYLAKE-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67]
251 ; SKYLAKE-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
252 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
254 ; SKX-LABEL: test_blendvps:
256 ; SKX-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67]
257 ; SKX-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
258 ; SKX-NEXT: retq # sched: [7:1.00]
260 ; BTVER2-LABEL: test_blendvps:
262 ; BTVER2-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
263 ; BTVER2-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
264 ; BTVER2-NEXT: retq # sched: [4:1.00]
266 ; ZNVER1-LABEL: test_blendvps:
268 ; ZNVER1-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
269 ; ZNVER1-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
270 ; ZNVER1-NEXT: retq # sched: [1:0.50]
271 %1 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
272 %2 = load <4 x float>, <4 x float> *%a3
273 %3 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %1, <4 x float> %2, <4 x float> %a2)
276 declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
; test_dppd: calls the llvm.x86.sse41.dppd intrinsic twice with immediate 7,
; first on %a0/%a1 and then on the previous result and a vector loaded through
; %a2, so both the register and the memory form of (v)dppd are covered.
; NOTE(review): the ZNVER1 assertions show `[100:?]` for vdppd, which looks
; like a placeholder rather than measured scheduling data - confirm against
; the znver1 scheduling model before relying on it.
278 define <2 x double> @test_dppd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
279 ; GENERIC-LABEL: test_dppd:
281 ; GENERIC-NEXT: dppd $7, %xmm1, %xmm0 # sched: [9:1.00]
282 ; GENERIC-NEXT: dppd $7, (%rdi), %xmm0 # sched: [15:1.00]
283 ; GENERIC-NEXT: retq # sched: [1:1.00]
285 ; SLM-LABEL: test_dppd:
287 ; SLM-NEXT: dppd $7, %xmm1, %xmm0 # sched: [3:1.00]
288 ; SLM-NEXT: dppd $7, (%rdi), %xmm0 # sched: [6:1.00]
289 ; SLM-NEXT: retq # sched: [4:1.00]
291 ; SANDY-LABEL: test_dppd:
293 ; SANDY-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
294 ; SANDY-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00]
295 ; SANDY-NEXT: retq # sched: [1:1.00]
297 ; HASWELL-LABEL: test_dppd:
299 ; HASWELL-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
300 ; HASWELL-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
301 ; HASWELL-NEXT: retq # sched: [2:1.00]
303 ; BROADWELL-LABEL: test_dppd:
305 ; BROADWELL-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
306 ; BROADWELL-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [14:1.00]
307 ; BROADWELL-NEXT: retq # sched: [7:1.00]
309 ; SKYLAKE-LABEL: test_dppd:
311 ; SKYLAKE-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
312 ; SKYLAKE-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00]
313 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
315 ; SKX-LABEL: test_dppd:
317 ; SKX-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
318 ; SKX-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00]
319 ; SKX-NEXT: retq # sched: [7:1.00]
321 ; BTVER2-LABEL: test_dppd:
323 ; BTVER2-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:3.00]
324 ; BTVER2-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [14:3.00]
325 ; BTVER2-NEXT: retq # sched: [4:1.00]
327 ; ZNVER1-LABEL: test_dppd:
329 ; ZNVER1-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [100:?]
330 ; ZNVER1-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [100:?]
331 ; ZNVER1-NEXT: retq # sched: [1:0.50]
332 %1 = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7)
333 %2 = load <2 x double>, <2 x double> *%a2, align 16
334 %3 = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %1, <2 x double> %2, i8 7)
337 declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone
; test_dpps: calls the llvm.x86.sse41.dpps intrinsic twice with immediate 7,
; first on %a0/%a1 and then on the previous result and a vector loaded through
; %a2, so both the register and the memory form of (v)dpps are covered.
; NOTE(review): the ZNVER1 assertions show `[100:?]` for vdpps, which looks
; like a placeholder rather than measured scheduling data - confirm against
; the znver1 scheduling model before relying on it.
339 define <4 x float> @test_dpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
340 ; GENERIC-LABEL: test_dpps:
342 ; GENERIC-NEXT: dpps $7, %xmm1, %xmm0 # sched: [12:2.00]
343 ; GENERIC-NEXT: dpps $7, (%rdi), %xmm0 # sched: [7:1.00]
344 ; GENERIC-NEXT: retq # sched: [1:1.00]
346 ; SLM-LABEL: test_dpps:
348 ; SLM-NEXT: dpps $7, %xmm1, %xmm0 # sched: [3:1.00]
349 ; SLM-NEXT: dpps $7, (%rdi), %xmm0 # sched: [6:1.00]
350 ; SLM-NEXT: retq # sched: [4:1.00]
352 ; SANDY-LABEL: test_dpps:
354 ; SANDY-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [12:2.00]
355 ; SANDY-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
356 ; SANDY-NEXT: retq # sched: [1:1.00]
358 ; HASWELL-LABEL: test_dpps:
360 ; HASWELL-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [14:2.00]
361 ; HASWELL-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [14:2.00]
362 ; HASWELL-NEXT: retq # sched: [2:1.00]
364 ; BROADWELL-LABEL: test_dpps:
366 ; BROADWELL-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [14:2.00]
367 ; BROADWELL-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [19:2.00]
368 ; BROADWELL-NEXT: retq # sched: [7:1.00]
370 ; SKYLAKE-LABEL: test_dpps:
372 ; SKYLAKE-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [13:1.33]
373 ; SKYLAKE-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [19:1.33]
374 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
376 ; SKX-LABEL: test_dpps:
378 ; SKX-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [13:1.33]
379 ; SKX-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [19:1.33]
380 ; SKX-NEXT: retq # sched: [7:1.00]
382 ; BTVER2-LABEL: test_dpps:
384 ; BTVER2-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [11:3.00]
385 ; BTVER2-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [16:3.00]
386 ; BTVER2-NEXT: retq # sched: [4:1.00]
388 ; ZNVER1-LABEL: test_dpps:
390 ; ZNVER1-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [100:?]
391 ; ZNVER1-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [100:?]
392 ; ZNVER1-NEXT: retq # sched: [1:0.50]
393 %1 = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7)
394 %2 = load <4 x float>, <4 x float> *%a2, align 16
395 %3 = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %1, <4 x float> %2, i8 7)
398 declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone
; test_extractps: extracts elements 3 and 1 of %a0 and bitcasts each float to
; i32. The element-1 value is stored through %a1, which the assertions expect
; as (v)extractps $1 with a memory destination. The element-3 value is
; expected as (v)extractps $3 into %eax.
400 define i32 @test_extractps(<4 x float> %a0, i32 *%a1) {
401 ; GENERIC-LABEL: test_extractps:
403 ; GENERIC-NEXT: extractps $3, %xmm0, %eax # sched: [3:1.00]
404 ; GENERIC-NEXT: extractps $1, %xmm0, (%rdi) # sched: [5:1.00]
405 ; GENERIC-NEXT: retq # sched: [1:1.00]
407 ; SLM-LABEL: test_extractps:
409 ; SLM-NEXT: extractps $3, %xmm0, %eax # sched: [1:1.00]
410 ; SLM-NEXT: extractps $1, %xmm0, (%rdi) # sched: [4:2.00]
411 ; SLM-NEXT: retq # sched: [4:1.00]
413 ; SANDY-LABEL: test_extractps:
415 ; SANDY-NEXT: vextractps $3, %xmm0, %eax # sched: [3:1.00]
416 ; SANDY-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [5:1.00]
417 ; SANDY-NEXT: retq # sched: [1:1.00]
419 ; HASWELL-LABEL: test_extractps:
421 ; HASWELL-NEXT: vextractps $3, %xmm0, %eax # sched: [2:1.00]
422 ; HASWELL-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [1:1.00]
423 ; HASWELL-NEXT: retq # sched: [2:1.00]
425 ; BROADWELL-LABEL: test_extractps:
427 ; BROADWELL-NEXT: vextractps $3, %xmm0, %eax # sched: [2:1.00]
428 ; BROADWELL-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [2:1.00]
429 ; BROADWELL-NEXT: retq # sched: [7:1.00]
431 ; SKYLAKE-LABEL: test_extractps:
433 ; SKYLAKE-NEXT: vextractps $3, %xmm0, %eax # sched: [3:1.00]
434 ; SKYLAKE-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [2:1.00]
435 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
437 ; SKX-LABEL: test_extractps:
439 ; SKX-NEXT: vextractps $3, %xmm0, %eax # sched: [3:1.00]
440 ; SKX-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [2:1.00]
441 ; SKX-NEXT: retq # sched: [7:1.00]
443 ; BTVER2-LABEL: test_extractps:
445 ; BTVER2-NEXT: vextractps $3, %xmm0, %eax # sched: [1:0.50]
446 ; BTVER2-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [6:1.00]
447 ; BTVER2-NEXT: retq # sched: [4:1.00]
449 ; ZNVER1-LABEL: test_extractps:
451 ; ZNVER1-NEXT: vextractps $3, %xmm0, %eax # sched: [2:2.00]
452 ; ZNVER1-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [5:2.50]
453 ; ZNVER1-NEXT: retq # sched: [1:0.50]
454 %1 = extractelement <4 x float> %a0, i32 3
455 %2 = extractelement <4 x float> %a0, i32 1
456 %3 = bitcast float %1 to i32
457 %4 = bitcast float %2 to i32
458 store i32 %4, i32 *%a1
; test_insertps: covers (v)insertps in register and memory form. The register
; form comes from the llvm.x86.sse41.insertps intrinsic with immediate 17; the
; assertions show the result as zero,xmm1[0],xmm0[2,3]. The memory form comes
; from loading a scalar float through %a2 and inserting it at index 3.
462 define <4 x float> @test_insertps(<4 x float> %a0, <4 x float> %a1, float *%a2) {
463 ; GENERIC-LABEL: test_insertps:
465 ; GENERIC-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
466 ; GENERIC-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
467 ; GENERIC-NEXT: retq # sched: [1:1.00]
469 ; SLM-LABEL: test_insertps:
471 ; SLM-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
472 ; SLM-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [4:1.00]
473 ; SLM-NEXT: retq # sched: [4:1.00]
475 ; SANDY-LABEL: test_insertps:
477 ; SANDY-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
478 ; SANDY-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
479 ; SANDY-NEXT: retq # sched: [1:1.00]
481 ; HASWELL-LABEL: test_insertps:
483 ; HASWELL-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
484 ; HASWELL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [1:1.00]
485 ; HASWELL-NEXT: retq # sched: [2:1.00]
487 ; BROADWELL-LABEL: test_insertps:
489 ; BROADWELL-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
490 ; BROADWELL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [6:1.00]
491 ; BROADWELL-NEXT: retq # sched: [7:1.00]
493 ; SKYLAKE-LABEL: test_insertps:
495 ; SKYLAKE-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
496 ; SKYLAKE-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
497 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
499 ; SKX-LABEL: test_insertps:
501 ; SKX-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
502 ; SKX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
503 ; SKX-NEXT: retq # sched: [7:1.00]
505 ; BTVER2-LABEL: test_insertps:
507 ; BTVER2-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:0.50]
508 ; BTVER2-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [6:1.00]
509 ; BTVER2-NEXT: retq # sched: [4:1.00]
511 ; ZNVER1-LABEL: test_insertps:
513 ; ZNVER1-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:0.50]
514 ; ZNVER1-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [8:0.50]
515 ; ZNVER1-NEXT: retq # sched: [1:0.50]
516 %1 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 17)
517 %2 = load float, float *%a2
518 %3 = insertelement <4 x float> %1, float %2, i32 3
521 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
; test_movntdqa: calls the llvm.x86.sse41.movntdqa intrinsic on the pointer
; %a0. The assertions expect a single (v)movntdqa load from (%rdi) into %xmm0
; for every CPU configuration, differing only in the `sched` annotation.
523 define <2 x i64> @test_movntdqa(i8* %a0) {
524 ; GENERIC-LABEL: test_movntdqa:
526 ; GENERIC-NEXT: movntdqa (%rdi), %xmm0 # sched: [6:0.50]
527 ; GENERIC-NEXT: retq # sched: [1:1.00]
529 ; SLM-LABEL: test_movntdqa:
531 ; SLM-NEXT: movntdqa (%rdi), %xmm0 # sched: [3:1.00]
532 ; SLM-NEXT: retq # sched: [4:1.00]
534 ; SANDY-LABEL: test_movntdqa:
536 ; SANDY-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [6:0.50]
537 ; SANDY-NEXT: retq # sched: [1:1.00]
539 ; HASWELL-LABEL: test_movntdqa:
541 ; HASWELL-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [1:0.50]
542 ; HASWELL-NEXT: retq # sched: [2:1.00]
544 ; BROADWELL-LABEL: test_movntdqa:
546 ; BROADWELL-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [5:0.50]
547 ; BROADWELL-NEXT: retq # sched: [7:1.00]
549 ; SKYLAKE-LABEL: test_movntdqa:
551 ; SKYLAKE-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [6:0.50]
552 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
554 ; SKX-LABEL: test_movntdqa:
556 ; SKX-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [6:0.50]
557 ; SKX-NEXT: retq # sched: [7:1.00]
559 ; BTVER2-LABEL: test_movntdqa:
561 ; BTVER2-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [5:1.00]
562 ; BTVER2-NEXT: retq # sched: [4:1.00]
564 ; ZNVER1-LABEL: test_movntdqa:
566 ; ZNVER1-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [8:0.50]
567 ; ZNVER1-NEXT: retq # sched: [1:0.50]
568 %1 = call <2 x i64> @llvm.x86.sse41.movntdqa(i8* %a0)
571 declare <2 x i64> @llvm.x86.sse41.movntdqa(i8*) nounwind readnone
; test_mpsadbw: calls the llvm.x86.sse41.mpsadbw intrinsic twice with
; immediate 7. The <8 x i16> result of the first call is bitcast back to
; <16 x i8> and fed to the second call together with a vector loaded through
; %a2, so both the register and the memory form of (v)mpsadbw are covered.
; NOTE(review): the ZNVER1 assertions show `[100:?]` for vmpsadbw, which looks
; like a placeholder rather than measured scheduling data - confirm against
; the znver1 scheduling model before relying on it.
573 define <8 x i16> @test_mpsadbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
574 ; GENERIC-LABEL: test_mpsadbw:
576 ; GENERIC-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [5:1.00]
577 ; GENERIC-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [11:1.00]
578 ; GENERIC-NEXT: retq # sched: [1:1.00]
580 ; SLM-LABEL: test_mpsadbw:
582 ; SLM-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [7:1.00]
583 ; SLM-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [10:1.00]
584 ; SLM-NEXT: retq # sched: [4:1.00]
586 ; SANDY-LABEL: test_mpsadbw:
588 ; SANDY-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
589 ; SANDY-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
590 ; SANDY-NEXT: retq # sched: [1:1.00]
592 ; HASWELL-LABEL: test_mpsadbw:
594 ; HASWELL-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [7:2.00]
595 ; HASWELL-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
596 ; HASWELL-NEXT: retq # sched: [2:1.00]
598 ; BROADWELL-LABEL: test_mpsadbw:
600 ; BROADWELL-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [7:2.00]
601 ; BROADWELL-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
602 ; BROADWELL-NEXT: retq # sched: [7:1.00]
604 ; SKYLAKE-LABEL: test_mpsadbw:
606 ; SKYLAKE-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [4:2.00]
607 ; SKYLAKE-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
608 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
610 ; SKX-LABEL: test_mpsadbw:
612 ; SKX-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [4:2.00]
613 ; SKX-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
614 ; SKX-NEXT: retq # sched: [7:1.00]
616 ; BTVER2-LABEL: test_mpsadbw:
618 ; BTVER2-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
619 ; BTVER2-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
620 ; BTVER2-NEXT: retq # sched: [4:1.00]
622 ; ZNVER1-LABEL: test_mpsadbw:
624 ; ZNVER1-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [100:?]
625 ; ZNVER1-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [100:?]
626 ; ZNVER1-NEXT: retq # sched: [1:0.50]
627 %1 = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i8 7)
628 %2 = bitcast <8 x i16> %1 to <16 x i8>
629 %3 = load <16 x i8>, <16 x i8> *%a2, align 16
630 %4 = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %2, <16 x i8> %3, i8 7)
633 declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) nounwind readnone
; test_packusdw: calls the llvm.x86.sse41.packusdw intrinsic twice. The
; <8 x i16> result of the first call is bitcast to <4 x i32> and fed to the
; second call together with a vector loaded through %a2, so both the register
; and the memory form of (v)packusdw are covered.
635 define <8 x i16> @test_packusdw(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
636 ; GENERIC-LABEL: test_packusdw:
638 ; GENERIC-NEXT: packusdw %xmm1, %xmm0 # sched: [1:0.50]
639 ; GENERIC-NEXT: packusdw (%rdi), %xmm0 # sched: [7:0.50]
640 ; GENERIC-NEXT: retq # sched: [1:1.00]
642 ; SLM-LABEL: test_packusdw:
644 ; SLM-NEXT: packusdw %xmm1, %xmm0 # sched: [1:1.00]
645 ; SLM-NEXT: packusdw (%rdi), %xmm0 # sched: [4:1.00]
646 ; SLM-NEXT: retq # sched: [4:1.00]
648 ; SANDY-LABEL: test_packusdw:
650 ; SANDY-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
651 ; SANDY-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
652 ; SANDY-NEXT: retq # sched: [1:1.00]
654 ; HASWELL-LABEL: test_packusdw:
656 ; HASWELL-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
657 ; HASWELL-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [1:1.00]
658 ; HASWELL-NEXT: retq # sched: [2:1.00]
660 ; BROADWELL-LABEL: test_packusdw:
662 ; BROADWELL-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
663 ; BROADWELL-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
664 ; BROADWELL-NEXT: retq # sched: [7:1.00]
666 ; SKYLAKE-LABEL: test_packusdw:
668 ; SKYLAKE-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
669 ; SKYLAKE-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
670 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
672 ; SKX-LABEL: test_packusdw:
674 ; SKX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
675 ; SKX-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
676 ; SKX-NEXT: retq # sched: [7:1.00]
678 ; BTVER2-LABEL: test_packusdw:
680 ; BTVER2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
681 ; BTVER2-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
682 ; BTVER2-NEXT: retq # sched: [4:1.00]
684 ; ZNVER1-LABEL: test_packusdw:
686 ; ZNVER1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
687 ; ZNVER1-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
688 ; ZNVER1-NEXT: retq # sched: [1:0.50]
689 %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1)
690 %2 = bitcast <8 x i16> %1 to <4 x i32>
691 %3 = load <4 x i32>, <4 x i32> *%a2, align 16
692 %4 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %2, <4 x i32> %3)
695 declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone
; test_pblendvb: calls the llvm.x86.sse41.pblendvb intrinsic twice with the
; same mask %a2, first with %a1 as the second source and then with a vector
; loaded through %a3, so both the register and the memory form are covered.
; The GENERIC and SLM assertions additionally expect movdqa/movaps copies
; around the non-VEX pblendvb (the mask is moved into xmm0 first); the other
; configurations expect the four-operand vpblendvb with no copies.
697 define <16 x i8> @test_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2, <16 x i8> *%a3) {
698 ; GENERIC-LABEL: test_pblendvb:
700 ; GENERIC-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.33]
701 ; GENERIC-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00]
702 ; GENERIC-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [8:1.00]
703 ; GENERIC-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [6:1.00]
704 ; GENERIC-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.33]
705 ; GENERIC-NEXT: retq # sched: [1:1.00]
707 ; SLM-LABEL: test_pblendvb:
709 ; SLM-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.50]
710 ; SLM-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00]
711 ; SLM-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [1:1.00]
712 ; SLM-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [4:1.00]
713 ; SLM-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.50]
714 ; SLM-NEXT: retq # sched: [4:1.00]
716 ; SANDY-LABEL: test_pblendvb:
718 ; SANDY-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
719 ; SANDY-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
720 ; SANDY-NEXT: retq # sched: [1:1.00]
722 ; HASWELL-LABEL: test_pblendvb:
724 ; HASWELL-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
725 ; HASWELL-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:2.00]
726 ; HASWELL-NEXT: retq # sched: [2:1.00]
728 ; BROADWELL-LABEL: test_pblendvb:
730 ; BROADWELL-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
731 ; BROADWELL-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
732 ; BROADWELL-NEXT: retq # sched: [7:1.00]
734 ; SKYLAKE-LABEL: test_pblendvb:
736 ; SKYLAKE-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67]
737 ; SKYLAKE-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
738 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
740 ; SKX-LABEL: test_pblendvb:
742 ; SKX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67]
743 ; SKX-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
744 ; SKX-NEXT: retq # sched: [7:1.00]
746 ; BTVER2-LABEL: test_pblendvb:
748 ; BTVER2-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
749 ; BTVER2-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
750 ; BTVER2-NEXT: retq # sched: [4:1.00]
752 ; ZNVER1-LABEL: test_pblendvb:
754 ; ZNVER1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
755 ; ZNVER1-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
756 ; ZNVER1-NEXT: retq # sched: [1:0.50]
757 %1 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2)
758 %2 = load <16 x i8>, <16 x i8> *%a3, align 16
759 %3 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %1, <16 x i8> %2, <16 x i8> %a2)
762 declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
; test_pblendw: covers (v)pblendw with a register source and with a memory
; source. The first shufflevector (mask <0, 9, 2, 11, 4, 13, 6, 15>) takes the
; odd lanes from %a1; the second (mask <0, 1, 10, 11, 4, 5, 6, 15>) takes
; lanes 2, 3 and 7 from the vector loaded through %a2. The autogenerated
; per-CPU assertions pin the two blends and their `sched` annotations.
764 define <8 x i16> @test_pblendw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
765 ; GENERIC-LABEL: test_pblendw:
767 ; GENERIC-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50]
768 ; GENERIC-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7] sched: [7:0.50]
769 ; GENERIC-NEXT: retq # sched: [1:1.00]
771 ; SLM-LABEL: test_pblendw:
773 ; SLM-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
774 ; SLM-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7] sched: [4:1.00]
775 ; SLM-NEXT: retq # sched: [4:1.00]
777 ; SANDY-LABEL: test_pblendw:
779 ; SANDY-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50]
780 ; SANDY-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7] sched: [7:0.50]
781 ; SANDY-NEXT: retq # sched: [1:1.00]
783 ; HASWELL-LABEL: test_pblendw:
785 ; HASWELL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
786 ; HASWELL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7] sched: [4:1.00]
787 ; HASWELL-NEXT: retq # sched: [2:1.00]
789 ; BROADWELL-LABEL: test_pblendw:
791 ; BROADWELL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
792 ; BROADWELL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7] sched: [6:1.00]
793 ; BROADWELL-NEXT: retq # sched: [7:1.00]
795 ; SKYLAKE-LABEL: test_pblendw:
797 ; SKYLAKE-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
798 ; SKYLAKE-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7] sched: [7:1.00]
799 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
801 ; SKX-LABEL: test_pblendw:
803 ; SKX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
804 ; SKX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7] sched: [7:1.00]
805 ; SKX-NEXT: retq # sched: [7:1.00]
807 ; BTVER2-LABEL: test_pblendw:
809 ; BTVER2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50]
810 ; BTVER2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7] sched: [6:1.00]
811 ; BTVER2-NEXT: retq # sched: [4:1.00]
813 ; ZNVER1-LABEL: test_pblendw:
815 ; ZNVER1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.33]
816 ; ZNVER1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7] sched: [8:0.50]
817 ; ZNVER1-NEXT: retq # sched: [1:0.50]
818 %1 = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
819 %2 = load <8 x i16>, <8 x i16> *%a2, align 16
820 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 15>
; test_pcmpeqq: 64-bit lane equality compare in register-operand and
; memory-operand form. %a0 is compared with %a1 and the i1 results are
; sign-extended to <2 x i64>; that mask is then compared with the
; 16-byte-aligned load of %a2 and sign-extended again.
; Most check groups expect two pcmpeqq/vpcmpeqq instructions. The SKX group
; instead expects each compare to write mask register %k0 followed by a
; vpmovm2q back into %xmm0, and those vpmovm2q lines carry no "sched"
; annotation.
824 define <2 x i64> @test_pcmpeqq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
825 ; GENERIC-LABEL: test_pcmpeqq:
827 ; GENERIC-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50]
828 ; GENERIC-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [7:0.50]
829 ; GENERIC-NEXT: retq # sched: [1:1.00]
831 ; SLM-LABEL: test_pcmpeqq:
833 ; SLM-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50]
834 ; SLM-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [4:1.00]
835 ; SLM-NEXT: retq # sched: [4:1.00]
837 ; SANDY-LABEL: test_pcmpeqq:
839 ; SANDY-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
840 ; SANDY-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
841 ; SANDY-NEXT: retq # sched: [1:1.00]
843 ; HASWELL-LABEL: test_pcmpeqq:
845 ; HASWELL-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
846 ; HASWELL-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
847 ; HASWELL-NEXT: retq # sched: [2:1.00]
849 ; BROADWELL-LABEL: test_pcmpeqq:
851 ; BROADWELL-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
852 ; BROADWELL-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
853 ; BROADWELL-NEXT: retq # sched: [7:1.00]
855 ; SKYLAKE-LABEL: test_pcmpeqq:
857 ; SKYLAKE-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
858 ; SKYLAKE-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
859 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
861 ; SKX-LABEL: test_pcmpeqq:
863 ; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 # sched: [3:1.00]
864 ; SKX-NEXT: vpmovm2q %k0, %xmm0
865 ; SKX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0 # sched: [9:1.00]
866 ; SKX-NEXT: vpmovm2q %k0, %xmm0
867 ; SKX-NEXT: retq # sched: [7:1.00]
869 ; BTVER2-LABEL: test_pcmpeqq:
871 ; BTVER2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
872 ; BTVER2-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
873 ; BTVER2-NEXT: retq # sched: [4:1.00]
875 ; ZNVER1-LABEL: test_pcmpeqq:
877 ; ZNVER1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
878 ; ZNVER1-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
879 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-register compare; sext turns each i1 into an all-ones or zero lane.
880 %1 = icmp eq <2 x i64> %a0, %a1
881 %2 = sext <2 x i1> %1 to <2 x i64>
; Second compare uses the loaded vector as its other operand.
882 %3 = load <2 x i64>, <2 x i64>*%a2, align 16
883 %4 = icmp eq <2 x i64> %2, %3
884 %5 = sext <2 x i1> %4 to <2 x i64>
; test_pextrb: byte-lane extraction to a register and to memory.
; The visible IR extracts lanes 3 and 1 of %a0 and zero-extends lane 3 to i32.
; Every check group expects one pextrb/vpextrb of lane 3 into %eax and one of
; lane 1 written to (%rdi), each with its "sched" annotation.
888 define i32 @test_pextrb(<16 x i8> %a0, i8 *%a1) {
889 ; GENERIC-LABEL: test_pextrb:
891 ; GENERIC-NEXT: pextrb $3, %xmm0, %eax # sched: [3:1.00]
892 ; GENERIC-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [5:1.00]
893 ; GENERIC-NEXT: retq # sched: [1:1.00]
895 ; SLM-LABEL: test_pextrb:
897 ; SLM-NEXT: pextrb $3, %xmm0, %eax # sched: [1:1.00]
898 ; SLM-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [4:2.00]
899 ; SLM-NEXT: retq # sched: [4:1.00]
901 ; SANDY-LABEL: test_pextrb:
903 ; SANDY-NEXT: vpextrb $3, %xmm0, %eax # sched: [3:1.00]
904 ; SANDY-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [5:1.00]
905 ; SANDY-NEXT: retq # sched: [1:1.00]
907 ; HASWELL-LABEL: test_pextrb:
909 ; HASWELL-NEXT: vpextrb $3, %xmm0, %eax # sched: [2:1.00]
910 ; HASWELL-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [1:1.00]
911 ; HASWELL-NEXT: retq # sched: [2:1.00]
913 ; BROADWELL-LABEL: test_pextrb:
915 ; BROADWELL-NEXT: vpextrb $3, %xmm0, %eax # sched: [2:1.00]
916 ; BROADWELL-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [2:1.00]
917 ; BROADWELL-NEXT: retq # sched: [7:1.00]
919 ; SKYLAKE-LABEL: test_pextrb:
921 ; SKYLAKE-NEXT: vpextrb $3, %xmm0, %eax # sched: [3:1.00]
922 ; SKYLAKE-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [2:1.00]
923 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
925 ; SKX-LABEL: test_pextrb:
927 ; SKX-NEXT: vpextrb $3, %xmm0, %eax # sched: [3:1.00]
928 ; SKX-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [2:1.00]
929 ; SKX-NEXT: retq # sched: [7:1.00]
931 ; BTVER2-LABEL: test_pextrb:
933 ; BTVER2-NEXT: vpextrb $3, %xmm0, %eax # sched: [1:0.50]
934 ; BTVER2-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [6:1.00]
935 ; BTVER2-NEXT: retq # sched: [4:1.00]
937 ; ZNVER1-LABEL: test_pextrb:
939 ; ZNVER1-NEXT: vpextrb $3, %xmm0, %eax # sched: [1:0.25]
940 ; ZNVER1-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [8:1.00]
941 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Lane 3 feeds the zero-extended i32 value; lane 1 is the second extraction.
942 %1 = extractelement <16 x i8> %a0, i32 3
943 %2 = extractelement <16 x i8> %a0, i32 1
945 %3 = zext i8 %1 to i32
; test_pextrd: 32-bit lane extraction to a register and to memory.
; %a0 is added to itself, then lanes 3 and 1 of the sum are extracted and
; lane 1 is stored through %a1.
; Every check group expects a paddd/vpaddd followed by a pextrd/vpextrd of
; lane 3 into %eax and one of lane 1 written to (%rdi).
949 define i32 @test_pextrd(<4 x i32> %a0, i32 *%a1) {
950 ; GENERIC-LABEL: test_pextrd:
952 ; GENERIC-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.50]
953 ; GENERIC-NEXT: pextrd $3, %xmm0, %eax # sched: [3:1.00]
954 ; GENERIC-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [5:1.00]
955 ; GENERIC-NEXT: retq # sched: [1:1.00]
957 ; SLM-LABEL: test_pextrd:
959 ; SLM-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.50]
960 ; SLM-NEXT: pextrd $3, %xmm0, %eax # sched: [1:1.00]
961 ; SLM-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [4:2.00]
962 ; SLM-NEXT: retq # sched: [4:1.00]
964 ; SANDY-LABEL: test_pextrd:
966 ; SANDY-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
967 ; SANDY-NEXT: vpextrd $3, %xmm0, %eax # sched: [3:1.00]
968 ; SANDY-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [5:1.00]
969 ; SANDY-NEXT: retq # sched: [1:1.00]
971 ; HASWELL-LABEL: test_pextrd:
973 ; HASWELL-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
974 ; HASWELL-NEXT: vpextrd $3, %xmm0, %eax # sched: [2:1.00]
975 ; HASWELL-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [1:1.00]
976 ; HASWELL-NEXT: retq # sched: [2:1.00]
978 ; BROADWELL-LABEL: test_pextrd:
980 ; BROADWELL-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
981 ; BROADWELL-NEXT: vpextrd $3, %xmm0, %eax # sched: [2:1.00]
982 ; BROADWELL-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
983 ; BROADWELL-NEXT: retq # sched: [7:1.00]
985 ; SKYLAKE-LABEL: test_pextrd:
987 ; SKYLAKE-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
988 ; SKYLAKE-NEXT: vpextrd $3, %xmm0, %eax # sched: [3:1.00]
989 ; SKYLAKE-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
990 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
992 ; SKX-LABEL: test_pextrd:
994 ; SKX-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
995 ; SKX-NEXT: vpextrd $3, %xmm0, %eax # sched: [3:1.00]
996 ; SKX-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
997 ; SKX-NEXT: retq # sched: [7:1.00]
999 ; BTVER2-LABEL: test_pextrd:
1001 ; BTVER2-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
1002 ; BTVER2-NEXT: vpextrd $3, %xmm0, %eax # sched: [1:0.50]
1003 ; BTVER2-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [6:1.00]
1004 ; BTVER2-NEXT: retq # sched: [4:1.00]
1006 ; ZNVER1-LABEL: test_pextrd:
1008 ; ZNVER1-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.25]
1009 ; ZNVER1-NEXT: vpextrd $3, %xmm0, %eax # sched: [1:0.25]
1010 ; ZNVER1-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [8:1.00]
1011 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Both extractions read the sum %1 rather than %a0 directly.
1012 %1 = add <4 x i32> %a0, %a0
1013 %2 = extractelement <4 x i32> %1, i32 3
1014 %3 = extractelement <4 x i32> %1, i32 1
; Lane 1 goes to memory, matching the (%rdi) destination in the checks.
1015 store i32 %3, i32 *%a1
; test_pextrq: 64-bit lane extraction to a register and to memory.
; Lane 1 of %a0 is extracted twice (%1 and %2) and %2 is stored through %a2.
; %a1 is not referenced by any instruction visible in this function.
; Every check group expects a pextrq/vpextrq of lane 1 into %rax and another
; of lane 1 written to (%rdi).
1019 define i64 @test_pextrq(<2 x i64> %a0, <2 x i64> %a1, i64 *%a2) {
1020 ; GENERIC-LABEL: test_pextrq:
1022 ; GENERIC-NEXT: pextrq $1, %xmm0, %rax # sched: [3:1.00]
1023 ; GENERIC-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [5:1.00]
1024 ; GENERIC-NEXT: retq # sched: [1:1.00]
1026 ; SLM-LABEL: test_pextrq:
1028 ; SLM-NEXT: pextrq $1, %xmm0, %rax # sched: [1:1.00]
1029 ; SLM-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [4:2.00]
1030 ; SLM-NEXT: retq # sched: [4:1.00]
1032 ; SANDY-LABEL: test_pextrq:
1034 ; SANDY-NEXT: vpextrq $1, %xmm0, %rax # sched: [3:1.00]
1035 ; SANDY-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00]
1036 ; SANDY-NEXT: retq # sched: [1:1.00]
1038 ; HASWELL-LABEL: test_pextrq:
1040 ; HASWELL-NEXT: vpextrq $1, %xmm0, %rax # sched: [2:1.00]
1041 ; HASWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [1:1.00]
1042 ; HASWELL-NEXT: retq # sched: [2:1.00]
1044 ; BROADWELL-LABEL: test_pextrq:
1045 ; BROADWELL: # BB#0:
1046 ; BROADWELL-NEXT: vpextrq $1, %xmm0, %rax # sched: [2:1.00]
1047 ; BROADWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
1048 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1050 ; SKYLAKE-LABEL: test_pextrq:
1052 ; SKYLAKE-NEXT: vpextrq $1, %xmm0, %rax # sched: [3:1.00]
1053 ; SKYLAKE-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
1054 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1056 ; SKX-LABEL: test_pextrq:
1058 ; SKX-NEXT: vpextrq $1, %xmm0, %rax # sched: [3:1.00]
1059 ; SKX-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
1060 ; SKX-NEXT: retq # sched: [7:1.00]
1062 ; BTVER2-LABEL: test_pextrq:
1064 ; BTVER2-NEXT: vpextrq $1, %xmm0, %rax # sched: [1:0.50]
1065 ; BTVER2-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [6:1.00]
1066 ; BTVER2-NEXT: retq # sched: [4:1.00]
1068 ; ZNVER1-LABEL: test_pextrq:
1070 ; ZNVER1-NEXT: vpextrq $1, %xmm0, %rax # sched: [1:0.25]
1071 ; ZNVER1-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [8:1.00]
1072 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Two separate extractions of the same lane: one for the register form, one
; for the store form expected by the checks.
1073 %1 = extractelement <2 x i64> %a0, i32 1
1074 %2 = extractelement <2 x i64> %a0, i32 1
1075 store i64 %2, i64 *%a2
; test_pextrw: 16-bit lane extraction to a register and to memory.
; Lanes 3 and 1 of %a0 are extracted; lane 1 is stored through %a1 and lane 3
; is zero-extended to i32.
; Every check group expects a pextrw/vpextrw of lane 3 into %eax and one of
; lane 1 written to (%rdi).
1079 define i32 @test_pextrw(<8 x i16> %a0, i16 *%a1) {
1080 ; GENERIC-LABEL: test_pextrw:
1082 ; GENERIC-NEXT: pextrw $3, %xmm0, %eax # sched: [3:1.00]
1083 ; GENERIC-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [5:1.00]
1084 ; GENERIC-NEXT: retq # sched: [1:1.00]
1086 ; SLM-LABEL: test_pextrw:
1088 ; SLM-NEXT: pextrw $3, %xmm0, %eax # sched: [4:1.00]
1089 ; SLM-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [4:2.00]
1090 ; SLM-NEXT: retq # sched: [4:1.00]
1092 ; SANDY-LABEL: test_pextrw:
1094 ; SANDY-NEXT: vpextrw $3, %xmm0, %eax # sched: [3:1.00]
1095 ; SANDY-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [5:1.00]
1096 ; SANDY-NEXT: retq # sched: [1:1.00]
1098 ; HASWELL-LABEL: test_pextrw:
1100 ; HASWELL-NEXT: vpextrw $3, %xmm0, %eax # sched: [2:1.00]
1101 ; HASWELL-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [1:1.00]
1102 ; HASWELL-NEXT: retq # sched: [2:1.00]
1104 ; BROADWELL-LABEL: test_pextrw:
1105 ; BROADWELL: # BB#0:
1106 ; BROADWELL-NEXT: vpextrw $3, %xmm0, %eax # sched: [2:1.00]
1107 ; BROADWELL-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [2:1.00]
1108 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1110 ; SKYLAKE-LABEL: test_pextrw:
1112 ; SKYLAKE-NEXT: vpextrw $3, %xmm0, %eax # sched: [3:1.00]
1113 ; SKYLAKE-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [2:1.00]
1114 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1116 ; SKX-LABEL: test_pextrw:
1118 ; SKX-NEXT: vpextrw $3, %xmm0, %eax # sched: [3:1.00]
1119 ; SKX-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [2:1.00]
1120 ; SKX-NEXT: retq # sched: [7:1.00]
1122 ; BTVER2-LABEL: test_pextrw:
1124 ; BTVER2-NEXT: vpextrw $3, %xmm0, %eax # sched: [1:0.50]
1125 ; BTVER2-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [6:1.00]
1126 ; BTVER2-NEXT: retq # sched: [4:1.00]
1128 ; ZNVER1-LABEL: test_pextrw:
1130 ; ZNVER1-NEXT: vpextrw $3, %xmm0, %eax # sched: [1:0.25]
1131 ; ZNVER1-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [8:1.00]
1132 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1133 %1 = extractelement <8 x i16> %a0, i32 3
1134 %2 = extractelement <8 x i16> %a0, i32 1
; Lane 1 goes to memory; lane 3 is widened to the i32 result type.
1135 store i16 %2, i16 *%a1
1136 %3 = zext i16 %1 to i32
; test_phminposuw: the phminposuw intrinsic in memory-operand and
; register-operand form. A 16-byte-aligned <8 x i16> is loaded from %a0 and
; passed to the intrinsic, and the result is passed to the intrinsic again.
; Every check group expects the (%rdi) form first, then the %xmm0, %xmm0 form.
1140 define <8 x i16> @test_phminposuw(<8 x i16> *%a0) {
1141 ; GENERIC-LABEL: test_phminposuw:
1143 ; GENERIC-NEXT: phminposuw (%rdi), %xmm0 # sched: [11:1.00]
1144 ; GENERIC-NEXT: phminposuw %xmm0, %xmm0 # sched: [5:1.00]
1145 ; GENERIC-NEXT: retq # sched: [1:1.00]
1147 ; SLM-LABEL: test_phminposuw:
1149 ; SLM-NEXT: phminposuw (%rdi), %xmm0 # sched: [7:1.00]
1150 ; SLM-NEXT: phminposuw %xmm0, %xmm0 # sched: [4:1.00]
1151 ; SLM-NEXT: retq # sched: [4:1.00]
1153 ; SANDY-LABEL: test_phminposuw:
1155 ; SANDY-NEXT: vphminposuw (%rdi), %xmm0 # sched: [11:1.00]
1156 ; SANDY-NEXT: vphminposuw %xmm0, %xmm0 # sched: [5:1.00]
1157 ; SANDY-NEXT: retq # sched: [1:1.00]
1159 ; HASWELL-LABEL: test_phminposuw:
1161 ; HASWELL-NEXT: vphminposuw (%rdi), %xmm0 # sched: [5:1.00]
1162 ; HASWELL-NEXT: vphminposuw %xmm0, %xmm0 # sched: [5:1.00]
1163 ; HASWELL-NEXT: retq # sched: [2:1.00]
1165 ; BROADWELL-LABEL: test_phminposuw:
1166 ; BROADWELL: # BB#0:
1167 ; BROADWELL-NEXT: vphminposuw (%rdi), %xmm0 # sched: [10:1.00]
1168 ; BROADWELL-NEXT: vphminposuw %xmm0, %xmm0 # sched: [5:1.00]
1169 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1171 ; SKYLAKE-LABEL: test_phminposuw:
1173 ; SKYLAKE-NEXT: vphminposuw (%rdi), %xmm0 # sched: [10:0.50]
1174 ; SKYLAKE-NEXT: vphminposuw %xmm0, %xmm0 # sched: [4:0.33]
1175 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1177 ; SKX-LABEL: test_phminposuw:
1179 ; SKX-NEXT: vphminposuw (%rdi), %xmm0 # sched: [10:0.50]
1180 ; SKX-NEXT: vphminposuw %xmm0, %xmm0 # sched: [4:0.33]
1181 ; SKX-NEXT: retq # sched: [7:1.00]
1183 ; BTVER2-LABEL: test_phminposuw:
1185 ; BTVER2-NEXT: vphminposuw (%rdi), %xmm0 # sched: [7:1.00]
1186 ; BTVER2-NEXT: vphminposuw %xmm0, %xmm0 # sched: [2:1.00]
1187 ; BTVER2-NEXT: retq # sched: [4:1.00]
1189 ; ZNVER1-LABEL: test_phminposuw:
1191 ; ZNVER1-NEXT: vphminposuw (%rdi), %xmm0 # sched: [11:1.00]
1192 ; ZNVER1-NEXT: vphminposuw %xmm0, %xmm0 # sched: [4:1.00]
1193 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; The load feeds the first call (memory form); its result feeds the second
; call (register form).
1194 %1 = load <8 x i16>, <8 x i16> *%a0, align 16
1195 %2 = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %1)
1196 %3 = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %2)
; Declaration of the SSE4.1 phminposuw intrinsic: one <8 x i16> operand, a
; <8 x i16> result, marked nounwind readnone.
1199 declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone
; test_pinsrb: byte-lane insertion from a register and from memory.
; %a1 is inserted into lane 1 of %a0, then an i8 loaded from %a2 is inserted
; into lane 3 of that result.
; Every check group expects a pinsrb/vpinsrb of %edi into lane 1 followed by
; one of (%rsi) into lane 3.
1201 define <16 x i8> @test_pinsrb(<16 x i8> %a0, i8 %a1, i8 *%a2) {
1202 ; GENERIC-LABEL: test_pinsrb:
1204 ; GENERIC-NEXT: pinsrb $1, %edi, %xmm0 # sched: [2:1.00]
1205 ; GENERIC-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [7:0.50]
1206 ; GENERIC-NEXT: retq # sched: [1:1.00]
1208 ; SLM-LABEL: test_pinsrb:
1210 ; SLM-NEXT: pinsrb $1, %edi, %xmm0 # sched: [1:1.00]
1211 ; SLM-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [4:1.00]
1212 ; SLM-NEXT: retq # sched: [4:1.00]
1214 ; SANDY-LABEL: test_pinsrb:
1216 ; SANDY-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:1.00]
1217 ; SANDY-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50]
1218 ; SANDY-NEXT: retq # sched: [1:1.00]
1220 ; HASWELL-LABEL: test_pinsrb:
1222 ; HASWELL-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
1223 ; HASWELL-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [1:1.00]
1224 ; HASWELL-NEXT: retq # sched: [2:1.00]
1226 ; BROADWELL-LABEL: test_pinsrb:
1227 ; BROADWELL: # BB#0:
1228 ; BROADWELL-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
1229 ; BROADWELL-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
1230 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1232 ; SKYLAKE-LABEL: test_pinsrb:
1234 ; SKYLAKE-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
1235 ; SKYLAKE-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
1236 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1238 ; SKX-LABEL: test_pinsrb:
1240 ; SKX-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
1241 ; SKX-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
1242 ; SKX-NEXT: retq # sched: [7:1.00]
1244 ; BTVER2-LABEL: test_pinsrb:
1246 ; BTVER2-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [1:0.50]
1247 ; BTVER2-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
1248 ; BTVER2-NEXT: retq # sched: [4:1.00]
1250 ; ZNVER1-LABEL: test_pinsrb:
1252 ; ZNVER1-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [1:0.25]
1253 ; ZNVER1-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [8:0.50]
1254 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-sourced insert into lane 1.
1255 %1 = insertelement <16 x i8> %a0, i8 %a1, i32 1
; Memory-sourced insert into lane 3 of the previous result.
1256 %2 = load i8, i8 *%a2
1257 %3 = insertelement <16 x i8> %1, i8 %2, i32 3
; test_pinsrd: 32-bit lane insertion from a register and from memory.
; %a1 is inserted into lane 1 of %a0, then an i32 loaded from %a2 is inserted
; into lane 3 of that result.
; Every check group expects a pinsrd/vpinsrd of %edi into lane 1 followed by
; one of (%rsi) into lane 3.
1261 define <4 x i32> @test_pinsrd(<4 x i32> %a0, i32 %a1, i32 *%a2) {
1262 ; GENERIC-LABEL: test_pinsrd:
1264 ; GENERIC-NEXT: pinsrd $1, %edi, %xmm0 # sched: [2:1.00]
1265 ; GENERIC-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [7:0.50]
1266 ; GENERIC-NEXT: retq # sched: [1:1.00]
1268 ; SLM-LABEL: test_pinsrd:
1270 ; SLM-NEXT: pinsrd $1, %edi, %xmm0 # sched: [1:1.00]
1271 ; SLM-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [4:1.00]
1272 ; SLM-NEXT: retq # sched: [4:1.00]
1274 ; SANDY-LABEL: test_pinsrd:
1276 ; SANDY-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:1.00]
1277 ; SANDY-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50]
1278 ; SANDY-NEXT: retq # sched: [1:1.00]
1280 ; HASWELL-LABEL: test_pinsrd:
1282 ; HASWELL-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
1283 ; HASWELL-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [1:1.00]
1284 ; HASWELL-NEXT: retq # sched: [2:1.00]
1286 ; BROADWELL-LABEL: test_pinsrd:
1287 ; BROADWELL: # BB#0:
1288 ; BROADWELL-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
1289 ; BROADWELL-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
1290 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1292 ; SKYLAKE-LABEL: test_pinsrd:
1294 ; SKYLAKE-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
1295 ; SKYLAKE-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
1296 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1298 ; SKX-LABEL: test_pinsrd:
1300 ; SKX-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
1301 ; SKX-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
1302 ; SKX-NEXT: retq # sched: [7:1.00]
1304 ; BTVER2-LABEL: test_pinsrd:
1306 ; BTVER2-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [1:0.50]
1307 ; BTVER2-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
1308 ; BTVER2-NEXT: retq # sched: [4:1.00]
1310 ; ZNVER1-LABEL: test_pinsrd:
1312 ; ZNVER1-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [1:0.25]
1313 ; ZNVER1-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [8:0.50]
1314 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-sourced insert into lane 1.
1315 %1 = insertelement <4 x i32> %a0, i32 %a1, i32 1
; Memory-sourced insert into lane 3 of the previous result.
1316 %2 = load i32, i32 *%a2
1317 %3 = insertelement <4 x i32> %1, i32 %2, i32 3
; test_pinsrq: 64-bit lane insertion from a register and from memory.
; %a2 is inserted into lane 1 of %a0, an i64 loaded from %a3 is inserted into
; lane 1 of %a1, and the two resulting vectors are added.
; Because the two inserts target different vectors they are independent, and
; the expected instruction order differs by prefix: the SLM, BTVER2 and ZNVER1
; groups expect the (%rsi) insert first, the other groups expect the %rdi
; insert first. All groups end with paddq/vpaddq.
1321 define <2 x i64> @test_pinsrq(<2 x i64> %a0, <2 x i64> %a1, i64 %a2, i64 *%a3) {
1322 ; GENERIC-LABEL: test_pinsrq:
1324 ; GENERIC-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [2:1.00]
1325 ; GENERIC-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [7:0.50]
1326 ; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
1327 ; GENERIC-NEXT: retq # sched: [1:1.00]
1329 ; SLM-LABEL: test_pinsrq:
1331 ; SLM-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [4:1.00]
1332 ; SLM-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [1:1.00]
1333 ; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
1334 ; SLM-NEXT: retq # sched: [4:1.00]
1336 ; SANDY-LABEL: test_pinsrq:
1338 ; SANDY-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:1.00]
1339 ; SANDY-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [7:0.50]
1340 ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1341 ; SANDY-NEXT: retq # sched: [1:1.00]
1343 ; HASWELL-LABEL: test_pinsrq:
1345 ; HASWELL-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00]
1346 ; HASWELL-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [1:1.00]
1347 ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1348 ; HASWELL-NEXT: retq # sched: [2:1.00]
1350 ; BROADWELL-LABEL: test_pinsrq:
1351 ; BROADWELL: # BB#0:
1352 ; BROADWELL-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00]
1353 ; BROADWELL-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [6:1.00]
1354 ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1355 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1357 ; SKYLAKE-LABEL: test_pinsrq:
1359 ; SKYLAKE-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00]
1360 ; SKYLAKE-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [6:1.00]
1361 ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
1362 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1364 ; SKX-LABEL: test_pinsrq:
1366 ; SKX-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00]
1367 ; SKX-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [6:1.00]
1368 ; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
1369 ; SKX-NEXT: retq # sched: [7:1.00]
1371 ; BTVER2-LABEL: test_pinsrq:
1373 ; BTVER2-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [6:1.00]
1374 ; BTVER2-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [1:0.50]
1375 ; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1376 ; BTVER2-NEXT: retq # sched: [4:1.00]
1378 ; ZNVER1-LABEL: test_pinsrq:
1380 ; ZNVER1-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [8:0.50]
1381 ; ZNVER1-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [1:0.25]
1382 ; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
1383 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register-sourced insert into lane 1 of %a0.
1384 %1 = insertelement <2 x i64> %a0, i64 %a2, i32 1
; Memory-sourced insert into lane 1 of %a1 (a different vector than above).
1385 %2 = load i64, i64 *%a3
1386 %3 = insertelement <2 x i64> %a1, i64 %2, i32 1
; The add consumes both insert results.
1387 %4 = add <2 x i64> %1, %3
; test_pmaxsb: the pmaxsb intrinsic in register-operand and memory-operand
; form. The intrinsic is applied to %a0 and %a1, then to that result and the
; 16-byte-aligned load of %a2.
; Every check group expects the %xmm1 form followed by the (%rdi) form.
1391 define <16 x i8> @test_pmaxsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
1392 ; GENERIC-LABEL: test_pmaxsb:
1394 ; GENERIC-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50]
1395 ; GENERIC-NEXT: pmaxsb (%rdi), %xmm0 # sched: [7:0.50]
1396 ; GENERIC-NEXT: retq # sched: [1:1.00]
1398 ; SLM-LABEL: test_pmaxsb:
1400 ; SLM-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50]
1401 ; SLM-NEXT: pmaxsb (%rdi), %xmm0 # sched: [4:1.00]
1402 ; SLM-NEXT: retq # sched: [4:1.00]
1404 ; SANDY-LABEL: test_pmaxsb:
1406 ; SANDY-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1407 ; SANDY-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
1408 ; SANDY-NEXT: retq # sched: [1:1.00]
1410 ; HASWELL-LABEL: test_pmaxsb:
1412 ; HASWELL-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1413 ; HASWELL-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
1414 ; HASWELL-NEXT: retq # sched: [2:1.00]
1416 ; BROADWELL-LABEL: test_pmaxsb:
1417 ; BROADWELL: # BB#0:
1418 ; BROADWELL-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1419 ; BROADWELL-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
1420 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1422 ; SKYLAKE-LABEL: test_pmaxsb:
1424 ; SKYLAKE-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1425 ; SKYLAKE-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
1426 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1428 ; SKX-LABEL: test_pmaxsb:
1430 ; SKX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1431 ; SKX-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
1432 ; SKX-NEXT: retq # sched: [7:1.00]
1434 ; BTVER2-LABEL: test_pmaxsb:
1436 ; BTVER2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1437 ; BTVER2-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
1438 ; BTVER2-NEXT: retq # sched: [4:1.00]
1440 ; ZNVER1-LABEL: test_pmaxsb:
1442 ; ZNVER1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
1443 ; ZNVER1-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
1444 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; First call takes both operands from arguments (register form).
1445 %1 = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1)
; Second call takes its second operand from the load (memory form).
1446 %2 = load <16 x i8>, <16 x i8> *%a2, align 16
1447 %3 = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %1, <16 x i8> %2)
; Declaration of the SSE4.1 pmaxsb intrinsic: two <16 x i8> operands, a
; <16 x i8> result, marked nounwind readnone.
1450 declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone
; test_pmaxsd: the pmaxsd intrinsic in register-operand and memory-operand
; form. The intrinsic is applied to %a0 and %a1, then to that result and the
; 16-byte-aligned load of %a2.
; Every check group expects the %xmm1 form followed by the (%rdi) form.
1452 define <4 x i32> @test_pmaxsd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
1453 ; GENERIC-LABEL: test_pmaxsd:
1455 ; GENERIC-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50]
1456 ; GENERIC-NEXT: pmaxsd (%rdi), %xmm0 # sched: [7:0.50]
1457 ; GENERIC-NEXT: retq # sched: [1:1.00]
1459 ; SLM-LABEL: test_pmaxsd:
1461 ; SLM-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50]
1462 ; SLM-NEXT: pmaxsd (%rdi), %xmm0 # sched: [4:1.00]
1463 ; SLM-NEXT: retq # sched: [4:1.00]
1465 ; SANDY-LABEL: test_pmaxsd:
1467 ; SANDY-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1468 ; SANDY-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
1469 ; SANDY-NEXT: retq # sched: [1:1.00]
1471 ; HASWELL-LABEL: test_pmaxsd:
1473 ; HASWELL-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1474 ; HASWELL-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
1475 ; HASWELL-NEXT: retq # sched: [2:1.00]
1477 ; BROADWELL-LABEL: test_pmaxsd:
1478 ; BROADWELL: # BB#0:
1479 ; BROADWELL-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1480 ; BROADWELL-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
1481 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1483 ; SKYLAKE-LABEL: test_pmaxsd:
1485 ; SKYLAKE-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1486 ; SKYLAKE-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
1487 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1489 ; SKX-LABEL: test_pmaxsd:
1491 ; SKX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1492 ; SKX-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
1493 ; SKX-NEXT: retq # sched: [7:1.00]
1495 ; BTVER2-LABEL: test_pmaxsd:
1497 ; BTVER2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1498 ; BTVER2-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
1499 ; BTVER2-NEXT: retq # sched: [4:1.00]
1501 ; ZNVER1-LABEL: test_pmaxsd:
1503 ; ZNVER1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
1504 ; ZNVER1-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
1505 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; First call takes both operands from arguments (register form).
1506 %1 = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1)
; Second call takes its second operand from the load (memory form).
1507 %2 = load <4 x i32>, <4 x i32> *%a2, align 16
1508 %3 = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %1, <4 x i32> %2)
; Declaration of the SSE4.1 pmaxsd intrinsic: two <4 x i32> operands, a
; <4 x i32> result, marked nounwind readnone.
1511 declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
; test_pmaxud: the pmaxud intrinsic in register-operand and memory-operand
; form. The intrinsic is applied to %a0 and %a1, then to that result and the
; 16-byte-aligned load of %a2.
; Every check group expects the %xmm1 form followed by the (%rdi) form.
1513 define <4 x i32> @test_pmaxud(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
1514 ; GENERIC-LABEL: test_pmaxud:
1516 ; GENERIC-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50]
1517 ; GENERIC-NEXT: pmaxud (%rdi), %xmm0 # sched: [7:0.50]
1518 ; GENERIC-NEXT: retq # sched: [1:1.00]
1520 ; SLM-LABEL: test_pmaxud:
1522 ; SLM-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50]
1523 ; SLM-NEXT: pmaxud (%rdi), %xmm0 # sched: [4:1.00]
1524 ; SLM-NEXT: retq # sched: [4:1.00]
1526 ; SANDY-LABEL: test_pmaxud:
1528 ; SANDY-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1529 ; SANDY-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
1530 ; SANDY-NEXT: retq # sched: [1:1.00]
1532 ; HASWELL-LABEL: test_pmaxud:
1534 ; HASWELL-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1535 ; HASWELL-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
1536 ; HASWELL-NEXT: retq # sched: [2:1.00]
1538 ; BROADWELL-LABEL: test_pmaxud:
1539 ; BROADWELL: # BB#0:
1540 ; BROADWELL-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1541 ; BROADWELL-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
1542 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1544 ; SKYLAKE-LABEL: test_pmaxud:
1546 ; SKYLAKE-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1547 ; SKYLAKE-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
1548 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1550 ; SKX-LABEL: test_pmaxud:
1552 ; SKX-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1553 ; SKX-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
1554 ; SKX-NEXT: retq # sched: [7:1.00]
1556 ; BTVER2-LABEL: test_pmaxud:
1558 ; BTVER2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1559 ; BTVER2-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
1560 ; BTVER2-NEXT: retq # sched: [4:1.00]
1562 ; ZNVER1-LABEL: test_pmaxud:
1564 ; ZNVER1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
1565 ; ZNVER1-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
1566 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; First call takes both operands from arguments (register form).
1567 %1 = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1)
; Second call takes its second operand from the load (memory form).
1568 %2 = load <4 x i32>, <4 x i32> *%a2, align 16
1569 %3 = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %1, <4 x i32> %2)
; Declaration of the SSE4.1 pmaxud intrinsic: two <4 x i32> operands, a
; <4 x i32> result, marked nounwind readnone.
1572 declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone
; test_pmaxuw: the pmaxuw intrinsic in register-operand and memory-operand
; form. The intrinsic is applied to %a0 and %a1, then to that result and the
; 16-byte-aligned load of %a2.
; Every check group expects the %xmm1 form followed by the (%rdi) form.
1574 define <8 x i16> @test_pmaxuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
1575 ; GENERIC-LABEL: test_pmaxuw:
1577 ; GENERIC-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50]
1578 ; GENERIC-NEXT: pmaxuw (%rdi), %xmm0 # sched: [7:0.50]
1579 ; GENERIC-NEXT: retq # sched: [1:1.00]
1581 ; SLM-LABEL: test_pmaxuw:
1583 ; SLM-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50]
1584 ; SLM-NEXT: pmaxuw (%rdi), %xmm0 # sched: [4:1.00]
1585 ; SLM-NEXT: retq # sched: [4:1.00]
1587 ; SANDY-LABEL: test_pmaxuw:
1589 ; SANDY-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1590 ; SANDY-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
1591 ; SANDY-NEXT: retq # sched: [1:1.00]
1593 ; HASWELL-LABEL: test_pmaxuw:
1595 ; HASWELL-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1596 ; HASWELL-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
1597 ; HASWELL-NEXT: retq # sched: [2:1.00]
1599 ; BROADWELL-LABEL: test_pmaxuw:
1600 ; BROADWELL: # BB#0:
1601 ; BROADWELL-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1602 ; BROADWELL-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
1603 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1605 ; SKYLAKE-LABEL: test_pmaxuw:
1607 ; SKYLAKE-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1608 ; SKYLAKE-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
1609 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1611 ; SKX-LABEL: test_pmaxuw:
1613 ; SKX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1614 ; SKX-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
1615 ; SKX-NEXT: retq # sched: [7:1.00]
1617 ; BTVER2-LABEL: test_pmaxuw:
1619 ; BTVER2-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1620 ; BTVER2-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
1621 ; BTVER2-NEXT: retq # sched: [4:1.00]
1623 ; ZNVER1-LABEL: test_pmaxuw:
1625 ; ZNVER1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
1626 ; ZNVER1-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
1627 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; First call takes both operands from arguments (register form).
1628 %1 = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1)
; Second call takes its second operand from the load (memory form).
1629 %2 = load <8 x i16>, <8 x i16> *%a2, align 16
1630 %3 = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %1, <8 x i16> %2)
; Declaration of the SSE4.1 pmaxuw intrinsic: two <8 x i16> operands, a
; <8 x i16> result, marked nounwind readnone.
1633 declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone
; test_pminsb: the pminsb intrinsic in register-operand and memory-operand
; form. The intrinsic is applied to %a0 and %a1, then to that result and the
; 16-byte-aligned load of %a2.
; Every check group expects the %xmm1 form followed by the (%rdi) form.
1635 define <16 x i8> @test_pminsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
1636 ; GENERIC-LABEL: test_pminsb:
1638 ; GENERIC-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50]
1639 ; GENERIC-NEXT: pminsb (%rdi), %xmm0 # sched: [7:0.50]
1640 ; GENERIC-NEXT: retq # sched: [1:1.00]
1642 ; SLM-LABEL: test_pminsb:
1644 ; SLM-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50]
1645 ; SLM-NEXT: pminsb (%rdi), %xmm0 # sched: [4:1.00]
1646 ; SLM-NEXT: retq # sched: [4:1.00]
1648 ; SANDY-LABEL: test_pminsb:
1650 ; SANDY-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1651 ; SANDY-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
1652 ; SANDY-NEXT: retq # sched: [1:1.00]
1654 ; HASWELL-LABEL: test_pminsb:
1656 ; HASWELL-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1657 ; HASWELL-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
1658 ; HASWELL-NEXT: retq # sched: [2:1.00]
1660 ; BROADWELL-LABEL: test_pminsb:
1661 ; BROADWELL: # BB#0:
1662 ; BROADWELL-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1663 ; BROADWELL-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
1664 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1666 ; SKYLAKE-LABEL: test_pminsb:
1668 ; SKYLAKE-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1669 ; SKYLAKE-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
1670 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1672 ; SKX-LABEL: test_pminsb:
1674 ; SKX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1675 ; SKX-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
1676 ; SKX-NEXT: retq # sched: [7:1.00]
1678 ; BTVER2-LABEL: test_pminsb:
1680 ; BTVER2-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1681 ; BTVER2-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
1682 ; BTVER2-NEXT: retq # sched: [4:1.00]
1684 ; ZNVER1-LABEL: test_pminsb:
1686 ; ZNVER1-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
1687 ; ZNVER1-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
1688 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; First call takes both operands from arguments (register form).
1689 %1 = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1)
; Second call takes its second operand from the load (memory form).
1690 %2 = load <16 x i8>, <16 x i8> *%a2, align 16
1691 %3 = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %1, <16 x i8> %2)
1694 declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone
; Scheduling test for (v)pminsd. The IR calls @llvm.x86.sse41.pminsd on the two
; <4 x i32> arguments, then on that result and a 16-byte-aligned load from %a2,
; so each CPU model expects a register-operand and a memory-operand instruction
; with its "sched" annotation.
; Autogenerated assertions; regenerate with utils/update_llc_test_checks.py.
1696 define <4 x i32> @test_pminsd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
1697 ; GENERIC-LABEL: test_pminsd:
1699 ; GENERIC-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50]
1700 ; GENERIC-NEXT: pminsd (%rdi), %xmm0 # sched: [7:0.50]
1701 ; GENERIC-NEXT: retq # sched: [1:1.00]
1703 ; SLM-LABEL: test_pminsd:
1705 ; SLM-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50]
1706 ; SLM-NEXT: pminsd (%rdi), %xmm0 # sched: [4:1.00]
1707 ; SLM-NEXT: retq # sched: [4:1.00]
1709 ; SANDY-LABEL: test_pminsd:
1711 ; SANDY-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1712 ; SANDY-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
1713 ; SANDY-NEXT: retq # sched: [1:1.00]
1715 ; HASWELL-LABEL: test_pminsd:
1717 ; HASWELL-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1718 ; HASWELL-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
1719 ; HASWELL-NEXT: retq # sched: [2:1.00]
1721 ; BROADWELL-LABEL: test_pminsd:
1722 ; BROADWELL: # BB#0:
1723 ; BROADWELL-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1724 ; BROADWELL-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
1725 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1727 ; SKYLAKE-LABEL: test_pminsd:
1729 ; SKYLAKE-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1730 ; SKYLAKE-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
1731 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1733 ; SKX-LABEL: test_pminsd:
1735 ; SKX-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1736 ; SKX-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
1737 ; SKX-NEXT: retq # sched: [7:1.00]
1739 ; BTVER2-LABEL: test_pminsd:
1741 ; BTVER2-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1742 ; BTVER2-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
1743 ; BTVER2-NEXT: retq # sched: [4:1.00]
1745 ; ZNVER1-LABEL: test_pminsd:
1747 ; ZNVER1-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
1748 ; ZNVER1-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
1749 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1750 %1 = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1)
1751 %2 = load <4 x i32>, <4 x i32> *%a2, align 16
1752 %3 = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %1, <4 x i32> %2)
1755 declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
; Scheduling test for (v)pminud. The IR calls @llvm.x86.sse41.pminud on the two
; <4 x i32> arguments, then on that result and a 16-byte-aligned load from %a2,
; so each CPU model expects a register-operand and a memory-operand instruction
; with its "sched" annotation.
; Autogenerated assertions; regenerate with utils/update_llc_test_checks.py.
1757 define <4 x i32> @test_pminud(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
1758 ; GENERIC-LABEL: test_pminud:
1760 ; GENERIC-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50]
1761 ; GENERIC-NEXT: pminud (%rdi), %xmm0 # sched: [7:0.50]
1762 ; GENERIC-NEXT: retq # sched: [1:1.00]
1764 ; SLM-LABEL: test_pminud:
1766 ; SLM-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50]
1767 ; SLM-NEXT: pminud (%rdi), %xmm0 # sched: [4:1.00]
1768 ; SLM-NEXT: retq # sched: [4:1.00]
1770 ; SANDY-LABEL: test_pminud:
1772 ; SANDY-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1773 ; SANDY-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
1774 ; SANDY-NEXT: retq # sched: [1:1.00]
1776 ; HASWELL-LABEL: test_pminud:
1778 ; HASWELL-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1779 ; HASWELL-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
1780 ; HASWELL-NEXT: retq # sched: [2:1.00]
1782 ; BROADWELL-LABEL: test_pminud:
1783 ; BROADWELL: # BB#0:
1784 ; BROADWELL-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1785 ; BROADWELL-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
1786 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1788 ; SKYLAKE-LABEL: test_pminud:
1790 ; SKYLAKE-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1791 ; SKYLAKE-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
1792 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1794 ; SKX-LABEL: test_pminud:
1796 ; SKX-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1797 ; SKX-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
1798 ; SKX-NEXT: retq # sched: [7:1.00]
1800 ; BTVER2-LABEL: test_pminud:
1802 ; BTVER2-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1803 ; BTVER2-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
1804 ; BTVER2-NEXT: retq # sched: [4:1.00]
1806 ; ZNVER1-LABEL: test_pminud:
1808 ; ZNVER1-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
1809 ; ZNVER1-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
1810 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1811 %1 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1)
1812 %2 = load <4 x i32>, <4 x i32> *%a2, align 16
1813 %3 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %1, <4 x i32> %2)
1816 declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone
; Scheduling test for (v)pminuw. The IR calls @llvm.x86.sse41.pminuw on the two
; <8 x i16> arguments, then on that result and a 16-byte-aligned load from %a2,
; so each CPU model expects a register-operand and a memory-operand instruction
; with its "sched" annotation.
; Autogenerated assertions; regenerate with utils/update_llc_test_checks.py.
1818 define <8 x i16> @test_pminuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
1819 ; GENERIC-LABEL: test_pminuw:
1821 ; GENERIC-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50]
1822 ; GENERIC-NEXT: pminuw (%rdi), %xmm0 # sched: [7:0.50]
1823 ; GENERIC-NEXT: retq # sched: [1:1.00]
1825 ; SLM-LABEL: test_pminuw:
1827 ; SLM-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50]
1828 ; SLM-NEXT: pminuw (%rdi), %xmm0 # sched: [4:1.00]
1829 ; SLM-NEXT: retq # sched: [4:1.00]
1831 ; SANDY-LABEL: test_pminuw:
1833 ; SANDY-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1834 ; SANDY-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
1835 ; SANDY-NEXT: retq # sched: [1:1.00]
1837 ; HASWELL-LABEL: test_pminuw:
1839 ; HASWELL-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1840 ; HASWELL-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [1:0.50]
1841 ; HASWELL-NEXT: retq # sched: [2:1.00]
1843 ; BROADWELL-LABEL: test_pminuw:
1844 ; BROADWELL: # BB#0:
1845 ; BROADWELL-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1846 ; BROADWELL-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
1847 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1849 ; SKYLAKE-LABEL: test_pminuw:
1851 ; SKYLAKE-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1852 ; SKYLAKE-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
1853 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1855 ; SKX-LABEL: test_pminuw:
1857 ; SKX-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1858 ; SKX-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
1859 ; SKX-NEXT: retq # sched: [7:1.00]
1861 ; BTVER2-LABEL: test_pminuw:
1863 ; BTVER2-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1864 ; BTVER2-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
1865 ; BTVER2-NEXT: retq # sched: [4:1.00]
1867 ; ZNVER1-LABEL: test_pminuw:
1869 ; ZNVER1-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
1870 ; ZNVER1-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
1871 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1872 %1 = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1)
1873 %2 = load <8 x i16>, <8 x i16> *%a2, align 16
1874 %3 = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %1, <8 x i16> %2)
1877 declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone
; Scheduling test for (v)pmovsxbw with a register source and a memory source.
; The IR takes the low 8 bytes of %a0 (shufflevector) and an <8 x i8> loaded
; from %a1 (align 1), sign-extends each to <8 x i16>, and adds the two results.
; Instruction order differs between CPU models in the expected output below.
; Autogenerated assertions; regenerate with utils/update_llc_test_checks.py.
1879 define <8 x i16> @test_pmovsxbw(<16 x i8> %a0, <8 x i8> *%a1) {
1880 ; GENERIC-LABEL: test_pmovsxbw:
1882 ; GENERIC-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:0.50]
1883 ; GENERIC-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [7:0.50]
1884 ; GENERIC-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
1885 ; GENERIC-NEXT: retq # sched: [1:1.00]
1887 ; SLM-LABEL: test_pmovsxbw:
1889 ; SLM-NEXT: pmovsxbw (%rdi), %xmm1 # sched: [4:1.00]
1890 ; SLM-NEXT: pmovsxbw %xmm0, %xmm0 # sched: [1:1.00]
1891 ; SLM-NEXT: paddw %xmm0, %xmm1 # sched: [1:0.50]
1892 ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
1893 ; SLM-NEXT: retq # sched: [4:1.00]
1895 ; SANDY-LABEL: test_pmovsxbw:
1897 ; SANDY-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:0.50]
1898 ; SANDY-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [7:0.50]
1899 ; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1900 ; SANDY-NEXT: retq # sched: [1:1.00]
1902 ; HASWELL-LABEL: test_pmovsxbw:
1904 ; HASWELL-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00]
1905 ; HASWELL-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [1:1.00]
1906 ; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1907 ; HASWELL-NEXT: retq # sched: [2:1.00]
1909 ; BROADWELL-LABEL: test_pmovsxbw:
1910 ; BROADWELL: # BB#0:
1911 ; BROADWELL-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00]
1912 ; BROADWELL-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [6:1.00]
1913 ; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1914 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1916 ; SKYLAKE-LABEL: test_pmovsxbw:
1918 ; SKYLAKE-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00]
1919 ; SKYLAKE-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [6:1.00]
1920 ; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
1921 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1923 ; SKX-LABEL: test_pmovsxbw:
1925 ; SKX-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00]
1926 ; SKX-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [6:1.00]
1927 ; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
1928 ; SKX-NEXT: retq # sched: [7:1.00]
1930 ; BTVER2-LABEL: test_pmovsxbw:
1932 ; BTVER2-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [6:1.00]
1933 ; BTVER2-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:0.50]
1934 ; BTVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1935 ; BTVER2-NEXT: retq # sched: [4:1.00]
1937 ; ZNVER1-LABEL: test_pmovsxbw:
1939 ; ZNVER1-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [8:0.50]
1940 ; ZNVER1-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:0.25]
1941 ; ZNVER1-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
1942 ; ZNVER1-NEXT: retq # sched: [1:0.50]
1943 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1944 %2 = sext <8 x i8> %1 to <8 x i16>
1945 %3 = load <8 x i8>, <8 x i8>* %a1, align 1
1946 %4 = sext <8 x i8> %3 to <8 x i16>
1947 %5 = add <8 x i16> %2, %4
; Scheduling test for (v)pmovsxbd with a register source and a memory source.
; The IR takes the low 4 bytes of %a0 (shufflevector) and a <4 x i8> loaded
; from %a1 (align 1), sign-extends each to <4 x i32>, and adds the two results.
; Instruction order differs between CPU models in the expected output below.
; Autogenerated assertions; regenerate with utils/update_llc_test_checks.py.
1951 define <4 x i32> @test_pmovsxbd(<16 x i8> %a0, <4 x i8> *%a1) {
1952 ; GENERIC-LABEL: test_pmovsxbd:
1954 ; GENERIC-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:0.50]
1955 ; GENERIC-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [7:0.50]
1956 ; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
1957 ; GENERIC-NEXT: retq # sched: [1:1.00]
1959 ; SLM-LABEL: test_pmovsxbd:
1961 ; SLM-NEXT: pmovsxbd (%rdi), %xmm1 # sched: [4:1.00]
1962 ; SLM-NEXT: pmovsxbd %xmm0, %xmm0 # sched: [1:1.00]
1963 ; SLM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50]
1964 ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
1965 ; SLM-NEXT: retq # sched: [4:1.00]
1967 ; SANDY-LABEL: test_pmovsxbd:
1969 ; SANDY-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:0.50]
1970 ; SANDY-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [7:0.50]
1971 ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1972 ; SANDY-NEXT: retq # sched: [1:1.00]
1974 ; HASWELL-LABEL: test_pmovsxbd:
1976 ; HASWELL-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00]
1977 ; HASWELL-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [1:1.00]
1978 ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1979 ; HASWELL-NEXT: retq # sched: [2:1.00]
1981 ; BROADWELL-LABEL: test_pmovsxbd:
1982 ; BROADWELL: # BB#0:
1983 ; BROADWELL-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00]
1984 ; BROADWELL-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [6:1.00]
1985 ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
1986 ; BROADWELL-NEXT: retq # sched: [7:1.00]
1988 ; SKYLAKE-LABEL: test_pmovsxbd:
1990 ; SKYLAKE-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00]
1991 ; SKYLAKE-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [6:1.00]
1992 ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
1993 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
1995 ; SKX-LABEL: test_pmovsxbd:
1997 ; SKX-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00]
1998 ; SKX-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [6:1.00]
1999 ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
2000 ; SKX-NEXT: retq # sched: [7:1.00]
2002 ; BTVER2-LABEL: test_pmovsxbd:
2004 ; BTVER2-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [6:1.00]
2005 ; BTVER2-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:0.50]
2006 ; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2007 ; BTVER2-NEXT: retq # sched: [4:1.00]
2009 ; ZNVER1-LABEL: test_pmovsxbd:
2011 ; ZNVER1-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [8:0.50]
2012 ; ZNVER1-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:0.25]
2013 ; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
2014 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2015 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2016 %2 = sext <4 x i8> %1 to <4 x i32>
2017 %3 = load <4 x i8>, <4 x i8>* %a1, align 1
2018 %4 = sext <4 x i8> %3 to <4 x i32>
2019 %5 = add <4 x i32> %2, %4
; Scheduling test for (v)pmovsxbq with a register source and a memory source.
; The IR takes the low 2 bytes of %a0 (shufflevector) and a <2 x i8> loaded
; from %a1 (align 1), sign-extends each to <2 x i64>, and adds the two results.
; Instruction order differs between CPU models in the expected output below.
; Autogenerated assertions; regenerate with utils/update_llc_test_checks.py.
2023 define <2 x i64> @test_pmovsxbq(<16 x i8> %a0, <2 x i8> *%a1) {
2024 ; GENERIC-LABEL: test_pmovsxbq:
2026 ; GENERIC-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:0.50]
2027 ; GENERIC-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [7:0.50]
2028 ; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
2029 ; GENERIC-NEXT: retq # sched: [1:1.00]
2031 ; SLM-LABEL: test_pmovsxbq:
2033 ; SLM-NEXT: pmovsxbq (%rdi), %xmm1 # sched: [4:1.00]
2034 ; SLM-NEXT: pmovsxbq %xmm0, %xmm0 # sched: [1:1.00]
2035 ; SLM-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50]
2036 ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
2037 ; SLM-NEXT: retq # sched: [4:1.00]
2039 ; SANDY-LABEL: test_pmovsxbq:
2041 ; SANDY-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:0.50]
2042 ; SANDY-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [7:0.50]
2043 ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2044 ; SANDY-NEXT: retq # sched: [1:1.00]
2046 ; HASWELL-LABEL: test_pmovsxbq:
2048 ; HASWELL-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00]
2049 ; HASWELL-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [1:1.00]
2050 ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2051 ; HASWELL-NEXT: retq # sched: [2:1.00]
2053 ; BROADWELL-LABEL: test_pmovsxbq:
2054 ; BROADWELL: # BB#0:
2055 ; BROADWELL-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00]
2056 ; BROADWELL-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [6:1.00]
2057 ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2058 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2060 ; SKYLAKE-LABEL: test_pmovsxbq:
2062 ; SKYLAKE-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00]
2063 ; SKYLAKE-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [6:1.00]
2064 ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
2065 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2067 ; SKX-LABEL: test_pmovsxbq:
2069 ; SKX-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00]
2070 ; SKX-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [6:1.00]
2071 ; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
2072 ; SKX-NEXT: retq # sched: [7:1.00]
2074 ; BTVER2-LABEL: test_pmovsxbq:
2076 ; BTVER2-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [6:1.00]
2077 ; BTVER2-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:0.50]
2078 ; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2079 ; BTVER2-NEXT: retq # sched: [4:1.00]
2081 ; ZNVER1-LABEL: test_pmovsxbq:
2083 ; ZNVER1-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [8:0.50]
2084 ; ZNVER1-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:0.25]
2085 ; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
2086 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2087 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
2088 %2 = sext <2 x i8> %1 to <2 x i64>
2089 %3 = load <2 x i8>, <2 x i8>* %a1, align 1
2090 %4 = sext <2 x i8> %3 to <2 x i64>
2091 %5 = add <2 x i64> %2, %4
; Scheduling test for (v)pmovsxdq with a register source and a memory source.
; The IR takes the low 2 i32 elements of %a0 (shufflevector) and a <2 x i32>
; loaded from %a1 (align 1), sign-extends each to <2 x i64>, and adds the two
; results. Instruction order differs between CPU models in the expected output.
; Autogenerated assertions; regenerate with utils/update_llc_test_checks.py.
2095 define <2 x i64> @test_pmovsxdq(<4 x i32> %a0, <2 x i32> *%a1) {
2096 ; GENERIC-LABEL: test_pmovsxdq:
2098 ; GENERIC-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:0.50]
2099 ; GENERIC-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [7:0.50]
2100 ; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
2101 ; GENERIC-NEXT: retq # sched: [1:1.00]
2103 ; SLM-LABEL: test_pmovsxdq:
2105 ; SLM-NEXT: pmovsxdq (%rdi), %xmm1 # sched: [4:1.00]
2106 ; SLM-NEXT: pmovsxdq %xmm0, %xmm0 # sched: [1:1.00]
2107 ; SLM-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50]
2108 ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
2109 ; SLM-NEXT: retq # sched: [4:1.00]
2111 ; SANDY-LABEL: test_pmovsxdq:
2113 ; SANDY-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:0.50]
2114 ; SANDY-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [7:0.50]
2115 ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2116 ; SANDY-NEXT: retq # sched: [1:1.00]
2118 ; HASWELL-LABEL: test_pmovsxdq:
2120 ; HASWELL-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00]
2121 ; HASWELL-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [1:1.00]
2122 ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2123 ; HASWELL-NEXT: retq # sched: [2:1.00]
2125 ; BROADWELL-LABEL: test_pmovsxdq:
2126 ; BROADWELL: # BB#0:
2127 ; BROADWELL-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00]
2128 ; BROADWELL-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [6:1.00]
2129 ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2130 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2132 ; SKYLAKE-LABEL: test_pmovsxdq:
2134 ; SKYLAKE-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00]
2135 ; SKYLAKE-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [6:1.00]
2136 ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
2137 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2139 ; SKX-LABEL: test_pmovsxdq:
2141 ; SKX-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00]
2142 ; SKX-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [6:1.00]
2143 ; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
2144 ; SKX-NEXT: retq # sched: [7:1.00]
2146 ; BTVER2-LABEL: test_pmovsxdq:
2148 ; BTVER2-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [6:1.00]
2149 ; BTVER2-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:0.50]
2150 ; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2151 ; BTVER2-NEXT: retq # sched: [4:1.00]
2153 ; ZNVER1-LABEL: test_pmovsxdq:
2155 ; ZNVER1-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [8:0.50]
2156 ; ZNVER1-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:0.25]
2157 ; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
2158 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2159 %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
2160 %2 = sext <2 x i32> %1 to <2 x i64>
2161 %3 = load <2 x i32>, <2 x i32>* %a1, align 1
2162 %4 = sext <2 x i32> %3 to <2 x i64>
2163 %5 = add <2 x i64> %2, %4
; Scheduling test for (v)pmovsxwd with a register source and a memory source.
; The IR takes the low 4 i16 elements of %a0 (shufflevector) and a <4 x i16>
; loaded from %a1 (align 1), sign-extends each to <4 x i32>, and adds the two
; results. Instruction order differs between CPU models in the expected output.
; Autogenerated assertions; regenerate with utils/update_llc_test_checks.py.
2167 define <4 x i32> @test_pmovsxwd(<8 x i16> %a0, <4 x i16> *%a1) {
2168 ; GENERIC-LABEL: test_pmovsxwd:
2170 ; GENERIC-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:0.50]
2171 ; GENERIC-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [7:0.50]
2172 ; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
2173 ; GENERIC-NEXT: retq # sched: [1:1.00]
2175 ; SLM-LABEL: test_pmovsxwd:
2177 ; SLM-NEXT: pmovsxwd (%rdi), %xmm1 # sched: [4:1.00]
2178 ; SLM-NEXT: pmovsxwd %xmm0, %xmm0 # sched: [1:1.00]
2179 ; SLM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50]
2180 ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
2181 ; SLM-NEXT: retq # sched: [4:1.00]
2183 ; SANDY-LABEL: test_pmovsxwd:
2185 ; SANDY-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:0.50]
2186 ; SANDY-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [7:0.50]
2187 ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2188 ; SANDY-NEXT: retq # sched: [1:1.00]
2190 ; HASWELL-LABEL: test_pmovsxwd:
2192 ; HASWELL-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00]
2193 ; HASWELL-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [1:1.00]
2194 ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2195 ; HASWELL-NEXT: retq # sched: [2:1.00]
2197 ; BROADWELL-LABEL: test_pmovsxwd:
2198 ; BROADWELL: # BB#0:
2199 ; BROADWELL-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00]
2200 ; BROADWELL-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [6:1.00]
2201 ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2202 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2204 ; SKYLAKE-LABEL: test_pmovsxwd:
2206 ; SKYLAKE-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00]
2207 ; SKYLAKE-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [6:1.00]
2208 ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
2209 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2211 ; SKX-LABEL: test_pmovsxwd:
2213 ; SKX-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00]
2214 ; SKX-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [6:1.00]
2215 ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
2216 ; SKX-NEXT: retq # sched: [7:1.00]
2218 ; BTVER2-LABEL: test_pmovsxwd:
2220 ; BTVER2-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [6:1.00]
2221 ; BTVER2-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:0.50]
2222 ; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2223 ; BTVER2-NEXT: retq # sched: [4:1.00]
2225 ; ZNVER1-LABEL: test_pmovsxwd:
2227 ; ZNVER1-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [8:0.50]
2228 ; ZNVER1-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:0.25]
2229 ; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
2230 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2231 %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2232 %2 = sext <4 x i16> %1 to <4 x i32>
2233 %3 = load <4 x i16>, <4 x i16>* %a1, align 1
2234 %4 = sext <4 x i16> %3 to <4 x i32>
2235 %5 = add <4 x i32> %2, %4
; Scheduling test for (v)pmovsxwq with a register source and a memory source.
; The IR takes the low 2 i16 elements of %a0 (shufflevector) and a <2 x i16>
; loaded from %a1 (align 1), sign-extends each to <2 x i64>, and adds the two
; results. Instruction order differs between CPU models in the expected output.
; Autogenerated assertions; regenerate with utils/update_llc_test_checks.py.
2239 define <2 x i64> @test_pmovsxwq(<8 x i16> %a0, <2 x i16> *%a1) {
2240 ; GENERIC-LABEL: test_pmovsxwq:
2242 ; GENERIC-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:0.50]
2243 ; GENERIC-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [7:0.50]
2244 ; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
2245 ; GENERIC-NEXT: retq # sched: [1:1.00]
2247 ; SLM-LABEL: test_pmovsxwq:
2249 ; SLM-NEXT: pmovsxwq (%rdi), %xmm1 # sched: [4:1.00]
2250 ; SLM-NEXT: pmovsxwq %xmm0, %xmm0 # sched: [1:1.00]
2251 ; SLM-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50]
2252 ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
2253 ; SLM-NEXT: retq # sched: [4:1.00]
2255 ; SANDY-LABEL: test_pmovsxwq:
2257 ; SANDY-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:0.50]
2258 ; SANDY-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [7:0.50]
2259 ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2260 ; SANDY-NEXT: retq # sched: [1:1.00]
2262 ; HASWELL-LABEL: test_pmovsxwq:
2264 ; HASWELL-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00]
2265 ; HASWELL-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [1:1.00]
2266 ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2267 ; HASWELL-NEXT: retq # sched: [2:1.00]
2269 ; BROADWELL-LABEL: test_pmovsxwq:
2270 ; BROADWELL: # BB#0:
2271 ; BROADWELL-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00]
2272 ; BROADWELL-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [6:1.00]
2273 ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2274 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2276 ; SKYLAKE-LABEL: test_pmovsxwq:
2278 ; SKYLAKE-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00]
2279 ; SKYLAKE-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [6:1.00]
2280 ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
2281 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2283 ; SKX-LABEL: test_pmovsxwq:
2285 ; SKX-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00]
2286 ; SKX-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [6:1.00]
2287 ; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
2288 ; SKX-NEXT: retq # sched: [7:1.00]
2290 ; BTVER2-LABEL: test_pmovsxwq:
2292 ; BTVER2-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [6:1.00]
2293 ; BTVER2-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:0.50]
2294 ; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2295 ; BTVER2-NEXT: retq # sched: [4:1.00]
2297 ; ZNVER1-LABEL: test_pmovsxwq:
2299 ; ZNVER1-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [8:0.50]
2300 ; ZNVER1-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:0.25]
2301 ; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
2302 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2303 %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
2304 %2 = sext <2 x i16> %1 to <2 x i64>
2305 %3 = load <2 x i16>, <2 x i16>* %a1, align 1
2306 %4 = sext <2 x i16> %3 to <2 x i64>
2307 %5 = add <2 x i64> %2, %4
; Scheduling test for (v)pmovzxbw with a register source and a memory source.
; The IR takes the low 8 bytes of %a0 (shufflevector) and an <8 x i8> loaded
; from %a1 (align 1), zero-extends each to <8 x i16>, and adds the two results.
; The expected pmovzxbw lines match the operand text through a {{.*#+}} regex
; followed by the element-by-element decode (xmm0[i]/mem[i] interleaved with
; "zero") and the "sched" annotation.
; Autogenerated assertions; regenerate with utils/update_llc_test_checks.py.
2311 define <8 x i16> @test_pmovzxbw(<16 x i8> %a0, <8 x i8> *%a1) {
2312 ; GENERIC-LABEL: test_pmovzxbw:
2314 ; GENERIC-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50]
2315 ; GENERIC-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [7:0.50]
2316 ; GENERIC-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
2317 ; GENERIC-NEXT: retq # sched: [1:1.00]
2319 ; SLM-LABEL: test_pmovzxbw:
2321 ; SLM-NEXT: pmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [4:1.00]
2322 ; SLM-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
2323 ; SLM-NEXT: paddw %xmm0, %xmm1 # sched: [1:0.50]
2324 ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
2325 ; SLM-NEXT: retq # sched: [4:1.00]
2327 ; SANDY-LABEL: test_pmovzxbw:
2329 ; SANDY-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50]
2330 ; SANDY-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [7:0.50]
2331 ; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2332 ; SANDY-NEXT: retq # sched: [1:1.00]
2334 ; HASWELL-LABEL: test_pmovzxbw:
2336 ; HASWELL-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
2337 ; HASWELL-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [1:1.00]
2338 ; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2339 ; HASWELL-NEXT: retq # sched: [2:1.00]
2341 ; BROADWELL-LABEL: test_pmovzxbw:
2342 ; BROADWELL: # BB#0:
2343 ; BROADWELL-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
2344 ; BROADWELL-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
2345 ; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2346 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2348 ; SKYLAKE-LABEL: test_pmovzxbw:
2350 ; SKYLAKE-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
2351 ; SKYLAKE-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
2352 ; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
2353 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2355 ; SKX-LABEL: test_pmovzxbw:
2357 ; SKX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
2358 ; SKX-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
2359 ; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
2360 ; SKX-NEXT: retq # sched: [7:1.00]
2362 ; BTVER2-LABEL: test_pmovzxbw:
2364 ; BTVER2-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
2365 ; BTVER2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50]
2366 ; BTVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2367 ; BTVER2-NEXT: retq # sched: [4:1.00]
2369 ; ZNVER1-LABEL: test_pmovzxbw:
2371 ; ZNVER1-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:0.50]
2372 ; ZNVER1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.25]
2373 ; ZNVER1-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
2374 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2375 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
2376 %2 = zext <8 x i8> %1 to <8 x i16>
2377 %3 = load <8 x i8>, <8 x i8>* %a1, align 1
2378 %4 = zext <8 x i8> %3 to <8 x i16>
2379 %5 = add <8 x i16> %2, %4
; Scheduling test for (v)pmovzxbd with a register source and a memory source.
; The IR takes the low 4 bytes of %a0 (shufflevector) and a <4 x i8> loaded
; from %a1 (align 1), zero-extends each to <4 x i32>, and adds the two results.
; The expected pmovzxbd lines match the operand text through a {{.*#+}} regex
; followed by the element-by-element decode (each source byte followed by three
; "zero" entries) and the "sched" annotation.
; Autogenerated assertions; regenerate with utils/update_llc_test_checks.py.
2383 define <4 x i32> @test_pmovzxbd(<16 x i8> %a0, <4 x i8> *%a1) {
2384 ; GENERIC-LABEL: test_pmovzxbd:
2386 ; GENERIC-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50]
2387 ; GENERIC-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [7:0.50]
2388 ; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
2389 ; GENERIC-NEXT: retq # sched: [1:1.00]
2391 ; SLM-LABEL: test_pmovzxbd:
2393 ; SLM-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [4:1.00]
2394 ; SLM-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
2395 ; SLM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50]
2396 ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
2397 ; SLM-NEXT: retq # sched: [4:1.00]
2399 ; SANDY-LABEL: test_pmovzxbd:
2401 ; SANDY-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50]
2402 ; SANDY-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [7:0.50]
2403 ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2404 ; SANDY-NEXT: retq # sched: [1:1.00]
2406 ; HASWELL-LABEL: test_pmovzxbd:
2408 ; HASWELL-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
2409 ; HASWELL-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [1:1.00]
2410 ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2411 ; HASWELL-NEXT: retq # sched: [2:1.00]
2413 ; BROADWELL-LABEL: test_pmovzxbd:
2414 ; BROADWELL: # BB#0:
2415 ; BROADWELL-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
2416 ; BROADWELL-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
2417 ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2418 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2420 ; SKYLAKE-LABEL: test_pmovzxbd:
2422 ; SKYLAKE-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
2423 ; SKYLAKE-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
2424 ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
2425 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2427 ; SKX-LABEL: test_pmovzxbd:
2429 ; SKX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
2430 ; SKX-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
2431 ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
2432 ; SKX-NEXT: retq # sched: [7:1.00]
2434 ; BTVER2-LABEL: test_pmovzxbd:
2436 ; BTVER2-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
2437 ; BTVER2-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50]
2438 ; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2439 ; BTVER2-NEXT: retq # sched: [4:1.00]
2441 ; ZNVER1-LABEL: test_pmovzxbd:
2443 ; ZNVER1-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:0.50]
2444 ; ZNVER1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.25]
2445 ; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
2446 ; ZNVER1-NEXT: retq # sched: [1:0.50]
2447 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2448 %2 = zext <4 x i8> %1 to <4 x i32>
2449 %3 = load <4 x i8>, <4 x i8>* %a1, align 1
2450 %4 = zext <4 x i8> %3 to <4 x i32>
2451 %5 = add <4 x i32> %2, %4
; test_pmovzxbq - zero-extends the low two bytes of %a0 and two bytes loaded
; from %a1 to <2 x i64>, then adds the two results. The expectations below
; therefore cover both the register and the memory form of (v)pmovzxbq, plus
; the (v)paddq that consumes them, for each -mcpu listed in the RUN lines.
; The assertion comments are autogenerated: regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
2455 define <2 x i64> @test_pmovzxbq(<16 x i8> %a0, <2 x i8> *%a1) {
2456 ; GENERIC-LABEL: test_pmovzxbq:
2458 ; GENERIC-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50]
2459 ; GENERIC-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [7:0.50]
2460 ; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
2461 ; GENERIC-NEXT: retq # sched: [1:1.00]
2463 ; SLM-LABEL: test_pmovzxbq:
2465 ; SLM-NEXT: pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [4:1.00]
2466 ; SLM-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
2467 ; SLM-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50]
2468 ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
2469 ; SLM-NEXT: retq # sched: [4:1.00]
2471 ; SANDY-LABEL: test_pmovzxbq:
2473 ; SANDY-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50]
2474 ; SANDY-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [7:0.50]
2475 ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2476 ; SANDY-NEXT: retq # sched: [1:1.00]
2478 ; HASWELL-LABEL: test_pmovzxbq:
2480 ; HASWELL-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
2481 ; HASWELL-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
2482 ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2483 ; HASWELL-NEXT: retq # sched: [2:1.00]
2485 ; BROADWELL-LABEL: test_pmovzxbq:
2486 ; BROADWELL: # BB#0:
2487 ; BROADWELL-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
2488 ; BROADWELL-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
2489 ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2490 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2492 ; SKYLAKE-LABEL: test_pmovzxbq:
2494 ; SKYLAKE-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
2495 ; SKYLAKE-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
2496 ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
2497 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2499 ; SKX-LABEL: test_pmovzxbq:
2501 ; SKX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
2502 ; SKX-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
2503 ; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
2504 ; SKX-NEXT: retq # sched: [7:1.00]
2506 ; BTVER2-LABEL: test_pmovzxbq:
2508 ; BTVER2-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
2509 ; BTVER2-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50]
2510 ; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2511 ; BTVER2-NEXT: retq # sched: [4:1.00]
2513 ; ZNVER1-LABEL: test_pmovzxbq:
2515 ; ZNVER1-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [8:0.50]
2516 ; ZNVER1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.25]
2517 ; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
2518 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register operand: elements 0 and 1 of %a0 are extracted and widened.
2519 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
2520 %2 = zext <2 x i8> %1 to <2 x i64>
; Memory operand: a <2 x i8> load (align 1) from %a1 is widened the same way.
2521 %3 = load <2 x i8>, <2 x i8>* %a1, align 1
2522 %4 = zext <2 x i8> %3 to <2 x i64>
; Adding the two results keeps both extensions live in the generated code.
2523 %5 = add <2 x i64> %2, %4
; test_pmovzxdq - zero-extends the low two i32 elements of %a0 and two i32
; elements loaded from %a1 to <2 x i64>, then adds the two results. The
; expectations below cover the register and the memory form of (v)pmovzxdq,
; plus the (v)paddq that consumes them, for each -mcpu listed in the RUN lines.
; The assertion comments are autogenerated: regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
2527 define <2 x i64> @test_pmovzxdq(<4 x i32> %a0, <2 x i32> *%a1) {
2528 ; GENERIC-LABEL: test_pmovzxdq:
2530 ; GENERIC-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50]
2531 ; GENERIC-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [7:0.50]
2532 ; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
2533 ; GENERIC-NEXT: retq # sched: [1:1.00]
2535 ; SLM-LABEL: test_pmovzxdq:
2537 ; SLM-NEXT: pmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [4:1.00]
2538 ; SLM-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
2539 ; SLM-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50]
2540 ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
2541 ; SLM-NEXT: retq # sched: [4:1.00]
2543 ; SANDY-LABEL: test_pmovzxdq:
2545 ; SANDY-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50]
2546 ; SANDY-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [7:0.50]
2547 ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2548 ; SANDY-NEXT: retq # sched: [1:1.00]
2550 ; HASWELL-LABEL: test_pmovzxdq:
2552 ; HASWELL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
2553 ; HASWELL-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [1:1.00]
2554 ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2555 ; HASWELL-NEXT: retq # sched: [2:1.00]
2557 ; BROADWELL-LABEL: test_pmovzxdq:
2558 ; BROADWELL: # BB#0:
2559 ; BROADWELL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
2560 ; BROADWELL-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [6:1.00]
2561 ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2562 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2564 ; SKYLAKE-LABEL: test_pmovzxdq:
2566 ; SKYLAKE-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
2567 ; SKYLAKE-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [6:1.00]
2568 ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
2569 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2571 ; SKX-LABEL: test_pmovzxdq:
2573 ; SKX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
2574 ; SKX-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [6:1.00]
2575 ; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
2576 ; SKX-NEXT: retq # sched: [7:1.00]
2578 ; BTVER2-LABEL: test_pmovzxdq:
2580 ; BTVER2-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [6:1.00]
2581 ; BTVER2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50]
2582 ; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2583 ; BTVER2-NEXT: retq # sched: [4:1.00]
2585 ; ZNVER1-LABEL: test_pmovzxdq:
2587 ; ZNVER1-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [8:0.50]
2588 ; ZNVER1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:0.25]
2589 ; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
2590 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register operand: elements 0 and 1 of %a0 are extracted and widened.
2591 %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
2592 %2 = zext <2 x i32> %1 to <2 x i64>
; Memory operand: a <2 x i32> load (align 1) from %a1 is widened the same way.
2593 %3 = load <2 x i32>, <2 x i32>* %a1, align 1
2594 %4 = zext <2 x i32> %3 to <2 x i64>
; Adding the two results keeps both extensions live in the generated code.
2595 %5 = add <2 x i64> %2, %4
; test_pmovzxwd - zero-extends the low four i16 elements of %a0 and four i16
; elements loaded from %a1 to <4 x i32>, then adds the two results. The
; expectations below cover the register and the memory form of (v)pmovzxwd,
; plus the (v)paddd that consumes them, for each -mcpu listed in the RUN lines.
; The assertion comments are autogenerated: regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
2599 define <4 x i32> @test_pmovzxwd(<8 x i16> %a0, <4 x i16> *%a1) {
2600 ; GENERIC-LABEL: test_pmovzxwd:
2602 ; GENERIC-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50]
2603 ; GENERIC-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [7:0.50]
2604 ; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
2605 ; GENERIC-NEXT: retq # sched: [1:1.00]
2607 ; SLM-LABEL: test_pmovzxwd:
2609 ; SLM-NEXT: pmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [4:1.00]
2610 ; SLM-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
2611 ; SLM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50]
2612 ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
2613 ; SLM-NEXT: retq # sched: [4:1.00]
2615 ; SANDY-LABEL: test_pmovzxwd:
2617 ; SANDY-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50]
2618 ; SANDY-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [7:0.50]
2619 ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2620 ; SANDY-NEXT: retq # sched: [1:1.00]
2622 ; HASWELL-LABEL: test_pmovzxwd:
2624 ; HASWELL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
2625 ; HASWELL-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [1:1.00]
2626 ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2627 ; HASWELL-NEXT: retq # sched: [2:1.00]
2629 ; BROADWELL-LABEL: test_pmovzxwd:
2630 ; BROADWELL: # BB#0:
2631 ; BROADWELL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
2632 ; BROADWELL-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
2633 ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2634 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2636 ; SKYLAKE-LABEL: test_pmovzxwd:
2638 ; SKYLAKE-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
2639 ; SKYLAKE-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
2640 ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
2641 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2643 ; SKX-LABEL: test_pmovzxwd:
2645 ; SKX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
2646 ; SKX-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
2647 ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
2648 ; SKX-NEXT: retq # sched: [7:1.00]
2650 ; BTVER2-LABEL: test_pmovzxwd:
2652 ; BTVER2-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
2653 ; BTVER2-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50]
2654 ; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2655 ; BTVER2-NEXT: retq # sched: [4:1.00]
2657 ; ZNVER1-LABEL: test_pmovzxwd:
2659 ; ZNVER1-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:0.50]
2660 ; ZNVER1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.25]
2661 ; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
2662 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register operand: elements 0 through 3 of %a0 are extracted and widened.
2663 %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2664 %2 = zext <4 x i16> %1 to <4 x i32>
; Memory operand: a <4 x i16> load (align 1) from %a1 is widened the same way.
2665 %3 = load <4 x i16>, <4 x i16>* %a1, align 1
2666 %4 = zext <4 x i16> %3 to <4 x i32>
; Adding the two results keeps both extensions live in the generated code.
2667 %5 = add <4 x i32> %2, %4
; test_pmovzxwq - zero-extends the low two i16 elements of %a0 and two i16
; elements loaded from %a1 to <2 x i64>, then adds the two results. The
; expectations below cover the register and the memory form of (v)pmovzxwq,
; plus the (v)paddq that consumes them, for each -mcpu listed in the RUN lines.
; The assertion comments are autogenerated: regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
2671 define <2 x i64> @test_pmovzxwq(<8 x i16> %a0, <2 x i16> *%a1) {
2672 ; GENERIC-LABEL: test_pmovzxwq:
2674 ; GENERIC-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50]
2675 ; GENERIC-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [7:0.50]
2676 ; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
2677 ; GENERIC-NEXT: retq # sched: [1:1.00]
2679 ; SLM-LABEL: test_pmovzxwq:
2681 ; SLM-NEXT: pmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [4:1.00]
2682 ; SLM-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
2683 ; SLM-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50]
2684 ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
2685 ; SLM-NEXT: retq # sched: [4:1.00]
2687 ; SANDY-LABEL: test_pmovzxwq:
2689 ; SANDY-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50]
2690 ; SANDY-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [7:0.50]
2691 ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2692 ; SANDY-NEXT: retq # sched: [1:1.00]
2694 ; HASWELL-LABEL: test_pmovzxwq:
2696 ; HASWELL-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
2697 ; HASWELL-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [1:1.00]
2698 ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2699 ; HASWELL-NEXT: retq # sched: [2:1.00]
2701 ; BROADWELL-LABEL: test_pmovzxwq:
2702 ; BROADWELL: # BB#0:
2703 ; BROADWELL-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
2704 ; BROADWELL-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00]
2705 ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2706 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2708 ; SKYLAKE-LABEL: test_pmovzxwq:
2710 ; SKYLAKE-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
2711 ; SKYLAKE-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00]
2712 ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
2713 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2715 ; SKX-LABEL: test_pmovzxwq:
2717 ; SKX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
2718 ; SKX-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00]
2719 ; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
2720 ; SKX-NEXT: retq # sched: [7:1.00]
2722 ; BTVER2-LABEL: test_pmovzxwq:
2724 ; BTVER2-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00]
2725 ; BTVER2-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50]
2726 ; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
2727 ; BTVER2-NEXT: retq # sched: [4:1.00]
2729 ; ZNVER1-LABEL: test_pmovzxwq:
2731 ; ZNVER1-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [8:0.50]
2732 ; ZNVER1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.25]
2733 ; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
2734 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register operand: elements 0 and 1 of %a0 are extracted and widened.
2735 %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
2736 %2 = zext <2 x i16> %1 to <2 x i64>
; Memory operand: a <2 x i16> load (align 1) from %a1 is widened the same way.
2737 %3 = load <2 x i16>, <2 x i16>* %a1, align 1
2738 %4 = zext <2 x i16> %3 to <2 x i64>
; Adding the two results keeps both extensions live in the generated code.
2739 %5 = add <2 x i64> %2, %4
; test_pmuldq - calls the llvm.x86.sse41.pmuldq intrinsic twice: first on the
; two register arguments, then on the (bitcast) first result and a vector
; loaded from %a2. The expectations below cover the register and the (%rdi)
; memory form of (v)pmuldq for each -mcpu listed in the RUN lines.
; The assertion comments are autogenerated: regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
2743 define <2 x i64> @test_pmuldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
2744 ; GENERIC-LABEL: test_pmuldq:
2746 ; GENERIC-NEXT: pmuldq %xmm1, %xmm0 # sched: [3:1.00]
2747 ; GENERIC-NEXT: pmuldq (%rdi), %xmm0 # sched: [9:1.00]
2748 ; GENERIC-NEXT: retq # sched: [1:1.00]
2750 ; SLM-LABEL: test_pmuldq:
2752 ; SLM-NEXT: pmuldq %xmm1, %xmm0 # sched: [4:1.00]
2753 ; SLM-NEXT: pmuldq (%rdi), %xmm0 # sched: [7:1.00]
2754 ; SLM-NEXT: retq # sched: [4:1.00]
2756 ; SANDY-LABEL: test_pmuldq:
2758 ; SANDY-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2759 ; SANDY-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
2760 ; SANDY-NEXT: retq # sched: [1:1.00]
2762 ; HASWELL-LABEL: test_pmuldq:
2764 ; HASWELL-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
2765 ; HASWELL-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [5:1.00]
2766 ; HASWELL-NEXT: retq # sched: [2:1.00]
2768 ; BROADWELL-LABEL: test_pmuldq:
2769 ; BROADWELL: # BB#0:
2770 ; BROADWELL-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
2771 ; BROADWELL-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
2772 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2774 ; SKYLAKE-LABEL: test_pmuldq:
2776 ; SKYLAKE-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
2777 ; SKYLAKE-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
2778 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2780 ; SKX-LABEL: test_pmuldq:
2782 ; SKX-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
2783 ; SKX-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
2784 ; SKX-NEXT: retq # sched: [7:1.00]
2786 ; BTVER2-LABEL: test_pmuldq:
2788 ; BTVER2-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
2789 ; BTVER2-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
2790 ; BTVER2-NEXT: retq # sched: [4:1.00]
2792 ; ZNVER1-LABEL: test_pmuldq:
2794 ; ZNVER1-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
2795 ; ZNVER1-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
2796 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register form: both operands come from the function arguments.
2797 %1 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1)
; The intrinsic returns <2 x i64> but takes <4 x i32>, hence the bitcast
; before the first result is fed back in.
2798 %2 = bitcast <2 x i64> %1 to <4 x i32>
; Memory form: the second operand is a 16-byte-aligned load from %a2.
2799 %3 = load <4 x i32>, <4 x i32> *%a2, align 16
2800 %4 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %2, <4 x i32> %3)
; Declaration of the SSE4.1 pmuldq intrinsic called by @test_pmuldq.
2803 declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone
; test_pmulld - multiplies %a0 by %a1 with a plain <4 x i32> mul, then
; multiplies that product by a vector loaded from %a2. The expectations below
; cover the register and the (%rdi) memory form of (v)pmulld for each -mcpu
; listed in the RUN lines.
; The assertion comments are autogenerated: regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
2805 define <4 x i32> @test_pmulld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
2806 ; GENERIC-LABEL: test_pmulld:
2808 ; GENERIC-NEXT: pmulld %xmm1, %xmm0 # sched: [3:1.00]
2809 ; GENERIC-NEXT: pmulld (%rdi), %xmm0 # sched: [9:1.00]
2810 ; GENERIC-NEXT: retq # sched: [1:1.00]
2812 ; SLM-LABEL: test_pmulld:
2814 ; SLM-NEXT: pmulld %xmm1, %xmm0 # sched: [4:1.00]
2815 ; SLM-NEXT: pmulld (%rdi), %xmm0 # sched: [7:1.00]
2816 ; SLM-NEXT: retq # sched: [4:1.00]
2818 ; SANDY-LABEL: test_pmulld:
2820 ; SANDY-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2821 ; SANDY-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
2822 ; SANDY-NEXT: retq # sched: [1:1.00]
2824 ; HASWELL-LABEL: test_pmulld:
2826 ; HASWELL-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:2.00]
2827 ; HASWELL-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
2828 ; HASWELL-NEXT: retq # sched: [2:1.00]
2830 ; BROADWELL-LABEL: test_pmulld:
2831 ; BROADWELL: # BB#0:
2832 ; BROADWELL-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:2.00]
2833 ; BROADWELL-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [15:2.00]
2834 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2836 ; SKYLAKE-LABEL: test_pmulld:
2838 ; SKYLAKE-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [8:0.67]
2839 ; SKYLAKE-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [14:0.67]
2840 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2842 ; SKX-LABEL: test_pmulld:
2844 ; SKX-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [8:0.67]
2845 ; SKX-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [14:0.67]
2846 ; SKX-NEXT: retq # sched: [7:1.00]
2848 ; BTVER2-LABEL: test_pmulld:
2850 ; BTVER2-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
2851 ; BTVER2-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
2852 ; BTVER2-NEXT: retq # sched: [4:1.00]
2854 ; ZNVER1-LABEL: test_pmulld:
2856 ; ZNVER1-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
2857 ; ZNVER1-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
2858 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register form: both operands come from the function arguments.
2859 %1 = mul <4 x i32> %a0, %a1
; Memory form: the second operand is a 16-byte-aligned load from %a2.
2860 %2 = load <4 x i32>, <4 x i32> *%a2, align 16
2861 %3 = mul <4 x i32> %1, %2
; test_ptest - calls the llvm.x86.sse41.ptestc intrinsic twice on %a0: once
; against the register argument %a1 and once against a vector loaded from
; %a2. The expectations below cover the register and the (%rdi) memory form
; of (v)ptest, each followed by a setb, and then the andb/movzbl pair that
; the generated code uses to combine the two flag results, for each -mcpu
; listed in the RUN lines.
; The assertion comments are autogenerated: regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
2865 define i32 @test_ptest(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
2866 ; GENERIC-LABEL: test_ptest:
2868 ; GENERIC-NEXT: ptest %xmm1, %xmm0 # sched: [2:1.00]
2869 ; GENERIC-NEXT: setb %al # sched: [1:0.50]
2870 ; GENERIC-NEXT: ptest (%rdi), %xmm0 # sched: [8:1.00]
2871 ; GENERIC-NEXT: setb %cl # sched: [1:0.50]
2872 ; GENERIC-NEXT: andb %al, %cl # sched: [1:0.33]
2873 ; GENERIC-NEXT: movzbl %cl, %eax # sched: [1:0.33]
2874 ; GENERIC-NEXT: retq # sched: [1:1.00]
2876 ; SLM-LABEL: test_ptest:
2878 ; SLM-NEXT: ptest %xmm1, %xmm0 # sched: [1:0.50]
2879 ; SLM-NEXT: setb %al # sched: [1:0.50]
2880 ; SLM-NEXT: ptest (%rdi), %xmm0 # sched: [4:1.00]
2881 ; SLM-NEXT: setb %cl # sched: [1:0.50]
2882 ; SLM-NEXT: andb %al, %cl # sched: [1:0.50]
2883 ; SLM-NEXT: movzbl %cl, %eax # sched: [1:0.50]
2884 ; SLM-NEXT: retq # sched: [4:1.00]
2886 ; SANDY-LABEL: test_ptest:
2888 ; SANDY-NEXT: vptest %xmm1, %xmm0 # sched: [2:1.00]
2889 ; SANDY-NEXT: setb %al # sched: [1:0.50]
2890 ; SANDY-NEXT: vptest (%rdi), %xmm0 # sched: [8:1.00]
2891 ; SANDY-NEXT: setb %cl # sched: [1:0.50]
2892 ; SANDY-NEXT: andb %al, %cl # sched: [1:0.33]
2893 ; SANDY-NEXT: movzbl %cl, %eax # sched: [1:0.33]
2894 ; SANDY-NEXT: retq # sched: [1:1.00]
2896 ; HASWELL-LABEL: test_ptest:
2898 ; HASWELL-NEXT: vptest %xmm1, %xmm0 # sched: [2:1.00]
2899 ; HASWELL-NEXT: setb %al # sched: [1:0.50]
2900 ; HASWELL-NEXT: vptest (%rdi), %xmm0 # sched: [2:1.00]
2901 ; HASWELL-NEXT: setb %cl # sched: [1:0.50]
2902 ; HASWELL-NEXT: andb %al, %cl # sched: [1:0.25]
2903 ; HASWELL-NEXT: movzbl %cl, %eax # sched: [1:0.25]
2904 ; HASWELL-NEXT: retq # sched: [2:1.00]
2906 ; BROADWELL-LABEL: test_ptest:
2907 ; BROADWELL: # BB#0:
2908 ; BROADWELL-NEXT: vptest %xmm1, %xmm0 # sched: [2:1.00]
2909 ; BROADWELL-NEXT: setb %al # sched: [1:0.50]
2910 ; BROADWELL-NEXT: vptest (%rdi), %xmm0 # sched: [7:1.00]
2911 ; BROADWELL-NEXT: setb %cl # sched: [1:0.50]
2912 ; BROADWELL-NEXT: andb %al, %cl # sched: [1:0.25]
2913 ; BROADWELL-NEXT: movzbl %cl, %eax # sched: [1:0.25]
2914 ; BROADWELL-NEXT: retq # sched: [7:1.00]
2916 ; SKYLAKE-LABEL: test_ptest:
2918 ; SKYLAKE-NEXT: vptest %xmm1, %xmm0 # sched: [3:1.00]
2919 ; SKYLAKE-NEXT: setb %al # sched: [1:0.50]
2920 ; SKYLAKE-NEXT: vptest (%rdi), %xmm0 # sched: [9:1.00]
2921 ; SKYLAKE-NEXT: setb %cl # sched: [1:0.50]
2922 ; SKYLAKE-NEXT: andb %al, %cl # sched: [1:0.25]
2923 ; SKYLAKE-NEXT: movzbl %cl, %eax # sched: [1:0.25]
2924 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
2926 ; SKX-LABEL: test_ptest:
2928 ; SKX-NEXT: vptest %xmm1, %xmm0 # sched: [3:1.00]
2929 ; SKX-NEXT: setb %al # sched: [1:0.50]
2930 ; SKX-NEXT: vptest (%rdi), %xmm0 # sched: [9:1.00]
2931 ; SKX-NEXT: setb %cl # sched: [1:0.50]
2932 ; SKX-NEXT: andb %al, %cl # sched: [1:0.25]
2933 ; SKX-NEXT: movzbl %cl, %eax # sched: [1:0.25]
2934 ; SKX-NEXT: retq # sched: [7:1.00]
2936 ; BTVER2-LABEL: test_ptest:
2938 ; BTVER2-NEXT: vptest %xmm1, %xmm0 # sched: [3:1.00]
2939 ; BTVER2-NEXT: setb %al # sched: [1:0.50]
2940 ; BTVER2-NEXT: vptest (%rdi), %xmm0 # sched: [8:1.00]
2941 ; BTVER2-NEXT: setb %cl # sched: [1:0.50]
2942 ; BTVER2-NEXT: andb %al, %cl # sched: [1:0.50]
2943 ; BTVER2-NEXT: movzbl %cl, %eax # sched: [1:0.50]
2944 ; BTVER2-NEXT: retq # sched: [4:1.00]
2946 ; ZNVER1-LABEL: test_ptest:
2948 ; ZNVER1-NEXT: vptest %xmm1, %xmm0 # sched: [1:1.00]
2949 ; ZNVER1-NEXT: setb %al # sched: [1:0.25]
2950 ; ZNVER1-NEXT: vptest (%rdi), %xmm0 # sched: [8:1.00]
2951 ; ZNVER1-NEXT: setb %cl # sched: [1:0.25]
2952 ; ZNVER1-NEXT: andb %al, %cl # sched: [1:0.25]
2953 ; ZNVER1-NEXT: movzbl %cl, %eax # sched: [1:0.25]
2954 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register form: %a0 is tested against the register argument %a1.
2955 %1 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1)
; Memory form: %a0 is tested against a 16-byte-aligned load from %a2.
2956 %2 = load <2 x i64>, <2 x i64> *%a2, align 16
2957 %3 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %2)
; Declaration of the SSE4.1 ptestc intrinsic called by @test_ptest.
2961 declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
; test_roundpd - calls the llvm.x86.sse41.round.pd intrinsic with immediate 7
; on %a0 and on a vector loaded from %a1, then adds the two results with
; fadd. The expectations below cover the register and the (%rdi) memory form
; of the rounding instruction, plus the (v)addpd that consumes them, for each
; -mcpu listed in the RUN lines. Note that the SKX expectations name
; vrndscalepd where the other AVX targets name vroundpd.
; The assertion comments are autogenerated: regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
2963 define <2 x double> @test_roundpd(<2 x double> %a0, <2 x double> *%a1) {
2964 ; GENERIC-LABEL: test_roundpd:
2966 ; GENERIC-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [3:1.00]
2967 ; GENERIC-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [9:1.00]
2968 ; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
2969 ; GENERIC-NEXT: retq # sched: [1:1.00]
2971 ; SLM-LABEL: test_roundpd:
2973 ; SLM-NEXT: roundpd $7, (%rdi), %xmm1 # sched: [6:1.00]
2974 ; SLM-NEXT: roundpd $7, %xmm0, %xmm0 # sched: [3:1.00]
2975 ; SLM-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
2976 ; SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00]
2977 ; SLM-NEXT: retq # sched: [4:1.00]
2979 ; SANDY-LABEL: test_roundpd:
2981 ; SANDY-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [3:1.00]
2982 ; SANDY-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [9:1.00]
2983 ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2984 ; SANDY-NEXT: retq # sched: [1:1.00]
2986 ; HASWELL-LABEL: test_roundpd:
2988 ; HASWELL-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [5:1.25]
2989 ; HASWELL-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [6:2.00]
2990 ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2991 ; HASWELL-NEXT: retq # sched: [2:1.00]
2993 ; BROADWELL-LABEL: test_roundpd:
2994 ; BROADWELL: # BB#0:
2995 ; BROADWELL-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [6:0.50]
2996 ; BROADWELL-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [11:2.00]
2997 ; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
2998 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3000 ; SKYLAKE-LABEL: test_roundpd:
3002 ; SKYLAKE-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [8:0.67]
3003 ; SKYLAKE-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [14:0.67]
3004 ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
3005 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3007 ; SKX-LABEL: test_roundpd:
3009 ; SKX-NEXT: vrndscalepd $7, %xmm0, %xmm0 # sched: [8:0.67]
3010 ; SKX-NEXT: vrndscalepd $7, (%rdi), %xmm1 # sched: [14:0.67]
3011 ; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
3012 ; SKX-NEXT: retq # sched: [7:1.00]
3014 ; BTVER2-LABEL: test_roundpd:
3016 ; BTVER2-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [8:1.00]
3017 ; BTVER2-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [3:1.00]
3018 ; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
3019 ; BTVER2-NEXT: retq # sched: [4:1.00]
3021 ; ZNVER1-LABEL: test_roundpd:
3023 ; ZNVER1-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [11:1.00]
3024 ; ZNVER1-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [4:1.00]
3025 ; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
3026 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register form: the source operand is the function argument %a0.
3027 %1 = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7)
; Memory form: the source operand is a 16-byte-aligned load from %a1.
3028 %2 = load <2 x double>, <2 x double> *%a1, align 16
3029 %3 = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %2, i32 7)
; Adding the two results keeps both rounding operations live.
3030 %4 = fadd <2 x double> %1, %3
; Declaration of the SSE4.1 round.pd intrinsic called by @test_roundpd.
3033 declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone
; test_roundps - calls the llvm.x86.sse41.round.ps intrinsic with immediate 7
; on %a0 and on a vector loaded from %a1, then adds the two results with
; fadd. The expectations below cover the register and the (%rdi) memory form
; of the rounding instruction, plus the (v)addps that consumes them, for each
; -mcpu listed in the RUN lines. Note that the SKX expectations name
; vrndscaleps where the other AVX targets name vroundps.
; The assertion comments are autogenerated: regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
3035 define <4 x float> @test_roundps(<4 x float> %a0, <4 x float> *%a1) {
3036 ; GENERIC-LABEL: test_roundps:
3038 ; GENERIC-NEXT: roundps $7, %xmm0, %xmm1 # sched: [3:1.00]
3039 ; GENERIC-NEXT: roundps $7, (%rdi), %xmm0 # sched: [9:1.00]
3040 ; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
3041 ; GENERIC-NEXT: retq # sched: [1:1.00]
3043 ; SLM-LABEL: test_roundps:
3045 ; SLM-NEXT: roundps $7, (%rdi), %xmm1 # sched: [6:1.00]
3046 ; SLM-NEXT: roundps $7, %xmm0, %xmm0 # sched: [3:1.00]
3047 ; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
3048 ; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
3049 ; SLM-NEXT: retq # sched: [4:1.00]
3051 ; SANDY-LABEL: test_roundps:
3053 ; SANDY-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [3:1.00]
3054 ; SANDY-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [9:1.00]
3055 ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
3056 ; SANDY-NEXT: retq # sched: [1:1.00]
3058 ; HASWELL-LABEL: test_roundps:
3060 ; HASWELL-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [5:1.25]
3061 ; HASWELL-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [6:2.00]
3062 ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
3063 ; HASWELL-NEXT: retq # sched: [2:1.00]
3065 ; BROADWELL-LABEL: test_roundps:
3066 ; BROADWELL: # BB#0:
3067 ; BROADWELL-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [6:0.50]
3068 ; BROADWELL-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [11:2.00]
3069 ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
3070 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3072 ; SKYLAKE-LABEL: test_roundps:
3074 ; SKYLAKE-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [8:0.67]
3075 ; SKYLAKE-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [14:0.67]
3076 ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
3077 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3079 ; SKX-LABEL: test_roundps:
3081 ; SKX-NEXT: vrndscaleps $7, %xmm0, %xmm0 # sched: [8:0.67]
3082 ; SKX-NEXT: vrndscaleps $7, (%rdi), %xmm1 # sched: [14:0.67]
3083 ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
3084 ; SKX-NEXT: retq # sched: [7:1.00]
3086 ; BTVER2-LABEL: test_roundps:
3088 ; BTVER2-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [8:1.00]
3089 ; BTVER2-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [3:1.00]
3090 ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
3091 ; BTVER2-NEXT: retq # sched: [4:1.00]
3093 ; ZNVER1-LABEL: test_roundps:
3095 ; ZNVER1-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [11:1.00]
3096 ; ZNVER1-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [4:1.00]
3097 ; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
3098 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register form: the source operand is the function argument %a0.
3099 %1 = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7)
; Memory form: the source operand is a 16-byte-aligned load from %a1.
3100 %2 = load <4 x float>, <4 x float> *%a1, align 16
3101 %3 = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %2, i32 7)
; Adding the two results keeps both rounding operations live.
3102 %4 = fadd <4 x float> %1, %3
; Declaration of the SSE4.1 round.ps intrinsic called by @test_roundps.
3105 declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone
; test_roundsd - calls the llvm.x86.sse41.round.sd intrinsic with immediate 7
; twice, both times with %a0 as the first operand: once with the register
; argument %a1 and once with a vector loaded from %a2 as the second operand.
; The two results are added with fadd. The expectations below cover the
; register and the (%rdi) memory form of the rounding instruction, plus the
; (v)addpd that consumes them, for each -mcpu listed in the RUN lines. Note
; that the SKX expectations name vrndscalesd where the other AVX targets
; name vroundsd.
; The assertion comments are autogenerated: regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
3107 define <2 x double> @test_roundsd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
3108 ; GENERIC-LABEL: test_roundsd:
3110 ; GENERIC-NEXT: movapd %xmm0, %xmm2 # sched: [1:1.00]
3111 ; GENERIC-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [3:1.00]
3112 ; GENERIC-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [9:1.00]
3113 ; GENERIC-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00]
3114 ; GENERIC-NEXT: retq # sched: [1:1.00]
3116 ; SLM-LABEL: test_roundsd:
3118 ; SLM-NEXT: movapd %xmm0, %xmm2 # sched: [1:1.00]
3119 ; SLM-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [6:1.00]
3120 ; SLM-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [3:1.00]
3121 ; SLM-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00]
3122 ; SLM-NEXT: retq # sched: [4:1.00]
3124 ; SANDY-LABEL: test_roundsd:
3126 ; SANDY-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
3127 ; SANDY-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
3128 ; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
3129 ; SANDY-NEXT: retq # sched: [1:1.00]
3131 ; HASWELL-LABEL: test_roundsd:
3133 ; HASWELL-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [5:1.25]
3134 ; HASWELL-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [6:2.00]
3135 ; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
3136 ; HASWELL-NEXT: retq # sched: [2:1.00]
3138 ; BROADWELL-LABEL: test_roundsd:
3139 ; BROADWELL: # BB#0:
3140 ; BROADWELL-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [6:0.50]
3141 ; BROADWELL-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
3142 ; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
3143 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3145 ; SKYLAKE-LABEL: test_roundsd:
3147 ; SKYLAKE-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [8:0.67]
3148 ; SKYLAKE-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [14:0.67]
3149 ; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
3150 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3152 ; SKX-LABEL: test_roundsd:
3154 ; SKX-NEXT: vrndscalesd $7, %xmm1, %xmm0, %xmm1 # sched: [8:0.67]
3155 ; SKX-NEXT: vrndscalesd $7, (%rdi), %xmm0, %xmm0 # sched: [14:0.67]
3156 ; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
3157 ; SKX-NEXT: retq # sched: [7:1.00]
3159 ; BTVER2-LABEL: test_roundsd:
3161 ; BTVER2-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
3162 ; BTVER2-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
3163 ; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
3164 ; BTVER2-NEXT: retq # sched: [4:1.00]
3166 ; ZNVER1-LABEL: test_roundsd:
3168 ; ZNVER1-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [4:1.00]
3169 ; ZNVER1-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
3170 ; ZNVER1-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
3171 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; Register form: the second operand is the function argument %a1.
3172 %1 = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7)
; Memory form: the second operand is a 16-byte-aligned load from %a2.
3173 %2 = load <2 x double>, <2 x double>* %a2, align 16
3174 %3 = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %2, i32 7)
; Adding the two results keeps both rounding operations live.
3175 %4 = fadd <2 x double> %1, %3
; Declaration of the scalar-double rounding intrinsic called by test_roundsd
; (takes two <2 x double> values and an i32 immediate, returns a <2 x double>).
3178 declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
; test_roundss: calls @llvm.x86.sse41.round.ss twice with immediate 7 -- once
; with the second operand taken from the %a1 argument and once with it loaded
; from the %a2 pointer -- and adds the two results with fadd. The expected
; output therefore contains both a register-operand and a memory-operand form
; of the rounding instruction.
;
; The assertions below are autogenerated (see the note on the first line of the
; file); each group pins the expected instruction sequence and the trailing
; "sched" annotation for one of the -mcpu configurations in the run lines.
; Regenerate them with the script instead of editing them by hand.
; With -mcpu=skx the expected mnemonic is vrndscaless rather than vroundss, and
; the slm expectations list the memory-operand form before the register form.
; NOTE(review): the two numbers in the "sched" annotation are presumably
; latency and reciprocal throughput as printed by -print-schedule -- confirm.
3180 define <4 x float> @test_roundss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
3181 ; GENERIC-LABEL: test_roundss:
3183 ; GENERIC-NEXT: movaps %xmm0, %xmm2 # sched: [1:1.00]
3184 ; GENERIC-NEXT: roundss $7, %xmm1, %xmm2 # sched: [3:1.00]
3185 ; GENERIC-NEXT: roundss $7, (%rdi), %xmm0 # sched: [9:1.00]
3186 ; GENERIC-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00]
3187 ; GENERIC-NEXT: retq # sched: [1:1.00]
3189 ; SLM-LABEL: test_roundss:
3191 ; SLM-NEXT: movaps %xmm0, %xmm2 # sched: [1:1.00]
3192 ; SLM-NEXT: roundss $7, (%rdi), %xmm0 # sched: [6:1.00]
3193 ; SLM-NEXT: roundss $7, %xmm1, %xmm2 # sched: [3:1.00]
3194 ; SLM-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00]
3195 ; SLM-NEXT: retq # sched: [4:1.00]
3197 ; SANDY-LABEL: test_roundss:
3199 ; SANDY-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
3200 ; SANDY-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
3201 ; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
3202 ; SANDY-NEXT: retq # sched: [1:1.00]
3204 ; HASWELL-LABEL: test_roundss:
3206 ; HASWELL-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [5:1.25]
3207 ; HASWELL-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [6:2.00]
3208 ; HASWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
3209 ; HASWELL-NEXT: retq # sched: [2:1.00]
3211 ; BROADWELL-LABEL: test_roundss:
3212 ; BROADWELL: # BB#0:
3213 ; BROADWELL-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [6:0.50]
3214 ; BROADWELL-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
3215 ; BROADWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
3216 ; BROADWELL-NEXT: retq # sched: [7:1.00]
3218 ; SKYLAKE-LABEL: test_roundss:
3220 ; SKYLAKE-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [8:0.67]
3221 ; SKYLAKE-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [14:0.67]
3222 ; SKYLAKE-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
3223 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3225 ; SKX-LABEL: test_roundss:
3227 ; SKX-NEXT: vrndscaless $7, %xmm1, %xmm0, %xmm1 # sched: [8:0.67]
3228 ; SKX-NEXT: vrndscaless $7, (%rdi), %xmm0, %xmm0 # sched: [14:0.67]
3229 ; SKX-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
3230 ; SKX-NEXT: retq # sched: [7:1.00]
3232 ; BTVER2-LABEL: test_roundss:
3234 ; BTVER2-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
3235 ; BTVER2-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
3236 ; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
3237 ; BTVER2-NEXT: retq # sched: [4:1.00]
3239 ; ZNVER1-LABEL: test_roundss:
3241 ; ZNVER1-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [4:1.00]
3242 ; ZNVER1-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
3243 ; ZNVER1-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
3244 ; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test: register-operand call, 16-byte-aligned load, memory-operand
; call, then the fadd that keeps both results live.
3245 %1 = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7)
3246 %2 = load <4 x float>, <4 x float> *%a2, align 16
3247 %3 = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %2, i32 7)
3248 %4 = fadd <4 x float> %1, %3
; Declaration of the scalar-single rounding intrinsic called by test_roundss
; (takes two <4 x float> values and an i32 immediate, returns a <4 x float>).
3251 declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone