; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,-sse2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SSE,CHECK-SSE1
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,+sse2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SSE,CHECK-SSE2
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+xop < %s | FileCheck %s --check-prefixes=CHECK,CHECK-XOP

; ============================================================================ ;
; Various cases with %x and/or %y being a constant
; ============================================================================ ;
; "Out" form, with the %y operand replaced by an all-ones constant:
;   r = (mask & x) | (~mask & -1)
; %y is loaded below but never referenced by the expression.
define <4 x i32> @out_constant_varx_mone(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_varx_mone:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
; CHECK-SSE1-NEXT:    xorps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andps (%rsi), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_constant_varx_mone:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa (%rdx), %xmm0
; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
; CHECK-SSE2-NEXT:    pxor %xmm0, %xmm1
; CHECK-SSE2-NEXT:    pand (%rdi), %xmm0
; CHECK-SSE2-NEXT:    por %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_constant_varx_mone:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-XOP-NEXT:    vpxor %xmm1, %xmm0, %xmm1
; CHECK-XOP-NEXT:    vpand (%rdi), %xmm0, %xmm0
; CHECK-XOP-NEXT:    vpor %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %mask, %x
  %my = and <4 x i32> %notmask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}
; "In" form, with the %y operand replaced by an all-ones constant:
;   r = ((x ^ -1) & mask) ^ -1
; Bitwise this yields x where mask is set and all-ones where it is clear.
; %y is loaded below but never referenced by the expression.
define <4 x i32> @in_constant_varx_mone(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_varx_mone:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rsi), %xmm0
; CHECK-SSE1-NEXT:    andnps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    xorps {{.*}}(%rip), %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: in_constant_varx_mone:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa (%rdi), %xmm0
; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
; CHECK-SSE2-NEXT:    pandn (%rdx), %xmm0
; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: in_constant_varx_mone:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdi), %xmm0
; CHECK-XOP-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-XOP-NEXT:    vpandn (%rdx), %xmm0, %xmm0
; CHECK-XOP-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %n0 = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> ; %x
  %n1 = and <4 x i32> %n0, %mask
  %r = xor <4 x i32> %n1, <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %r
}
; This is not a canonical form. Testing for completeness only.
; "Out" form with the mask inverted and %y replaced by an all-ones constant:
;   r = (~mask & x) | (mask & -1)
; %y is loaded below but never referenced by the expression.
define <4 x i32> @out_constant_varx_mone_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_varx_mone_invmask:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps (%rsi), %xmm1
; CHECK-SSE1-NEXT:    orps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    movaps %xmm1, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_constant_varx_mone_invmask:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm1
; CHECK-SSE2-NEXT:    movaps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    andnps (%rdi), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_constant_varx_mone_invmask:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovaps (%rdx), %xmm0
; CHECK-XOP-NEXT:    vandnps (%rdi), %xmm0, %xmm1
; CHECK-XOP-NEXT:    vorps %xmm0, %xmm1, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %notmask, %x
  %my = and <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}
; This is not a canonical form. Testing for completeness only.
; "In" form with the mask inverted and %y replaced by an all-ones constant:
;   r = ((x ^ -1) & ~mask) ^ -1
; Bitwise this yields x where mask is clear and all-ones where it is set.
; %y is loaded below but never referenced by the expression.
define <4 x i32> @in_constant_varx_mone_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_varx_mone_invmask:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rsi), %xmm0
; CHECK-SSE1-NEXT:    movaps {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm2
; CHECK-SSE1-NEXT:    xorps %xmm1, %xmm2
; CHECK-SSE1-NEXT:    andnps %xmm2, %xmm0
; CHECK-SSE1-NEXT:    xorps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: in_constant_varx_mone_invmask:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa (%rdi), %xmm0
; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
; CHECK-SSE2-NEXT:    movdqa (%rdx), %xmm2
; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm2
; CHECK-SSE2-NEXT:    pandn %xmm2, %xmm0
; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: in_constant_varx_mone_invmask:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdi), %xmm0
; CHECK-XOP-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-XOP-NEXT:    vpxor (%rdx), %xmm1, %xmm2
; CHECK-XOP-NEXT:    vpandn %xmm2, %xmm0, %xmm0
; CHECK-XOP-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %n0 = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> ; %x
  %n1 = and <4 x i32> %n0, %notmask
  %r = xor <4 x i32> %n1, <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %r
}
; "Out" form, with the %y operand replaced by a splat of the constant 42:
;   r = (mask & x) | (~mask & 42)
; %y is loaded below but never referenced by the expression.
define <4 x i32> @out_constant_varx_42(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_varx_42:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps (%rsi), %xmm1
; CHECK-SSE1-NEXT:    andps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps {{.*}}(%rip), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_constant_varx_42:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    movaps (%rdi), %xmm1
; CHECK-SSE2-NEXT:    andps %xmm0, %xmm1
; CHECK-SSE2-NEXT:    andnps {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_constant_varx_42:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdi), %xmm0
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm1
; CHECK-XOP-NEXT:    vpcmov %xmm1, {{.*}}(%rip), %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %mask, %x
  %my = and <4 x i32> %notmask, <i32 42, i32 42, i32 42, i32 42>
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}
; "In" form, with the %y operand replaced by a splat of the constant 42:
;   r = ((x ^ 42) & mask) ^ 42
; Bitwise this yields x where mask is set and 42 where it is clear.
; %y is loaded below but never referenced by the expression.
define <4 x i32> @in_constant_varx_42(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_varx_42:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps (%rsi), %xmm1
; CHECK-SSE1-NEXT:    andps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps {{.*}}(%rip), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: in_constant_varx_42:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    movaps (%rdi), %xmm1
; CHECK-SSE2-NEXT:    andps %xmm0, %xmm1
; CHECK-SSE2-NEXT:    andnps {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: in_constant_varx_42:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdi), %xmm0
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm1
; CHECK-XOP-NEXT:    vpcmov %xmm1, {{.*}}(%rip), %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %n0 = xor <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42> ; %x
  %n1 = and <4 x i32> %n0, %mask
  %r = xor <4 x i32> %n1, <i32 42, i32 42, i32 42, i32 42>
  ret <4 x i32> %r
}
; This is not a canonical form. Testing for completeness only.
; "Out" form with the mask inverted and %y replaced by a splat of 42:
;   r = (~mask & x) | (mask & 42)
; %y is loaded below but never referenced by the expression.
define <4 x i32> @out_constant_varx_42_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_varx_42_invmask:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps (%rsi), %xmm1
; CHECK-SSE1-NEXT:    andps {{.*}}(%rip), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_constant_varx_42_invmask:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE2-NEXT:    andnps (%rdi), %xmm1
; CHECK-SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_constant_varx_42_invmask:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT:    vmovdqa {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-XOP-NEXT:    vpcmov %xmm0, (%rdi), %xmm1, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %notmask, %x
  %my = and <4 x i32> %mask, <i32 42, i32 42, i32 42, i32 42>
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}
; This is not a canonical form. Testing for completeness only.
; "In" form with the mask inverted and %y replaced by a splat of 42:
;   r = ((x ^ 42) & ~mask) ^ 42
; Bitwise this yields x where mask is clear and 42 where it is set.
; %y is loaded below but never referenced by the expression.
define <4 x i32> @in_constant_varx_42_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_varx_42_invmask:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps (%rsi), %xmm1
; CHECK-SSE1-NEXT:    andps {{.*}}(%rip), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: in_constant_varx_42_invmask:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE2-NEXT:    andnps (%rdi), %xmm1
; CHECK-SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: in_constant_varx_42_invmask:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT:    vmovdqa {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-XOP-NEXT:    vpcmov %xmm0, (%rdi), %xmm1, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %n0 = xor <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42> ; %x
  %n1 = and <4 x i32> %n0, %notmask
  %r = xor <4 x i32> %n1, <i32 42, i32 42, i32 42, i32 42>
  ret <4 x i32> %r
}
; "Out" form, with the %x operand replaced by an all-ones constant:
;   r = (mask & -1) | (~mask & y)
; %x is loaded below but never referenced by the expression.
define <4 x i32> @out_constant_mone_vary(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_mone_vary:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps (%rdx), %xmm1
; CHECK-SSE1-NEXT:    orps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    movaps %xmm1, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_constant_mone_vary:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm1
; CHECK-SSE2-NEXT:    movaps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    andnps (%rsi), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_constant_mone_vary:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovaps (%rdx), %xmm0
; CHECK-XOP-NEXT:    vandnps (%rsi), %xmm0, %xmm1
; CHECK-XOP-NEXT:    vorps %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %my = and <4 x i32> %notmask, %y
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}
; "In" form, with the %x operand replaced by an all-ones constant:
;   r = ((-1 ^ y) & mask) ^ y
; Bitwise this yields all-ones where mask is set and y where it is clear.
; %x is loaded below but never referenced by the expression.
define <4 x i32> @in_constant_mone_vary(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_mone_vary:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps (%rdx), %xmm1
; CHECK-SSE1-NEXT:    orps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    movaps %xmm1, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: in_constant_mone_vary:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm1
; CHECK-SSE2-NEXT:    movaps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    andnps (%rsi), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: in_constant_mone_vary:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovaps (%rdx), %xmm0
; CHECK-XOP-NEXT:    vandnps (%rsi), %xmm0, %xmm1
; CHECK-XOP-NEXT:    vorps %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %n0 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %y ; %x
  %n1 = and <4 x i32> %n0, %mask
  %r = xor <4 x i32> %n1, %y
  ret <4 x i32> %r
}
; This is not a canonical form. Testing for completeness only.
; "Out" form with the mask inverted and %x replaced by an all-ones constant:
;   r = (~mask & -1) | (mask & y)
; %x is loaded below but never referenced by the expression.
define <4 x i32> @out_constant_mone_vary_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_mone_vary_invmask:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
; CHECK-SSE1-NEXT:    xorps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andps (%rdx), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_constant_mone_vary_invmask:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa (%rdx), %xmm0
; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
; CHECK-SSE2-NEXT:    pxor %xmm0, %xmm1
; CHECK-SSE2-NEXT:    pand (%rsi), %xmm0
; CHECK-SSE2-NEXT:    por %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_constant_mone_vary_invmask:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-XOP-NEXT:    vpxor %xmm1, %xmm0, %xmm1
; CHECK-XOP-NEXT:    vpand (%rsi), %xmm0, %xmm0
; CHECK-XOP-NEXT:    vpor %xmm0, %xmm1, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %notmask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %my = and <4 x i32> %mask, %y
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}
; This is not a canonical form. Testing for completeness only.
; "In" form with the mask inverted and %x replaced by an all-ones constant:
;   r = ((-1 ^ y) & ~mask) ^ y
; Bitwise this yields all-ones where mask is clear and y where it is set.
; %x is loaded below but never referenced by the expression.
define <4 x i32> @in_constant_mone_vary_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_mone_vary_invmask:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
; CHECK-SSE1-NEXT:    xorps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andps (%rdx), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: in_constant_mone_vary_invmask:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa (%rdx), %xmm0
; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
; CHECK-SSE2-NEXT:    pxor %xmm0, %xmm1
; CHECK-SSE2-NEXT:    pand (%rsi), %xmm0
; CHECK-SSE2-NEXT:    por %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: in_constant_mone_vary_invmask:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-XOP-NEXT:    vpxor %xmm1, %xmm0, %xmm1
; CHECK-XOP-NEXT:    vpand (%rsi), %xmm0, %xmm0
; CHECK-XOP-NEXT:    vpor %xmm0, %xmm1, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %n0 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %y ; %x
  %n1 = and <4 x i32> %n0, %notmask
  %r = xor <4 x i32> %n1, %y
  ret <4 x i32> %r
}
; "Out" form, with the %x operand replaced by a splat of the constant 42:
;   r = (mask & 42) | (~mask & y)
; %x is loaded below but never referenced by the expression.
define <4 x i32> @out_constant_42_vary(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_42_vary:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps {{.*#+}} xmm1 = [5.88545355E-44,5.88545355E-44,5.88545355E-44,5.88545355E-44]
; CHECK-SSE1-NEXT:    andps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps (%rdx), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_constant_42_vary:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    movaps {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-SSE2-NEXT:    andps %xmm0, %xmm1
; CHECK-SSE2-NEXT:    andnps (%rsi), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_constant_42_vary:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT:    vmovdqa {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-XOP-NEXT:    vpcmov %xmm0, (%rsi), %xmm1, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %mask, <i32 42, i32 42, i32 42, i32 42>
  %my = and <4 x i32> %notmask, %y
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}
; "In" form, with the %x operand replaced by a splat of the constant 42:
;   r = ((42 ^ y) & mask) ^ y
; Bitwise this yields 42 where mask is set and y where it is clear.
; %x is loaded below but never referenced by the expression.
define <4 x i32> @in_constant_42_vary(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_42_vary:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps (%rdx), %xmm1
; CHECK-SSE1-NEXT:    andps {{.*}}(%rip), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: in_constant_42_vary:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE2-NEXT:    andnps (%rsi), %xmm1
; CHECK-SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: in_constant_42_vary:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT:    vmovdqa {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-XOP-NEXT:    vpcmov %xmm0, (%rsi), %xmm1, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %n0 = xor <4 x i32> <i32 42, i32 42, i32 42, i32 42>, %y ; %x
  %n1 = and <4 x i32> %n0, %mask
  %r = xor <4 x i32> %n1, %y
  ret <4 x i32> %r
}
; This is not a canonical form. Testing for completeness only.
; "Out" form with the mask inverted and %x replaced by a splat of 42:
;   r = (~mask & 42) | (mask & y)
; %x is loaded below but never referenced by the expression.
define <4 x i32> @out_constant_42_vary_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: out_constant_42_vary_invmask:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps {{.*}}(%rip), %xmm1
; CHECK-SSE1-NEXT:    andps (%rdx), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_constant_42_vary_invmask:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE2-NEXT:    andnps {{.*}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    andps (%rsi), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_constant_42_vary_invmask:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rsi), %xmm0
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm1
; CHECK-XOP-NEXT:    vpcmov %xmm1, {{.*}}(%rip), %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %notmask, <i32 42, i32 42, i32 42, i32 42>
  %my = and <4 x i32> %mask, %y
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}
; This is not a canonical form. Testing for completeness only.
; "In" form with the mask inverted and %x replaced by a splat of 42:
;   r = ((42 ^ y) & ~mask) ^ y
; Bitwise this yields 42 where mask is clear and y where it is set.
; %x is loaded below but never referenced by the expression.
define <4 x i32> @in_constant_42_vary_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
; CHECK-SSE1-LABEL: in_constant_42_vary_invmask:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps (%rdx), %xmm1
; CHECK-SSE1-NEXT:    andps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps {{.*}}(%rip), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: in_constant_42_vary_invmask:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    movaps (%rsi), %xmm1
; CHECK-SSE2-NEXT:    andps %xmm0, %xmm1
; CHECK-SSE2-NEXT:    andnps {{.*}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: in_constant_42_vary_invmask:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rsi), %xmm0
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm1
; CHECK-XOP-NEXT:    vpcmov %xmm1, {{.*}}(%rip), %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, <4 x i32> *%px, align 16
  %y = load <4 x i32>, <4 x i32> *%py, align 16
  %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %n0 = xor <4 x i32> <i32 42, i32 42, i32 42, i32 42>, %y ; %x
  %n1 = and <4 x i32> %n0, %notmask
  %r = xor <4 x i32> %n1, %y
  ret <4 x i32> %r
}