; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,-sse2 < %s | FileCheck %s --check-prefix=CHECK-SSE1
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,+sse2 < %s | FileCheck %s --check-prefix=CHECK-SSE2
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+xop < %s | FileCheck %s --check-prefix=CHECK-XOP
; ============================================================================ ;
; Various cases with %x and/or %y being a constant
; ============================================================================ ;
; out-form masked merge: r = (mask & x) | (~mask & -1)
define <4 x i32> @out_constant_varx_mone(ptr%px, ptr%py, ptr%pmask) {
; CHECK-SSE1-LABEL: out_constant_varx_mone:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT: movaps {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
; CHECK-SSE1-NEXT: xorps %xmm0, %xmm1
; CHECK-SSE1-NEXT: andps (%rsi), %xmm0
; CHECK-SSE1-NEXT: orps %xmm1, %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: out_constant_varx_mone:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movdqa (%rdx), %xmm0
; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm1
; CHECK-SSE2-NEXT: pxor %xmm0, %xmm1
; CHECK-SSE2-NEXT: pand (%rdi), %xmm0
; CHECK-SSE2-NEXT: por %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: out_constant_varx_mone:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-XOP-NEXT: vpxor %xmm1, %xmm0, %xmm1
; CHECK-XOP-NEXT: vpand (%rdi), %xmm0, %xmm0
; CHECK-XOP-NEXT: vpor %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT: retq
  %x = load <4 x i32>, ptr%px, align 16
  %y = load <4 x i32>, ptr%py, align 16
  %mask = load <4 x i32>, ptr%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %mask, %x
  %my = and <4 x i32> %notmask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}
; in-form masked merge: r = ((x ^ -1) & mask) ^ -1
define <4 x i32> @in_constant_varx_mone(ptr%px, ptr%py, ptr%pmask) {
; CHECK-SSE1-LABEL: in_constant_varx_mone:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: movaps (%rsi), %xmm0
; CHECK-SSE1-NEXT: andnps (%rcx), %xmm0
; CHECK-SSE1-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: in_constant_varx_mone:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movdqa (%rdi), %xmm0
; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm1
; CHECK-SSE2-NEXT: pandn (%rdx), %xmm0
; CHECK-SSE2-NEXT: pxor %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: in_constant_varx_mone:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovdqa (%rdi), %xmm0
; CHECK-XOP-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-XOP-NEXT: vpandn (%rdx), %xmm0, %xmm0
; CHECK-XOP-NEXT: vpxor %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT: retq
  %x = load <4 x i32>, ptr%px, align 16
  %y = load <4 x i32>, ptr%py, align 16
  %mask = load <4 x i32>, ptr%pmask, align 16
  %n0 = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> ; %x
  %n1 = and <4 x i32> %n0, %mask
  %r = xor <4 x i32> %n1, <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %r
}
; This is not a canonical form. Testing for completeness only.
; out-form, inverted mask: r = (~mask & x) | (mask & -1)
define <4 x i32> @out_constant_varx_mone_invmask(ptr%px, ptr%py, ptr%pmask) {
; CHECK-SSE1-LABEL: out_constant_varx_mone_invmask:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: movaps (%rsi), %xmm0
; CHECK-SSE1-NEXT: orps (%rcx), %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: out_constant_varx_mone_invmask:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movaps (%rdi), %xmm0
; CHECK-SSE2-NEXT: orps (%rdx), %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: out_constant_varx_mone_invmask:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovaps (%rdi), %xmm0
; CHECK-XOP-NEXT: vorps (%rdx), %xmm0, %xmm0
; CHECK-XOP-NEXT: retq
  %x = load <4 x i32>, ptr%px, align 16
  %y = load <4 x i32>, ptr%py, align 16
  %mask = load <4 x i32>, ptr%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %notmask, %x
  %my = and <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}
; This is not a canonical form. Testing for completeness only.
; in-form, inverted mask: r = ((x ^ -1) & ~mask) ^ -1
define <4 x i32> @in_constant_varx_mone_invmask(ptr%px, ptr%py, ptr%pmask) {
; CHECK-SSE1-LABEL: in_constant_varx_mone_invmask:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: movaps (%rsi), %xmm0
; CHECK-SSE1-NEXT: movaps {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm2
; CHECK-SSE1-NEXT: xorps %xmm1, %xmm2
; CHECK-SSE1-NEXT: andnps %xmm2, %xmm0
; CHECK-SSE1-NEXT: xorps %xmm1, %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: in_constant_varx_mone_invmask:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movaps (%rdi), %xmm0
; CHECK-SSE2-NEXT: orps (%rdx), %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: in_constant_varx_mone_invmask:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovaps (%rdi), %xmm0
; CHECK-XOP-NEXT: vorps (%rdx), %xmm0, %xmm0
; CHECK-XOP-NEXT: retq
  %x = load <4 x i32>, ptr%px, align 16
  %y = load <4 x i32>, ptr%py, align 16
  %mask = load <4 x i32>, ptr%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %n0 = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> ; %x
  %n1 = and <4 x i32> %n0, %notmask
  %r = xor <4 x i32> %n1, <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %r
}
; out-form: r = (mask & x) | (~mask & 42)
define <4 x i32> @out_constant_varx_42(ptr%px, ptr%py, ptr%pmask) {
; CHECK-SSE1-LABEL: out_constant_varx_42:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT: movaps (%rsi), %xmm1
; CHECK-SSE1-NEXT: andps %xmm0, %xmm1
; CHECK-SSE1-NEXT: andnps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE1-NEXT: orps %xmm1, %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: out_constant_varx_42:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT: movaps (%rdi), %xmm1
; CHECK-SSE2-NEXT: andps %xmm0, %xmm1
; CHECK-SSE2-NEXT: andnps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: orps %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: out_constant_varx_42:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovdqa (%rdi), %xmm0
; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm1
; CHECK-XOP-NEXT: vpcmov %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-XOP-NEXT: retq
  %x = load <4 x i32>, ptr%px, align 16
  %y = load <4 x i32>, ptr%py, align 16
  %mask = load <4 x i32>, ptr%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %mask, %x
  %my = and <4 x i32> %notmask, <i32 42, i32 42, i32 42, i32 42>
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}
; in-form: r = ((x ^ 42) & mask) ^ 42
define <4 x i32> @in_constant_varx_42(ptr%px, ptr%py, ptr%pmask) {
; CHECK-SSE1-LABEL: in_constant_varx_42:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT: movaps (%rsi), %xmm1
; CHECK-SSE1-NEXT: andps %xmm0, %xmm1
; CHECK-SSE1-NEXT: andnps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE1-NEXT: orps %xmm1, %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: in_constant_varx_42:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT: movaps (%rdi), %xmm1
; CHECK-SSE2-NEXT: andps %xmm0, %xmm1
; CHECK-SSE2-NEXT: andnps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: orps %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: in_constant_varx_42:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovdqa (%rdi), %xmm0
; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm1
; CHECK-XOP-NEXT: vpcmov %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-XOP-NEXT: retq
  %x = load <4 x i32>, ptr%px, align 16
  %y = load <4 x i32>, ptr%py, align 16
  %mask = load <4 x i32>, ptr%pmask, align 16
  %n0 = xor <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42> ; %x
  %n1 = and <4 x i32> %n0, %mask
  %r = xor <4 x i32> %n1, <i32 42, i32 42, i32 42, i32 42>
  ret <4 x i32> %r
}
; This is not a canonical form. Testing for completeness only.
; out-form, inverted mask: r = (~mask & x) | (mask & 42)
define <4 x i32> @out_constant_varx_42_invmask(ptr%px, ptr%py, ptr%pmask) {
; CHECK-SSE1-LABEL: out_constant_varx_42_invmask:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT: andnps (%rsi), %xmm1
; CHECK-SSE1-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE1-NEXT: orps %xmm1, %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: out_constant_varx_42_invmask:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT: movaps %xmm0, %xmm1
; CHECK-SSE2-NEXT: andnps (%rdi), %xmm1
; CHECK-SSE2-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: orps %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: out_constant_varx_42_invmask:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT: vbroadcastss {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-XOP-NEXT: vpcmov %xmm0, (%rdi), %xmm1, %xmm0
; CHECK-XOP-NEXT: retq
  %x = load <4 x i32>, ptr%px, align 16
  %y = load <4 x i32>, ptr%py, align 16
  %mask = load <4 x i32>, ptr%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %notmask, %x
  %my = and <4 x i32> %mask, <i32 42, i32 42, i32 42, i32 42>
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}
; This is not a canonical form. Testing for completeness only.
; in-form, inverted mask: r = ((x ^ 42) & ~mask) ^ 42
define <4 x i32> @in_constant_varx_42_invmask(ptr%px, ptr%py, ptr%pmask) {
; CHECK-SSE1-LABEL: in_constant_varx_42_invmask:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT: andnps (%rsi), %xmm1
; CHECK-SSE1-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE1-NEXT: orps %xmm1, %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: in_constant_varx_42_invmask:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT: movaps %xmm0, %xmm1
; CHECK-SSE2-NEXT: andnps (%rdi), %xmm1
; CHECK-SSE2-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: orps %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: in_constant_varx_42_invmask:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT: vbroadcastss {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-XOP-NEXT: vpcmov %xmm0, (%rdi), %xmm1, %xmm0
; CHECK-XOP-NEXT: retq
  %x = load <4 x i32>, ptr%px, align 16
  %y = load <4 x i32>, ptr%py, align 16
  %mask = load <4 x i32>, ptr%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %n0 = xor <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42> ; %x
  %n1 = and <4 x i32> %n0, %notmask
  %r = xor <4 x i32> %n1, <i32 42, i32 42, i32 42, i32 42>
  ret <4 x i32> %r
}
; out-form: r = (mask & -1) | (~mask & y)
define <4 x i32> @out_constant_mone_vary(ptr%px, ptr%py, ptr%pmask) {
; CHECK-SSE1-LABEL: out_constant_mone_vary:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: movaps (%rdx), %xmm0
; CHECK-SSE1-NEXT: orps (%rcx), %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: out_constant_mone_vary:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movaps (%rsi), %xmm0
; CHECK-SSE2-NEXT: orps (%rdx), %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: out_constant_mone_vary:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovaps (%rsi), %xmm0
; CHECK-XOP-NEXT: vorps (%rdx), %xmm0, %xmm0
; CHECK-XOP-NEXT: retq
  %x = load <4 x i32>, ptr%px, align 16
  %y = load <4 x i32>, ptr%py, align 16
  %mask = load <4 x i32>, ptr%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %my = and <4 x i32> %notmask, %y
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}
; in-form: r = ((-1 ^ y) & mask) ^ y
define <4 x i32> @in_constant_mone_vary(ptr%px, ptr%py, ptr%pmask) {
; CHECK-SSE1-LABEL: in_constant_mone_vary:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT: orps (%rdx), %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: in_constant_mone_vary:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT: orps (%rsi), %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: in_constant_mone_vary:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovaps (%rdx), %xmm0
; CHECK-XOP-NEXT: vorps (%rsi), %xmm0, %xmm0
; CHECK-XOP-NEXT: retq
  %x = load <4 x i32>, ptr%px, align 16
  %y = load <4 x i32>, ptr%py, align 16
  %mask = load <4 x i32>, ptr%pmask, align 16
  %n0 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %y ; %x
  %n1 = and <4 x i32> %n0, %mask
  %r = xor <4 x i32> %n1, %y
  ret <4 x i32> %r
}
; This is not a canonical form. Testing for completeness only.
; out-form, inverted mask: r = (~mask & -1) | (mask & y)
define <4 x i32> @out_constant_mone_vary_invmask(ptr%px, ptr%py, ptr%pmask) {
; CHECK-SSE1-LABEL: out_constant_mone_vary_invmask:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT: movaps {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
; CHECK-SSE1-NEXT: xorps %xmm0, %xmm1
; CHECK-SSE1-NEXT: andps (%rdx), %xmm0
; CHECK-SSE1-NEXT: orps %xmm1, %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: out_constant_mone_vary_invmask:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movdqa (%rdx), %xmm0
; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm1
; CHECK-SSE2-NEXT: pxor %xmm0, %xmm1
; CHECK-SSE2-NEXT: pand (%rsi), %xmm0
; CHECK-SSE2-NEXT: por %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: out_constant_mone_vary_invmask:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-XOP-NEXT: vpxor %xmm1, %xmm0, %xmm1
; CHECK-XOP-NEXT: vpand (%rsi), %xmm0, %xmm0
; CHECK-XOP-NEXT: vpor %xmm0, %xmm1, %xmm0
; CHECK-XOP-NEXT: retq
  %x = load <4 x i32>, ptr%px, align 16
  %y = load <4 x i32>, ptr%py, align 16
  %mask = load <4 x i32>, ptr%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %notmask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %my = and <4 x i32> %mask, %y
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}
; This is not a canonical form. Testing for completeness only.
; in-form, inverted mask: r = ((-1 ^ y) & ~mask) ^ y
define <4 x i32> @in_constant_mone_vary_invmask(ptr%px, ptr%py, ptr%pmask) {
; CHECK-SSE1-LABEL: in_constant_mone_vary_invmask:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE1-NEXT: orps (%rdx), %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: in_constant_mone_vary_invmask:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: pcmpeqd %xmm0, %xmm0
; CHECK-SSE2-NEXT: pxor (%rdx), %xmm0
; CHECK-SSE2-NEXT: por (%rsi), %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: in_constant_mone_vary_invmask:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; CHECK-XOP-NEXT: vpxor (%rdx), %xmm0, %xmm0
; CHECK-XOP-NEXT: vpor (%rsi), %xmm0, %xmm0
; CHECK-XOP-NEXT: retq
  %x = load <4 x i32>, ptr%px, align 16
  %y = load <4 x i32>, ptr%py, align 16
  %mask = load <4 x i32>, ptr%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %n0 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %y ; %x
  %n1 = and <4 x i32> %n0, %notmask
  %r = xor <4 x i32> %n1, %y
  ret <4 x i32> %r
}
; out-form: r = (mask & 42) | (~mask & y)
define <4 x i32> @out_constant_42_vary(ptr%px, ptr%py, ptr%pmask) {
; CHECK-SSE1-LABEL: out_constant_42_vary:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT: movaps {{.*#+}} xmm1 = [5.88545355E-44,5.88545355E-44,5.88545355E-44,5.88545355E-44]
; CHECK-SSE1-NEXT: andps %xmm0, %xmm1
; CHECK-SSE1-NEXT: andnps (%rdx), %xmm0
; CHECK-SSE1-NEXT: orps %xmm1, %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: out_constant_42_vary:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT: movaps {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-SSE2-NEXT: andps %xmm0, %xmm1
; CHECK-SSE2-NEXT: andnps (%rsi), %xmm0
; CHECK-SSE2-NEXT: orps %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: out_constant_42_vary:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT: vbroadcastss {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-XOP-NEXT: vpcmov %xmm0, (%rsi), %xmm1, %xmm0
; CHECK-XOP-NEXT: retq
  %x = load <4 x i32>, ptr%px, align 16
  %y = load <4 x i32>, ptr%py, align 16
  %mask = load <4 x i32>, ptr%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %mask, <i32 42, i32 42, i32 42, i32 42>
  %my = and <4 x i32> %notmask, %y
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}
; in-form: r = ((42 ^ y) & mask) ^ y
define <4 x i32> @in_constant_42_vary(ptr%px, ptr%py, ptr%pmask) {
; CHECK-SSE1-LABEL: in_constant_42_vary:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT: andnps (%rdx), %xmm1
; CHECK-SSE1-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE1-NEXT: orps %xmm1, %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: in_constant_42_vary:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT: movaps %xmm0, %xmm1
; CHECK-SSE2-NEXT: andnps (%rsi), %xmm1
; CHECK-SSE2-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: orps %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: in_constant_42_vary:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT: vbroadcastss {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-XOP-NEXT: vpcmov %xmm0, (%rsi), %xmm1, %xmm0
; CHECK-XOP-NEXT: retq
  %x = load <4 x i32>, ptr%px, align 16
  %y = load <4 x i32>, ptr%py, align 16
  %mask = load <4 x i32>, ptr%pmask, align 16
  %n0 = xor <4 x i32> <i32 42, i32 42, i32 42, i32 42>, %y ; %x
  %n1 = and <4 x i32> %n0, %mask
  %r = xor <4 x i32> %n1, %y
  ret <4 x i32> %r
}
; This is not a canonical form. Testing for completeness only.
; out-form, inverted mask: r = (~mask & 42) | (mask & y)
define <4 x i32> @out_constant_42_vary_invmask(ptr%px, ptr%py, ptr%pmask) {
; CHECK-SSE1-LABEL: out_constant_42_vary_invmask:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT: andnps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE1-NEXT: andps (%rdx), %xmm0
; CHECK-SSE1-NEXT: orps %xmm1, %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: out_constant_42_vary_invmask:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT: movaps %xmm0, %xmm1
; CHECK-SSE2-NEXT: andnps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE2-NEXT: andps (%rsi), %xmm0
; CHECK-SSE2-NEXT: orps %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: out_constant_42_vary_invmask:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovdqa (%rsi), %xmm0
; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm1
; CHECK-XOP-NEXT: vpcmov %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-XOP-NEXT: retq
  %x = load <4 x i32>, ptr%px, align 16
  %y = load <4 x i32>, ptr%py, align 16
  %mask = load <4 x i32>, ptr%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %notmask, <i32 42, i32 42, i32 42, i32 42>
  %my = and <4 x i32> %mask, %y
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}
; This is not a canonical form. Testing for completeness only.
; in-form, inverted mask: r = ((42 ^ y) & ~mask) ^ y
define <4 x i32> @in_constant_42_vary_invmask(ptr%px, ptr%py, ptr%pmask) {
; CHECK-SSE1-LABEL: in_constant_42_vary_invmask:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT: movaps (%rdx), %xmm1
; CHECK-SSE1-NEXT: andps %xmm0, %xmm1
; CHECK-SSE1-NEXT: andnps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE1-NEXT: orps %xmm1, %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: in_constant_42_vary_invmask:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT: movaps (%rsi), %xmm1
; CHECK-SSE2-NEXT: andps %xmm0, %xmm1
; CHECK-SSE2-NEXT: andnps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT: orps %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: in_constant_42_vary_invmask:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovdqa (%rsi), %xmm0
; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm1
; CHECK-XOP-NEXT: vpcmov %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-XOP-NEXT: retq
  %x = load <4 x i32>, ptr%px, align 16
  %y = load <4 x i32>, ptr%py, align 16
  %mask = load <4 x i32>, ptr%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %n0 = xor <4 x i32> <i32 42, i32 42, i32 42, i32 42>, %y ; %x
  %n1 = and <4 x i32> %n0, %notmask
  %r = xor <4 x i32> %n1, %y
  ret <4 x i32> %r
}