1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,-sse2 < %s | FileCheck %s --check-prefix=CHECK-SSE1
3 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,+sse2 < %s | FileCheck %s --check-prefix=CHECK-SSE2
4 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+xop < %s | FileCheck %s --check-prefix=CHECK-XOP
6 ; ============================================================================ ;
7 ; Various cases with %x and/or %y being a constant
8 ; ============================================================================ ;
; out_constant_varx_mone: "out" (and/or) form of the masked merge,
;   %r = (%mask & %x) | (~%mask & -1)
; %x and %mask are loaded from memory; an all-ones splat takes the place of the
; second merge operand, so the load of %y does not feed %r.
10 define <4 x i32> @out_constant_varx_mone(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
11 ; CHECK-SSE1-LABEL: out_constant_varx_mone:
12 ; CHECK-SSE1: # %bb.0:
13 ; CHECK-SSE1-NEXT: movq %rdi, %rax
14 ; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
15 ; CHECK-SSE1-NEXT: movaps {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
16 ; CHECK-SSE1-NEXT: xorps %xmm0, %xmm1
17 ; CHECK-SSE1-NEXT: andps (%rsi), %xmm0
18 ; CHECK-SSE1-NEXT: orps %xmm1, %xmm0
19 ; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
20 ; CHECK-SSE1-NEXT: retq
22 ; CHECK-SSE2-LABEL: out_constant_varx_mone:
23 ; CHECK-SSE2: # %bb.0:
24 ; CHECK-SSE2-NEXT: movdqa (%rdx), %xmm0
25 ; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm1
26 ; CHECK-SSE2-NEXT: pxor %xmm0, %xmm1
27 ; CHECK-SSE2-NEXT: pand (%rdi), %xmm0
28 ; CHECK-SSE2-NEXT: por %xmm1, %xmm0
29 ; CHECK-SSE2-NEXT: retq
31 ; CHECK-XOP-LABEL: out_constant_varx_mone:
; CHECK-XOP: # %bb.0:
33 ; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm0
34 ; CHECK-XOP-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
35 ; CHECK-XOP-NEXT: vpxor %xmm1, %xmm0, %xmm1
36 ; CHECK-XOP-NEXT: vpand (%rdi), %xmm0, %xmm0
37 ; CHECK-XOP-NEXT: vpor %xmm1, %xmm0, %xmm0
38 ; CHECK-XOP-NEXT: retq
39 %x = load <4 x i32>, <4 x i32> *%px, align 16
40 %y = load <4 x i32>, <4 x i32> *%py, align 16
41 %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
42 %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
43 %mx = and <4 x i32> %mask, %x
44 %my = and <4 x i32> %notmask, <i32 -1, i32 -1, i32 -1, i32 -1>
45 %r = or <4 x i32> %mx, %my
; in_constant_varx_mone: "in" (xor/and/xor) form of the masked merge,
;   %r = ((%x ^ -1) & %mask) ^ -1
; %x and %mask are loaded from memory; an all-ones splat takes the place of the
; second merge operand, so the load of %y does not feed %r.
49 define <4 x i32> @in_constant_varx_mone(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
50 ; CHECK-SSE1-LABEL: in_constant_varx_mone:
51 ; CHECK-SSE1: # %bb.0:
52 ; CHECK-SSE1-NEXT: movq %rdi, %rax
53 ; CHECK-SSE1-NEXT: movaps (%rsi), %xmm0
54 ; CHECK-SSE1-NEXT: andnps (%rcx), %xmm0
55 ; CHECK-SSE1-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
56 ; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
57 ; CHECK-SSE1-NEXT: retq
59 ; CHECK-SSE2-LABEL: in_constant_varx_mone:
60 ; CHECK-SSE2: # %bb.0:
61 ; CHECK-SSE2-NEXT: movdqa (%rdi), %xmm0
62 ; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm1
63 ; CHECK-SSE2-NEXT: pandn (%rdx), %xmm0
64 ; CHECK-SSE2-NEXT: pxor %xmm1, %xmm0
65 ; CHECK-SSE2-NEXT: retq
67 ; CHECK-XOP-LABEL: in_constant_varx_mone:
; CHECK-XOP: # %bb.0:
69 ; CHECK-XOP-NEXT: vmovdqa (%rdi), %xmm0
70 ; CHECK-XOP-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
71 ; CHECK-XOP-NEXT: vpandn (%rdx), %xmm0, %xmm0
72 ; CHECK-XOP-NEXT: vpxor %xmm1, %xmm0, %xmm0
73 ; CHECK-XOP-NEXT: retq
74 %x = load <4 x i32>, <4 x i32> *%px, align 16
75 %y = load <4 x i32>, <4 x i32> *%py, align 16
76 %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
77 %n0 = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> ; %x
78 %n1 = and <4 x i32> %n0, %mask
79 %r = xor <4 x i32> %n1, <i32 -1, i32 -1, i32 -1, i32 -1>
83 ; This is not a canonical form. Testing for completeness only.
; out_constant_varx_mone_invmask: "out" form with the mask roles swapped,
;   %r = (~%mask & %x) | (%mask & -1)
; An all-ones splat takes the place of the second merge operand, so the load
; of %y does not feed %r. All three prefixes expect a single OR of the %x and
; %mask loads.
84 define <4 x i32> @out_constant_varx_mone_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
85 ; CHECK-SSE1-LABEL: out_constant_varx_mone_invmask:
86 ; CHECK-SSE1: # %bb.0:
87 ; CHECK-SSE1-NEXT: movq %rdi, %rax
88 ; CHECK-SSE1-NEXT: movaps (%rsi), %xmm0
89 ; CHECK-SSE1-NEXT: orps (%rcx), %xmm0
90 ; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
91 ; CHECK-SSE1-NEXT: retq
93 ; CHECK-SSE2-LABEL: out_constant_varx_mone_invmask:
94 ; CHECK-SSE2: # %bb.0:
95 ; CHECK-SSE2-NEXT: movaps (%rdi), %xmm0
96 ; CHECK-SSE2-NEXT: orps (%rdx), %xmm0
97 ; CHECK-SSE2-NEXT: retq
99 ; CHECK-XOP-LABEL: out_constant_varx_mone_invmask:
100 ; CHECK-XOP: # %bb.0:
101 ; CHECK-XOP-NEXT: vmovaps (%rdi), %xmm0
102 ; CHECK-XOP-NEXT: vorps (%rdx), %xmm0, %xmm0
103 ; CHECK-XOP-NEXT: retq
104 %x = load <4 x i32>, <4 x i32> *%px, align 16
105 %y = load <4 x i32>, <4 x i32> *%py, align 16
106 %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
107 %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
108 %mx = and <4 x i32> %notmask, %x
109 %my = and <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
110 %r = or <4 x i32> %mx, %my
114 ; This is not a canonical form. Testing for completeness only.
; in_constant_varx_mone_invmask: "in" form with an inverted mask,
;   %r = ((%x ^ -1) & ~%mask) ^ -1
; An all-ones splat takes the place of the second merge operand, so the load
; of %y does not feed %r.
115 define <4 x i32> @in_constant_varx_mone_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
116 ; CHECK-SSE1-LABEL: in_constant_varx_mone_invmask:
117 ; CHECK-SSE1: # %bb.0:
118 ; CHECK-SSE1-NEXT: movq %rdi, %rax
119 ; CHECK-SSE1-NEXT: movaps (%rsi), %xmm0
120 ; CHECK-SSE1-NEXT: movaps {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
121 ; CHECK-SSE1-NEXT: movaps (%rcx), %xmm2
122 ; CHECK-SSE1-NEXT: xorps %xmm1, %xmm2
123 ; CHECK-SSE1-NEXT: andnps %xmm2, %xmm0
124 ; CHECK-SSE1-NEXT: xorps %xmm1, %xmm0
125 ; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
126 ; CHECK-SSE1-NEXT: retq
128 ; CHECK-SSE2-LABEL: in_constant_varx_mone_invmask:
129 ; CHECK-SSE2: # %bb.0:
130 ; CHECK-SSE2-NEXT: movdqa (%rdi), %xmm0
131 ; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm1
132 ; CHECK-SSE2-NEXT: movdqa (%rdx), %xmm2
133 ; CHECK-SSE2-NEXT: pxor %xmm1, %xmm2
134 ; CHECK-SSE2-NEXT: pandn %xmm2, %xmm0
135 ; CHECK-SSE2-NEXT: pxor %xmm1, %xmm0
136 ; CHECK-SSE2-NEXT: retq
138 ; CHECK-XOP-LABEL: in_constant_varx_mone_invmask:
139 ; CHECK-XOP: # %bb.0:
140 ; CHECK-XOP-NEXT: vmovdqa (%rdi), %xmm0
141 ; CHECK-XOP-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
142 ; CHECK-XOP-NEXT: vpxor (%rdx), %xmm1, %xmm2
143 ; CHECK-XOP-NEXT: vpandn %xmm2, %xmm0, %xmm0
144 ; CHECK-XOP-NEXT: vpxor %xmm1, %xmm0, %xmm0
145 ; CHECK-XOP-NEXT: retq
146 %x = load <4 x i32>, <4 x i32> *%px, align 16
147 %y = load <4 x i32>, <4 x i32> *%py, align 16
148 %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
149 %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
150 %n0 = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> ; %x
151 %n1 = and <4 x i32> %n0, %notmask
152 %r = xor <4 x i32> %n1, <i32 -1, i32 -1, i32 -1, i32 -1>
; out_constant_varx_42: "out" (and/or) form of the masked merge,
;   %r = (%mask & %x) | (~%mask & 42)
; A splat of 42 takes the place of the second merge operand, so the load of %y
; does not feed %r. The XOP prefix expects the merge as a single vpcmov.
156 define <4 x i32> @out_constant_varx_42(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
157 ; CHECK-SSE1-LABEL: out_constant_varx_42:
158 ; CHECK-SSE1: # %bb.0:
159 ; CHECK-SSE1-NEXT: movq %rdi, %rax
160 ; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
161 ; CHECK-SSE1-NEXT: movaps (%rsi), %xmm1
162 ; CHECK-SSE1-NEXT: andps %xmm0, %xmm1
163 ; CHECK-SSE1-NEXT: andnps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
164 ; CHECK-SSE1-NEXT: orps %xmm1, %xmm0
165 ; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
166 ; CHECK-SSE1-NEXT: retq
168 ; CHECK-SSE2-LABEL: out_constant_varx_42:
169 ; CHECK-SSE2: # %bb.0:
170 ; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0
171 ; CHECK-SSE2-NEXT: movaps (%rdi), %xmm1
172 ; CHECK-SSE2-NEXT: andps %xmm0, %xmm1
173 ; CHECK-SSE2-NEXT: andnps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
174 ; CHECK-SSE2-NEXT: orps %xmm1, %xmm0
175 ; CHECK-SSE2-NEXT: retq
177 ; CHECK-XOP-LABEL: out_constant_varx_42:
178 ; CHECK-XOP: # %bb.0:
179 ; CHECK-XOP-NEXT: vmovdqa (%rdi), %xmm0
180 ; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm1
181 ; CHECK-XOP-NEXT: vpcmov %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
182 ; CHECK-XOP-NEXT: retq
183 %x = load <4 x i32>, <4 x i32> *%px, align 16
184 %y = load <4 x i32>, <4 x i32> *%py, align 16
185 %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
186 %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
187 %mx = and <4 x i32> %mask, %x
188 %my = and <4 x i32> %notmask, <i32 42, i32 42, i32 42, i32 42>
189 %r = or <4 x i32> %mx, %my
; in_constant_varx_42: "in" (xor/and/xor) form of the masked merge,
;   %r = ((%x ^ 42) & %mask) ^ 42
; A splat of 42 takes the place of the second merge operand, so the load of %y
; does not feed %r. The expected assembly is the same instruction sequence as
; for out_constant_varx_42.
193 define <4 x i32> @in_constant_varx_42(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
194 ; CHECK-SSE1-LABEL: in_constant_varx_42:
195 ; CHECK-SSE1: # %bb.0:
196 ; CHECK-SSE1-NEXT: movq %rdi, %rax
197 ; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
198 ; CHECK-SSE1-NEXT: movaps (%rsi), %xmm1
199 ; CHECK-SSE1-NEXT: andps %xmm0, %xmm1
200 ; CHECK-SSE1-NEXT: andnps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
201 ; CHECK-SSE1-NEXT: orps %xmm1, %xmm0
202 ; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
203 ; CHECK-SSE1-NEXT: retq
205 ; CHECK-SSE2-LABEL: in_constant_varx_42:
206 ; CHECK-SSE2: # %bb.0:
207 ; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0
208 ; CHECK-SSE2-NEXT: movaps (%rdi), %xmm1
209 ; CHECK-SSE2-NEXT: andps %xmm0, %xmm1
210 ; CHECK-SSE2-NEXT: andnps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
211 ; CHECK-SSE2-NEXT: orps %xmm1, %xmm0
212 ; CHECK-SSE2-NEXT: retq
214 ; CHECK-XOP-LABEL: in_constant_varx_42:
215 ; CHECK-XOP: # %bb.0:
216 ; CHECK-XOP-NEXT: vmovdqa (%rdi), %xmm0
217 ; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm1
218 ; CHECK-XOP-NEXT: vpcmov %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
219 ; CHECK-XOP-NEXT: retq
220 %x = load <4 x i32>, <4 x i32> *%px, align 16
221 %y = load <4 x i32>, <4 x i32> *%py, align 16
222 %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
223 %n0 = xor <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42> ; %x
224 %n1 = and <4 x i32> %n0, %mask
225 %r = xor <4 x i32> %n1, <i32 42, i32 42, i32 42, i32 42>
229 ; This is not a canonical form. Testing for completeness only.
; out_constant_varx_42_invmask: "out" form with the mask roles swapped,
;   %r = (~%mask & %x) | (%mask & 42)
; A splat of 42 takes the place of the second merge operand, so the load of %y
; does not feed %r.
230 define <4 x i32> @out_constant_varx_42_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
231 ; CHECK-SSE1-LABEL: out_constant_varx_42_invmask:
232 ; CHECK-SSE1: # %bb.0:
233 ; CHECK-SSE1-NEXT: movq %rdi, %rax
234 ; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
235 ; CHECK-SSE1-NEXT: movaps %xmm0, %xmm1
236 ; CHECK-SSE1-NEXT: andnps (%rsi), %xmm1
237 ; CHECK-SSE1-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
238 ; CHECK-SSE1-NEXT: orps %xmm1, %xmm0
239 ; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
240 ; CHECK-SSE1-NEXT: retq
242 ; CHECK-SSE2-LABEL: out_constant_varx_42_invmask:
243 ; CHECK-SSE2: # %bb.0:
244 ; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0
245 ; CHECK-SSE2-NEXT: movaps %xmm0, %xmm1
246 ; CHECK-SSE2-NEXT: andnps (%rdi), %xmm1
247 ; CHECK-SSE2-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
248 ; CHECK-SSE2-NEXT: orps %xmm1, %xmm0
249 ; CHECK-SSE2-NEXT: retq
251 ; CHECK-XOP-LABEL: out_constant_varx_42_invmask:
252 ; CHECK-XOP: # %bb.0:
253 ; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm0
254 ; CHECK-XOP-NEXT: vmovdqa {{.*#+}} xmm1 = [42,42,42,42]
255 ; CHECK-XOP-NEXT: vpcmov %xmm0, (%rdi), %xmm1, %xmm0
256 ; CHECK-XOP-NEXT: retq
257 %x = load <4 x i32>, <4 x i32> *%px, align 16
258 %y = load <4 x i32>, <4 x i32> *%py, align 16
259 %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
260 %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
261 %mx = and <4 x i32> %notmask, %x
262 %my = and <4 x i32> %mask, <i32 42, i32 42, i32 42, i32 42>
263 %r = or <4 x i32> %mx, %my
267 ; This is not a canonical form. Testing for completeness only.
; in_constant_varx_42_invmask: "in" form with an inverted mask,
;   %r = ((%x ^ 42) & ~%mask) ^ 42
; A splat of 42 takes the place of the second merge operand, so the load of %y
; does not feed %r. The expected assembly is the same instruction sequence as
; for out_constant_varx_42_invmask.
268 define <4 x i32> @in_constant_varx_42_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
269 ; CHECK-SSE1-LABEL: in_constant_varx_42_invmask:
270 ; CHECK-SSE1: # %bb.0:
271 ; CHECK-SSE1-NEXT: movq %rdi, %rax
272 ; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
273 ; CHECK-SSE1-NEXT: movaps %xmm0, %xmm1
274 ; CHECK-SSE1-NEXT: andnps (%rsi), %xmm1
275 ; CHECK-SSE1-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
276 ; CHECK-SSE1-NEXT: orps %xmm1, %xmm0
277 ; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
278 ; CHECK-SSE1-NEXT: retq
280 ; CHECK-SSE2-LABEL: in_constant_varx_42_invmask:
281 ; CHECK-SSE2: # %bb.0:
282 ; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0
283 ; CHECK-SSE2-NEXT: movaps %xmm0, %xmm1
284 ; CHECK-SSE2-NEXT: andnps (%rdi), %xmm1
285 ; CHECK-SSE2-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
286 ; CHECK-SSE2-NEXT: orps %xmm1, %xmm0
287 ; CHECK-SSE2-NEXT: retq
289 ; CHECK-XOP-LABEL: in_constant_varx_42_invmask:
290 ; CHECK-XOP: # %bb.0:
291 ; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm0
292 ; CHECK-XOP-NEXT: vmovdqa {{.*#+}} xmm1 = [42,42,42,42]
293 ; CHECK-XOP-NEXT: vpcmov %xmm0, (%rdi), %xmm1, %xmm0
294 ; CHECK-XOP-NEXT: retq
295 %x = load <4 x i32>, <4 x i32> *%px, align 16
296 %y = load <4 x i32>, <4 x i32> *%py, align 16
297 %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
298 %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
299 %n0 = xor <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42> ; %x
300 %n1 = and <4 x i32> %n0, %notmask
301 %r = xor <4 x i32> %n1, <i32 42, i32 42, i32 42, i32 42>
; out_constant_mone_vary: "out" (and/or) form of the masked merge,
;   %r = (%mask & -1) | (~%mask & %y)
; An all-ones splat takes the place of the first merge operand, so the load of
; %x does not feed %r. All three prefixes expect a single OR of the %y and
; %mask loads.
305 define <4 x i32> @out_constant_mone_vary(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
306 ; CHECK-SSE1-LABEL: out_constant_mone_vary:
307 ; CHECK-SSE1: # %bb.0:
308 ; CHECK-SSE1-NEXT: movq %rdi, %rax
309 ; CHECK-SSE1-NEXT: movaps (%rdx), %xmm0
310 ; CHECK-SSE1-NEXT: orps (%rcx), %xmm0
311 ; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
312 ; CHECK-SSE1-NEXT: retq
314 ; CHECK-SSE2-LABEL: out_constant_mone_vary:
315 ; CHECK-SSE2: # %bb.0:
316 ; CHECK-SSE2-NEXT: movaps (%rsi), %xmm0
317 ; CHECK-SSE2-NEXT: orps (%rdx), %xmm0
318 ; CHECK-SSE2-NEXT: retq
320 ; CHECK-XOP-LABEL: out_constant_mone_vary:
321 ; CHECK-XOP: # %bb.0:
322 ; CHECK-XOP-NEXT: vmovaps (%rsi), %xmm0
323 ; CHECK-XOP-NEXT: vorps (%rdx), %xmm0, %xmm0
324 ; CHECK-XOP-NEXT: retq
325 %x = load <4 x i32>, <4 x i32> *%px, align 16
326 %y = load <4 x i32>, <4 x i32> *%py, align 16
327 %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
328 %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
329 %mx = and <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
330 %my = and <4 x i32> %notmask, %y
331 %r = or <4 x i32> %mx, %my
; in_constant_mone_vary: "in" (xor/and/xor) form of the masked merge,
;   %r = ((-1 ^ %y) & %mask) ^ %y
; An all-ones splat takes the place of the first merge operand, so the load of
; %x does not feed %r. The SSE2 and XOP prefixes expect a single OR; the SSE1
; prefix expects an andnps/orps pair.
335 define <4 x i32> @in_constant_mone_vary(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
336 ; CHECK-SSE1-LABEL: in_constant_mone_vary:
337 ; CHECK-SSE1: # %bb.0:
338 ; CHECK-SSE1-NEXT: movq %rdi, %rax
339 ; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
340 ; CHECK-SSE1-NEXT: movaps %xmm0, %xmm1
341 ; CHECK-SSE1-NEXT: andnps (%rdx), %xmm1
342 ; CHECK-SSE1-NEXT: orps %xmm0, %xmm1
343 ; CHECK-SSE1-NEXT: movaps %xmm1, (%rdi)
344 ; CHECK-SSE1-NEXT: retq
346 ; CHECK-SSE2-LABEL: in_constant_mone_vary:
347 ; CHECK-SSE2: # %bb.0:
348 ; CHECK-SSE2-NEXT: movaps (%rsi), %xmm0
349 ; CHECK-SSE2-NEXT: orps (%rdx), %xmm0
350 ; CHECK-SSE2-NEXT: retq
352 ; CHECK-XOP-LABEL: in_constant_mone_vary:
353 ; CHECK-XOP: # %bb.0:
354 ; CHECK-XOP-NEXT: vmovaps (%rsi), %xmm0
355 ; CHECK-XOP-NEXT: vorps (%rdx), %xmm0, %xmm0
356 ; CHECK-XOP-NEXT: retq
357 %x = load <4 x i32>, <4 x i32> *%px, align 16
358 %y = load <4 x i32>, <4 x i32> *%py, align 16
359 %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
360 %n0 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %y ; %x
361 %n1 = and <4 x i32> %n0, %mask
362 %r = xor <4 x i32> %n1, %y
366 ; This is not a canonical form. Testing for completeness only.
; out_constant_mone_vary_invmask: "out" form with the mask roles swapped,
;   %r = (~%mask & -1) | (%mask & %y)
; An all-ones splat takes the place of the first merge operand, so the load of
; %x does not feed %r.
367 define <4 x i32> @out_constant_mone_vary_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
368 ; CHECK-SSE1-LABEL: out_constant_mone_vary_invmask:
369 ; CHECK-SSE1: # %bb.0:
370 ; CHECK-SSE1-NEXT: movq %rdi, %rax
371 ; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
372 ; CHECK-SSE1-NEXT: movaps {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
373 ; CHECK-SSE1-NEXT: xorps %xmm0, %xmm1
374 ; CHECK-SSE1-NEXT: andps (%rdx), %xmm0
375 ; CHECK-SSE1-NEXT: orps %xmm1, %xmm0
376 ; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
377 ; CHECK-SSE1-NEXT: retq
379 ; CHECK-SSE2-LABEL: out_constant_mone_vary_invmask:
380 ; CHECK-SSE2: # %bb.0:
381 ; CHECK-SSE2-NEXT: movdqa (%rdx), %xmm0
382 ; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm1
383 ; CHECK-SSE2-NEXT: pxor %xmm0, %xmm1
384 ; CHECK-SSE2-NEXT: pand (%rsi), %xmm0
385 ; CHECK-SSE2-NEXT: por %xmm1, %xmm0
386 ; CHECK-SSE2-NEXT: retq
388 ; CHECK-XOP-LABEL: out_constant_mone_vary_invmask:
389 ; CHECK-XOP: # %bb.0:
390 ; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm0
391 ; CHECK-XOP-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
392 ; CHECK-XOP-NEXT: vpxor %xmm1, %xmm0, %xmm1
393 ; CHECK-XOP-NEXT: vpand (%rsi), %xmm0, %xmm0
394 ; CHECK-XOP-NEXT: vpor %xmm0, %xmm1, %xmm0
395 ; CHECK-XOP-NEXT: retq
396 %x = load <4 x i32>, <4 x i32> *%px, align 16
397 %y = load <4 x i32>, <4 x i32> *%py, align 16
398 %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
399 %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
400 %mx = and <4 x i32> %notmask, <i32 -1, i32 -1, i32 -1, i32 -1>
401 %my = and <4 x i32> %mask, %y
402 %r = or <4 x i32> %mx, %my
406 ; This is not a canonical form. Testing for completeness only.
; in_constant_mone_vary_invmask: "in" form with an inverted mask,
;   %r = ((-1 ^ %y) & ~%mask) ^ %y
; An all-ones splat takes the place of the first merge operand, so the load of
; %x does not feed %r. The expected assembly is the same instruction sequence
; as for out_constant_mone_vary_invmask.
407 define <4 x i32> @in_constant_mone_vary_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
408 ; CHECK-SSE1-LABEL: in_constant_mone_vary_invmask:
409 ; CHECK-SSE1: # %bb.0:
410 ; CHECK-SSE1-NEXT: movq %rdi, %rax
411 ; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
412 ; CHECK-SSE1-NEXT: movaps {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
413 ; CHECK-SSE1-NEXT: xorps %xmm0, %xmm1
414 ; CHECK-SSE1-NEXT: andps (%rdx), %xmm0
415 ; CHECK-SSE1-NEXT: orps %xmm1, %xmm0
416 ; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
417 ; CHECK-SSE1-NEXT: retq
419 ; CHECK-SSE2-LABEL: in_constant_mone_vary_invmask:
420 ; CHECK-SSE2: # %bb.0:
421 ; CHECK-SSE2-NEXT: movdqa (%rdx), %xmm0
422 ; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm1
423 ; CHECK-SSE2-NEXT: pxor %xmm0, %xmm1
424 ; CHECK-SSE2-NEXT: pand (%rsi), %xmm0
425 ; CHECK-SSE2-NEXT: por %xmm1, %xmm0
426 ; CHECK-SSE2-NEXT: retq
428 ; CHECK-XOP-LABEL: in_constant_mone_vary_invmask:
429 ; CHECK-XOP: # %bb.0:
430 ; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm0
431 ; CHECK-XOP-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
432 ; CHECK-XOP-NEXT: vpxor %xmm1, %xmm0, %xmm1
433 ; CHECK-XOP-NEXT: vpand (%rsi), %xmm0, %xmm0
434 ; CHECK-XOP-NEXT: vpor %xmm0, %xmm1, %xmm0
435 ; CHECK-XOP-NEXT: retq
436 %x = load <4 x i32>, <4 x i32> *%px, align 16
437 %y = load <4 x i32>, <4 x i32> *%py, align 16
438 %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
439 %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
440 %n0 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %y ; %x
441 %n1 = and <4 x i32> %n0, %notmask
442 %r = xor <4 x i32> %n1, %y
; out_constant_42_vary: "out" (and/or) form of the masked merge,
;   %r = (%mask & 42) | (~%mask & %y)
; A splat of 42 takes the place of the first merge operand, so the load of %x
; does not feed %r. The XOP prefix expects the merge as a single vpcmov.
446 define <4 x i32> @out_constant_42_vary(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
447 ; CHECK-SSE1-LABEL: out_constant_42_vary:
448 ; CHECK-SSE1: # %bb.0:
449 ; CHECK-SSE1-NEXT: movq %rdi, %rax
450 ; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
451 ; CHECK-SSE1-NEXT: movaps {{.*#+}} xmm1 = [5.88545355E-44,5.88545355E-44,5.88545355E-44,5.88545355E-44]
452 ; CHECK-SSE1-NEXT: andps %xmm0, %xmm1
453 ; CHECK-SSE1-NEXT: andnps (%rdx), %xmm0
454 ; CHECK-SSE1-NEXT: orps %xmm1, %xmm0
455 ; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
456 ; CHECK-SSE1-NEXT: retq
458 ; CHECK-SSE2-LABEL: out_constant_42_vary:
459 ; CHECK-SSE2: # %bb.0:
460 ; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0
461 ; CHECK-SSE2-NEXT: movaps {{.*#+}} xmm1 = [42,42,42,42]
462 ; CHECK-SSE2-NEXT: andps %xmm0, %xmm1
463 ; CHECK-SSE2-NEXT: andnps (%rsi), %xmm0
464 ; CHECK-SSE2-NEXT: orps %xmm1, %xmm0
465 ; CHECK-SSE2-NEXT: retq
467 ; CHECK-XOP-LABEL: out_constant_42_vary:
468 ; CHECK-XOP: # %bb.0:
469 ; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm0
470 ; CHECK-XOP-NEXT: vmovdqa {{.*#+}} xmm1 = [42,42,42,42]
471 ; CHECK-XOP-NEXT: vpcmov %xmm0, (%rsi), %xmm1, %xmm0
472 ; CHECK-XOP-NEXT: retq
473 %x = load <4 x i32>, <4 x i32> *%px, align 16
474 %y = load <4 x i32>, <4 x i32> *%py, align 16
475 %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
476 %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
477 %mx = and <4 x i32> %mask, <i32 42, i32 42, i32 42, i32 42>
478 %my = and <4 x i32> %notmask, %y
479 %r = or <4 x i32> %mx, %my
; in_constant_42_vary: "in" (xor/and/xor) form of the masked merge,
;   %r = ((42 ^ %y) & %mask) ^ %y
; A splat of 42 takes the place of the first merge operand, so the load of %x
; does not feed %r. The XOP prefix expects the merge as a single vpcmov.
483 define <4 x i32> @in_constant_42_vary(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
484 ; CHECK-SSE1-LABEL: in_constant_42_vary:
485 ; CHECK-SSE1: # %bb.0:
486 ; CHECK-SSE1-NEXT: movq %rdi, %rax
487 ; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
488 ; CHECK-SSE1-NEXT: movaps %xmm0, %xmm1
489 ; CHECK-SSE1-NEXT: andnps (%rdx), %xmm1
490 ; CHECK-SSE1-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
491 ; CHECK-SSE1-NEXT: orps %xmm1, %xmm0
492 ; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
493 ; CHECK-SSE1-NEXT: retq
495 ; CHECK-SSE2-LABEL: in_constant_42_vary:
496 ; CHECK-SSE2: # %bb.0:
497 ; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0
498 ; CHECK-SSE2-NEXT: movaps %xmm0, %xmm1
499 ; CHECK-SSE2-NEXT: andnps (%rsi), %xmm1
500 ; CHECK-SSE2-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
501 ; CHECK-SSE2-NEXT: orps %xmm1, %xmm0
502 ; CHECK-SSE2-NEXT: retq
504 ; CHECK-XOP-LABEL: in_constant_42_vary:
505 ; CHECK-XOP: # %bb.0:
506 ; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm0
507 ; CHECK-XOP-NEXT: vmovdqa {{.*#+}} xmm1 = [42,42,42,42]
508 ; CHECK-XOP-NEXT: vpcmov %xmm0, (%rsi), %xmm1, %xmm0
509 ; CHECK-XOP-NEXT: retq
510 %x = load <4 x i32>, <4 x i32> *%px, align 16
511 %y = load <4 x i32>, <4 x i32> *%py, align 16
512 %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
513 %n0 = xor <4 x i32> <i32 42, i32 42, i32 42, i32 42>, %y ; %x
514 %n1 = and <4 x i32> %n0, %mask
515 %r = xor <4 x i32> %n1, %y
519 ; This is not a canonical form. Testing for completeness only.
; out_constant_42_vary_invmask: "out" form with the mask roles swapped,
;   %r = (~%mask & 42) | (%mask & %y)
; A splat of 42 takes the place of the first merge operand, so the load of %x
; does not feed %r.
520 define <4 x i32> @out_constant_42_vary_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
521 ; CHECK-SSE1-LABEL: out_constant_42_vary_invmask:
522 ; CHECK-SSE1: # %bb.0:
523 ; CHECK-SSE1-NEXT: movq %rdi, %rax
524 ; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
525 ; CHECK-SSE1-NEXT: movaps %xmm0, %xmm1
526 ; CHECK-SSE1-NEXT: andnps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
527 ; CHECK-SSE1-NEXT: andps (%rdx), %xmm0
528 ; CHECK-SSE1-NEXT: orps %xmm1, %xmm0
529 ; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
530 ; CHECK-SSE1-NEXT: retq
532 ; CHECK-SSE2-LABEL: out_constant_42_vary_invmask:
533 ; CHECK-SSE2: # %bb.0:
534 ; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0
535 ; CHECK-SSE2-NEXT: movaps %xmm0, %xmm1
536 ; CHECK-SSE2-NEXT: andnps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
537 ; CHECK-SSE2-NEXT: andps (%rsi), %xmm0
538 ; CHECK-SSE2-NEXT: orps %xmm1, %xmm0
539 ; CHECK-SSE2-NEXT: retq
541 ; CHECK-XOP-LABEL: out_constant_42_vary_invmask:
542 ; CHECK-XOP: # %bb.0:
543 ; CHECK-XOP-NEXT: vmovdqa (%rsi), %xmm0
544 ; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm1
545 ; CHECK-XOP-NEXT: vpcmov %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
546 ; CHECK-XOP-NEXT: retq
547 %x = load <4 x i32>, <4 x i32> *%px, align 16
548 %y = load <4 x i32>, <4 x i32> *%py, align 16
549 %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
550 %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
551 %mx = and <4 x i32> %notmask, <i32 42, i32 42, i32 42, i32 42>
552 %my = and <4 x i32> %mask, %y
553 %r = or <4 x i32> %mx, %my
557 ; This is not a canonical form. Testing for completeness only.
; in_constant_42_vary_invmask: "in" form with an inverted mask,
;   %r = ((42 ^ %y) & ~%mask) ^ %y
; A splat of 42 takes the place of the first merge operand, so the load of %x
; does not feed %r. The XOP prefix expects the merge as a single vpcmov.
558 define <4 x i32> @in_constant_42_vary_invmask(<4 x i32> *%px, <4 x i32> *%py, <4 x i32> *%pmask) {
559 ; CHECK-SSE1-LABEL: in_constant_42_vary_invmask:
560 ; CHECK-SSE1: # %bb.0:
561 ; CHECK-SSE1-NEXT: movq %rdi, %rax
562 ; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
563 ; CHECK-SSE1-NEXT: movaps (%rdx), %xmm1
564 ; CHECK-SSE1-NEXT: andps %xmm0, %xmm1
565 ; CHECK-SSE1-NEXT: andnps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
566 ; CHECK-SSE1-NEXT: orps %xmm1, %xmm0
567 ; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
568 ; CHECK-SSE1-NEXT: retq
570 ; CHECK-SSE2-LABEL: in_constant_42_vary_invmask:
571 ; CHECK-SSE2: # %bb.0:
572 ; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0
573 ; CHECK-SSE2-NEXT: movaps (%rsi), %xmm1
574 ; CHECK-SSE2-NEXT: andps %xmm0, %xmm1
575 ; CHECK-SSE2-NEXT: andnps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
576 ; CHECK-SSE2-NEXT: orps %xmm1, %xmm0
577 ; CHECK-SSE2-NEXT: retq
579 ; CHECK-XOP-LABEL: in_constant_42_vary_invmask:
580 ; CHECK-XOP: # %bb.0:
581 ; CHECK-XOP-NEXT: vmovdqa (%rsi), %xmm0
582 ; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm1
583 ; CHECK-XOP-NEXT: vpcmov %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
584 ; CHECK-XOP-NEXT: retq
585 %x = load <4 x i32>, <4 x i32> *%px, align 16
586 %y = load <4 x i32>, <4 x i32> *%py, align 16
587 %mask = load <4 x i32>, <4 x i32> *%pmask, align 16
588 %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
589 %n0 = xor <4 x i32> <i32 42, i32 42, i32 42, i32 42>, %y ; %x
590 %n1 = and <4 x i32> %n0, %notmask
591 %r = xor <4 x i32> %n1, %y