1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX
7 declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>)
8 declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>)
9 declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>)
11 ; Use widest possible vector for movmsk comparisons (PR37087)
; none-of test: a movmsk.ps on a <4 x float> bitcast of a <2 x i64> sext'd
; fcmp mask should narrow to movmskpd on the original <2 x double> compare,
; with the none-of reduction lowered as testl + (elided) sete.
13 define i1 @movmskps_noneof_bitcast_v2f64(<2 x double> %a0) {
14 ; SSE-LABEL: movmskps_noneof_bitcast_v2f64:
16 ; SSE-NEXT: xorpd %xmm1, %xmm1
17 ; SSE-NEXT: cmpeqpd %xmm0, %xmm1
18 ; SSE-NEXT: movmskpd %xmm1, %eax
19 ; SSE-NEXT: testl %eax, %eax
23 ; AVX-LABEL: movmskps_noneof_bitcast_v2f64:
25 ; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
26 ; AVX-NEXT: vcmpeqpd %xmm0, %xmm1, %xmm0
27 ; AVX-NEXT: vmovmskpd %xmm0, %eax
28 ; AVX-NEXT: testl %eax, %eax
31 %1 = fcmp oeq <2 x double> zeroinitializer, %a0
32 %2 = sext <2 x i1> %1 to <2 x i64>
33 %3 = bitcast <2 x i64> %2 to <4 x float>
34 %4 = tail call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %3)
35 %5 = icmp eq i32 %4, 0
; all-of test: same narrowing as the none-of case, but the all-of reduction
; compares the 2-bit movmskpd result against 3 (both lanes set) instead of
; the 4-bit movmskps result against 15.
39 define i1 @movmskps_allof_bitcast_v2f64(<2 x double> %a0) {
40 ; SSE-LABEL: movmskps_allof_bitcast_v2f64:
42 ; SSE-NEXT: xorpd %xmm1, %xmm1
43 ; SSE-NEXT: cmpeqpd %xmm0, %xmm1
44 ; SSE-NEXT: movmskpd %xmm1, %eax
45 ; SSE-NEXT: cmpl $3, %eax
49 ; AVX-LABEL: movmskps_allof_bitcast_v2f64:
51 ; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
52 ; AVX-NEXT: vcmpeqpd %xmm0, %xmm1, %xmm0
53 ; AVX-NEXT: vmovmskpd %xmm0, %eax
54 ; AVX-NEXT: cmpl $3, %eax
57 %1 = fcmp oeq <2 x double> zeroinitializer, %a0
58 %2 = sext <2 x i1> %1 to <2 x i64>
59 %3 = bitcast <2 x i64> %2 to <4 x float>
60 %4 = tail call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %3)
61 %5 = icmp eq i32 %4, 15
; none-of test: pmovmskb on a <16 x i8> bitcast of a <2 x i64> sign mask
; should become a 2-lane mask extract. SSE2 lacks a 64-bit movmsk compare
; form here, so it shuffles the sign words and uses movmskps; SSE4.2/AVX
; use movmskpd on the value directly (the sgt-with-zero is a sign test).
65 define i1 @pmovmskb_noneof_bitcast_v2i64(<2 x i64> %a0) {
66 ; SSE2-LABEL: pmovmskb_noneof_bitcast_v2i64:
68 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
69 ; SSE2-NEXT: movmskps %xmm0, %eax
70 ; SSE2-NEXT: testl %eax, %eax
74 ; SSE42-LABEL: pmovmskb_noneof_bitcast_v2i64:
76 ; SSE42-NEXT: movmskpd %xmm0, %eax
77 ; SSE42-NEXT: testl %eax, %eax
78 ; SSE42-NEXT: sete %al
81 ; AVX-LABEL: pmovmskb_noneof_bitcast_v2i64:
83 ; AVX-NEXT: vmovmskpd %xmm0, %eax
84 ; AVX-NEXT: testl %eax, %eax
87 %1 = icmp sgt <2 x i64> zeroinitializer, %a0
88 %2 = sext <2 x i1> %1 to <2 x i64>
89 %3 = bitcast <2 x i64> %2 to <16 x i8>
90 %4 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %3)
91 %5 = icmp eq i32 %4, 0
; all-of test: the 16-bit pmovmskb all-ones compare (65535) is narrowed to
; the lane-width mask: cmpl $15 after the SSE2 movmskps shuffle form, or
; cmpl $3 after movmskpd on SSE4.2/AVX.
95 define i1 @pmovmskb_allof_bitcast_v2i64(<2 x i64> %a0) {
96 ; SSE2-LABEL: pmovmskb_allof_bitcast_v2i64:
98 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
99 ; SSE2-NEXT: movmskps %xmm0, %eax
100 ; SSE2-NEXT: cmpl $15, %eax
101 ; SSE2-NEXT: sete %al
104 ; SSE42-LABEL: pmovmskb_allof_bitcast_v2i64:
106 ; SSE42-NEXT: movmskpd %xmm0, %eax
107 ; SSE42-NEXT: cmpl $3, %eax
108 ; SSE42-NEXT: sete %al
111 ; AVX-LABEL: pmovmskb_allof_bitcast_v2i64:
113 ; AVX-NEXT: vmovmskpd %xmm0, %eax
114 ; AVX-NEXT: cmpl $3, %eax
117 %1 = icmp sgt <2 x i64> zeroinitializer, %a0
118 %2 = sext <2 x i1> %1 to <2 x i64>
119 %3 = bitcast <2 x i64> %2 to <16 x i8>
120 %4 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %3)
121 %5 = icmp eq i32 %4, 65535
; none-of test: pmovmskb of a <16 x i8> bitcast of a <4 x i32> sext'd fcmp
; mask should widen to movmskps on the <4 x float> compare result.
125 define i1 @pmovmskb_noneof_bitcast_v4f32(<4 x float> %a0) {
126 ; SSE-LABEL: pmovmskb_noneof_bitcast_v4f32:
128 ; SSE-NEXT: xorps %xmm1, %xmm1
129 ; SSE-NEXT: cmpeqps %xmm0, %xmm1
130 ; SSE-NEXT: movmskps %xmm1, %eax
131 ; SSE-NEXT: testl %eax, %eax
135 ; AVX-LABEL: pmovmskb_noneof_bitcast_v4f32:
137 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
138 ; AVX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
139 ; AVX-NEXT: vmovmskps %xmm0, %eax
140 ; AVX-NEXT: testl %eax, %eax
143 %1 = fcmp oeq <4 x float> %a0, zeroinitializer
144 %2 = sext <4 x i1> %1 to <4 x i32>
145 %3 = bitcast <4 x i32> %2 to <16 x i8>
146 %4 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %3)
147 %5 = icmp eq i32 %4, 0
; all-of test: as above, with the 16-bit all-ones compare (65535) narrowed
; to cmpl $15 against the 4-bit movmskps result.
151 define i1 @pmovmskb_allof_bitcast_v4f32(<4 x float> %a0) {
152 ; SSE-LABEL: pmovmskb_allof_bitcast_v4f32:
154 ; SSE-NEXT: xorps %xmm1, %xmm1
155 ; SSE-NEXT: cmpeqps %xmm0, %xmm1
156 ; SSE-NEXT: movmskps %xmm1, %eax
157 ; SSE-NEXT: cmpl $15, %eax
161 ; AVX-LABEL: pmovmskb_allof_bitcast_v4f32:
163 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
164 ; AVX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0
165 ; AVX-NEXT: vmovmskps %xmm0, %eax
166 ; AVX-NEXT: cmpl $15, %eax
169 %1 = fcmp oeq <4 x float> %a0, zeroinitializer
170 %2 = sext <4 x i1> %1 to <4 x i32>
171 %3 = bitcast <4 x i32> %2 to <16 x i8>
172 %4 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %3)
173 %5 = icmp eq i32 %4, 65535
177 ; MOVMSK(ICMP_SGT(X,-1)) -> NOT(MOVMSK(X))
; "x > -1" is a non-negative (sign-bit clear) test, so the compare mask is
; the inverse of the sign mask: expect movmskps of the raw value followed by
; xorl $15 rather than materializing the compare in vector registers.
178 define i1 @movmskps_allof_v4i32_positive(<4 x i32> %a0) {
179 ; SSE-LABEL: movmskps_allof_v4i32_positive:
181 ; SSE-NEXT: movmskps %xmm0, %eax
182 ; SSE-NEXT: xorl $15, %eax
183 ; SSE-NEXT: cmpl $15, %eax
187 ; AVX-LABEL: movmskps_allof_v4i32_positive:
189 ; AVX-NEXT: vmovmskps %xmm0, %eax
190 ; AVX-NEXT: xorl $15, %eax
191 ; AVX-NEXT: cmpl $15, %eax
194 %1 = icmp sgt <4 x i32> %a0, <i32 -1, i32 -1, i32 -1, i32 -1>
195 %2 = sext <4 x i1> %1 to <4 x i32>
196 %3 = bitcast <4 x i32> %2 to <4 x float>
197 %4 = tail call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %3)
198 %5 = icmp eq i32 %4, 15
; byte-element variant of the non-negative fold: pmovmskb of the raw value
; plus xorl $0xFFFF replaces a pcmpgtb against all-ones.
202 define i1 @pmovmskb_noneof_v16i8_positive(<16 x i8> %a0) {
203 ; SSE-LABEL: pmovmskb_noneof_v16i8_positive:
205 ; SSE-NEXT: pmovmskb %xmm0, %eax
206 ; SSE-NEXT: xorl $65535, %eax # imm = 0xFFFF
210 ; AVX-LABEL: pmovmskb_noneof_v16i8_positive:
212 ; AVX-NEXT: vpmovmskb %xmm0, %eax
213 ; AVX-NEXT: xorl $65535, %eax # imm = 0xFFFF
216 %1 = icmp sgt <16 x i8> %a0, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
217 %2 = sext <16 x i1> %1 to <16 x i8>
218 %3 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %2)
219 %4 = icmp eq i32 %3, 0
223 ; AND(MOVMSK(X),MOVMSK(Y)) -> MOVMSK(AND(X,Y))
224 ; XOR(MOVMSK(X),MOVMSK(Y)) -> MOVMSK(XOR(X,Y))
225 ; OR(MOVMSK(X),MOVMSK(Y)) -> MOVMSK(OR(X,Y))
226 ; if the elements are the same width.
; AND of two same-width mask extractions (movmsk.pd intrinsic and a
; bitcast-to-i2 of an icmp mask) should combine into a single movmskpd of
; the AND of the vector masks (andpd/vandpd feeding one movmskpd).
228 define i32 @and_movmskpd_movmskpd(<2 x double> %a0, <2 x i64> %a1) {
229 ; SSE-LABEL: and_movmskpd_movmskpd:
231 ; SSE-NEXT: xorpd %xmm2, %xmm2
232 ; SSE-NEXT: cmpeqpd %xmm0, %xmm2
233 ; SSE-NEXT: andpd %xmm1, %xmm2
234 ; SSE-NEXT: movmskpd %xmm2, %eax
237 ; AVX-LABEL: and_movmskpd_movmskpd:
239 ; AVX-NEXT: vxorpd %xmm2, %xmm2, %xmm2
240 ; AVX-NEXT: vcmpeqpd %xmm0, %xmm2, %xmm0
241 ; AVX-NEXT: vandpd %xmm1, %xmm0, %xmm0
242 ; AVX-NEXT: vmovmskpd %xmm0, %eax
244 %1 = fcmp oeq <2 x double> zeroinitializer, %a0
245 %2 = sext <2 x i1> %1 to <2 x i64>
246 %3 = bitcast <2 x i64> %2 to <2 x double>
247 %4 = tail call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %3)
248 %5 = icmp sgt <2 x i64> zeroinitializer, %a1
249 %6 = bitcast <2 x i1> %5 to i2
250 %7 = zext i2 %6 to i32
; XOR of two movmsk.ps results (one from an fcmp mask, one from an ashr-31
; sign mask) should combine into a single movmskps of the XOR'd vectors.
255 define i32 @xor_movmskps_movmskps(<4 x float> %a0, <4 x i32> %a1) {
256 ; SSE-LABEL: xor_movmskps_movmskps:
258 ; SSE-NEXT: xorps %xmm2, %xmm2
259 ; SSE-NEXT: cmpeqps %xmm0, %xmm2
260 ; SSE-NEXT: xorps %xmm1, %xmm2
261 ; SSE-NEXT: movmskps %xmm2, %eax
264 ; AVX-LABEL: xor_movmskps_movmskps:
266 ; AVX-NEXT: vxorps %xmm2, %xmm2, %xmm2
267 ; AVX-NEXT: vcmpeqps %xmm0, %xmm2, %xmm0
268 ; AVX-NEXT: vxorps %xmm1, %xmm0, %xmm0
269 ; AVX-NEXT: vmovmskps %xmm0, %eax
271 %1 = fcmp oeq <4 x float> zeroinitializer, %a0
272 %2 = sext <4 x i1> %1 to <4 x i32>
273 %3 = bitcast <4 x i32> %2 to <4 x float>
274 %4 = tail call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %3)
275 %5 = ashr <4 x i32> %a1, <i32 31, i32 31, i32 31, i32 31>
276 %6 = bitcast <4 x i32> %5 to <4 x float>
277 %7 = tail call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %6)
; OR of two pmovmskb results: the i16 ashr-15 operand is bitcast to bytes,
; which still extracts the same sign bits per byte pair, so the two masks
; combine into por/vpor feeding a single pmovmskb.
282 define i32 @or_pmovmskb_pmovmskb(<16 x i8> %a0, <8 x i16> %a1) {
283 ; SSE-LABEL: or_pmovmskb_pmovmskb:
285 ; SSE-NEXT: pxor %xmm2, %xmm2
286 ; SSE-NEXT: pcmpeqb %xmm0, %xmm2
287 ; SSE-NEXT: psraw $15, %xmm1
288 ; SSE-NEXT: por %xmm2, %xmm1
289 ; SSE-NEXT: pmovmskb %xmm1, %eax
292 ; AVX-LABEL: or_pmovmskb_pmovmskb:
294 ; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
295 ; AVX-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
296 ; AVX-NEXT: vpsraw $15, %xmm1, %xmm1
297 ; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
298 ; AVX-NEXT: vpmovmskb %xmm0, %eax
300 %1 = icmp eq <16 x i8> zeroinitializer, %a0
301 %2 = sext <16 x i1> %1 to <16 x i8>
302 %3 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %2)
303 %4 = ashr <8 x i16> %a1, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
304 %5 = bitcast <8 x i16> %4 to <16 x i8>
305 %6 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %5)