1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt < %s -passes=instcombine -mtriple=x86_64-unknown-unknown -S | FileCheck %s
4 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
7 ; DemandedBits - MOVMSK zeros the upper bits of the result.
; --- DemandedBits: upper bits ---
; Each movmsk-style intrinsic only sets the low lane-count bits of its i32
; result, so masking the result with the matching low-bit mask is redundant.
; The CHECK lines show that instcombine drops the mask and, for the SSE/AVX
; variants, also replaces the intrinsic with the generic sign-bit test
; (bitcast -> icmp slt 0 -> bitcast <N x i1> to iN -> zext).  The MMX
; variant is kept as an intrinsic call (per the CHECK below it is not
; converted to the generic sequence).
; NOTE(review): the bodies below look truncated by extraction — the trailing
; `%2 = and i32 %1, <mask>`, `ret i32 %2` and `}` lines are missing from most
; functions, and every line carries a stray leading line number.  Confirm
; against the upstream x86-movmsk.ll before treating this copy as runnable.
10 define i32 @test_upper_x86_mmx_pmovmskb(<1 x i64> %a0) {
11 ; CHECK-LABEL: @test_upper_x86_mmx_pmovmskb(
12 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.mmx.pmovmskb(<1 x i64> [[A0:%.*]])
13 ; CHECK-NEXT: ret i32 [[TMP1]]
15 %1 = call i32 @llvm.x86.mmx.pmovmskb(<1 x i64> %a0)
; sse.movmsk.ps produces 4 sign bits -> lowered to an i4 sign-bit test.
20 define i32 @test_upper_x86_sse_movmsk_ps(<4 x float> %a0) {
21 ; CHECK-LABEL: @test_upper_x86_sse_movmsk_ps(
22 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[A0:%.*]] to <4 x i32>
23 ; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <4 x i32> [[TMP1]], zeroinitializer
24 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i1> [[TMP2]] to i4
25 ; CHECK-NEXT: [[TMP4:%.*]] = zext i4 [[TMP3]] to i32
26 ; CHECK-NEXT: ret i32 [[TMP4]]
28 %1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
; sse2.movmsk.pd produces 2 sign bits -> i2 sign-bit test.
33 define i32 @test_upper_x86_sse2_movmsk_pd(<2 x double> %a0) {
34 ; CHECK-LABEL: @test_upper_x86_sse2_movmsk_pd(
35 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[A0:%.*]] to <2 x i64>
36 ; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <2 x i64> [[TMP1]], zeroinitializer
37 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i1> [[TMP2]] to i2
38 ; CHECK-NEXT: [[TMP4:%.*]] = zext i2 [[TMP3]] to i32
39 ; CHECK-NEXT: ret i32 [[TMP4]]
41 %1 = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
; pmovmskb.128 produces 16 byte sign bits -> i16 sign-bit test; the visible
; `and %1, 65535` below is the redundant mask the pass removes.
46 define i32 @test_upper_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
47 ; CHECK-LABEL: @test_upper_x86_sse2_pmovmskb_128(
48 ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <16 x i8> [[A0:%.*]], zeroinitializer
49 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i1> [[TMP1]] to i16
50 ; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32
51 ; CHECK-NEXT: ret i32 [[TMP3]]
53 %1 = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
54 %2 = and i32 %1, 65535
; avx.movmsk.ps.256 produces 8 sign bits -> i8 sign-bit test.
58 define i32 @test_upper_x86_avx_movmsk_ps_256(<8 x float> %a0) {
59 ; CHECK-LABEL: @test_upper_x86_avx_movmsk_ps_256(
60 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[A0:%.*]] to <8 x i32>
61 ; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <8 x i32> [[TMP1]], zeroinitializer
62 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i1> [[TMP2]] to i8
63 ; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[TMP3]] to i32
64 ; CHECK-NEXT: ret i32 [[TMP4]]
66 %1 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0)
; avx.movmsk.pd.256 produces 4 sign bits -> i4 sign-bit test.
71 define i32 @test_upper_x86_avx_movmsk_pd_256(<4 x double> %a0) {
72 ; CHECK-LABEL: @test_upper_x86_avx_movmsk_pd_256(
73 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[A0:%.*]] to <4 x i64>
74 ; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <4 x i64> [[TMP1]], zeroinitializer
75 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i1> [[TMP2]] to i4
76 ; CHECK-NEXT: [[TMP4:%.*]] = zext i4 [[TMP3]] to i32
77 ; CHECK-NEXT: ret i32 [[TMP4]]
79 %1 = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0)
84 ; llvm.x86.avx2.pmovmskb uses the whole of the 32-bit register.
; --- DemandedBits: lower bits ---
; Here the result is masked so that only bits ABOVE the lane-count range
; survive (e.g. `and %1, -65536` for the 16-lane pmovmskb).  Since the
; intrinsic can never set those upper bits, the whole expression constant
; folds to zero, as the `ret i32 0` CHECK lines confirm.
; NOTE(review): as above, the trailing `and`/`ret`/`}` lines of most
; functions appear to have been dropped by extraction — only the
; pmovmskb.128 and avx ps.256 masks are still visible below.
87 ; DemandedBits - If we don't use the lower bits then we just return zero.
90 define i32 @test_lower_x86_mmx_pmovmskb(<1 x i64> %a0) {
91 ; CHECK-LABEL: @test_lower_x86_mmx_pmovmskb(
92 ; CHECK-NEXT: ret i32 0
94 %1 = call i32 @llvm.x86.mmx.pmovmskb(<1 x i64> %a0)
99 define i32 @test_lower_x86_sse_movmsk_ps(<4 x float> %a0) {
100 ; CHECK-LABEL: @test_lower_x86_sse_movmsk_ps(
101 ; CHECK-NEXT: ret i32 0
103 %1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
108 define i32 @test_lower_x86_sse2_movmsk_pd(<2 x double> %a0) {
109 ; CHECK-LABEL: @test_lower_x86_sse2_movmsk_pd(
110 ; CHECK-NEXT: ret i32 0
112 %1 = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
117 define i32 @test_lower_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
118 ; CHECK-LABEL: @test_lower_x86_sse2_pmovmskb_128(
119 ; CHECK-NEXT: ret i32 0
121 %1 = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
122 %2 = and i32 %1, -65536
126 define i32 @test_lower_x86_avx_movmsk_ps_256(<8 x float> %a0) {
127 ; CHECK-LABEL: @test_lower_x86_avx_movmsk_ps_256(
128 ; CHECK-NEXT: ret i32 0
130 %1 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0)
131 %2 = and i32 %1, -256
135 define i32 @test_lower_x86_avx_movmsk_pd_256(<4 x double> %a0) {
136 ; CHECK-LABEL: @test_lower_x86_avx_movmsk_pd_256(
137 ; CHECK-NEXT: ret i32 0
139 %1 = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0)
144 ; llvm.x86.avx2.pmovmskb uses the whole of the 32-bit register.
; --- Constant folding: undef operand ---
; A movmsk of an entirely-undef vector is folded to zero for every variant
; (per the `ret i32 0` CHECK lines), rather than propagating undef.
; NOTE(review): function-closing `ret`/`}` lines are again missing from this
; extracted copy.
147 ; Constant Folding (UNDEF -> ZERO)
150 define i32 @undef_x86_mmx_pmovmskb() {
151 ; CHECK-LABEL: @undef_x86_mmx_pmovmskb(
152 ; CHECK-NEXT: ret i32 0
154 %1 = call i32 @llvm.x86.mmx.pmovmskb(<1 x i64> undef)
158 define i32 @undef_x86_sse_movmsk_ps() {
159 ; CHECK-LABEL: @undef_x86_sse_movmsk_ps(
160 ; CHECK-NEXT: ret i32 0
162 %1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> undef)
166 define i32 @undef_x86_sse2_movmsk_pd() {
167 ; CHECK-LABEL: @undef_x86_sse2_movmsk_pd(
168 ; CHECK-NEXT: ret i32 0
170 %1 = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> undef)
174 define i32 @undef_x86_sse2_pmovmskb_128() {
175 ; CHECK-LABEL: @undef_x86_sse2_pmovmskb_128(
176 ; CHECK-NEXT: ret i32 0
178 %1 = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> undef)
182 define i32 @undef_x86_avx_movmsk_ps_256() {
183 ; CHECK-LABEL: @undef_x86_avx_movmsk_ps_256(
184 ; CHECK-NEXT: ret i32 0
186 %1 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> undef)
190 define i32 @undef_x86_avx_movmsk_pd_256() {
191 ; CHECK-LABEL: @undef_x86_avx_movmsk_pd_256(
192 ; CHECK-NEXT: ret i32 0
194 %1 = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> undef)
198 define i32 @undef_x86_avx2_pmovmskb() {
199 ; CHECK-LABEL: @undef_x86_avx2_pmovmskb(
200 ; CHECK-NEXT: ret i32 0
202 %1 = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> undef)
; --- Constant folding: zero operand ---
; An all-zero vector has no negative lanes, so every variant constant folds
; to `ret i32 0` (no sign bits set).
; NOTE(review): function-closing `ret`/`}` lines are missing from this
; extracted copy.
207 ; Constant Folding (ZERO -> ZERO)
210 define i32 @zero_x86_sse_movmsk_ps() {
211 ; CHECK-LABEL: @zero_x86_sse_movmsk_ps(
212 ; CHECK-NEXT: ret i32 0
214 %1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> zeroinitializer)
218 define i32 @zero_x86_sse2_movmsk_pd() {
219 ; CHECK-LABEL: @zero_x86_sse2_movmsk_pd(
220 ; CHECK-NEXT: ret i32 0
222 %1 = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> zeroinitializer)
226 define i32 @zero_x86_sse2_pmovmskb_128() {
227 ; CHECK-LABEL: @zero_x86_sse2_pmovmskb_128(
228 ; CHECK-NEXT: ret i32 0
230 %1 = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> zeroinitializer)
234 define i32 @zero_x86_avx_movmsk_ps_256() {
235 ; CHECK-LABEL: @zero_x86_avx_movmsk_ps_256(
236 ; CHECK-NEXT: ret i32 0
238 %1 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> zeroinitializer)
242 define i32 @zero_x86_avx_movmsk_pd_256() {
243 ; CHECK-LABEL: @zero_x86_avx_movmsk_pd_256(
244 ; CHECK-NEXT: ret i32 0
246 %1 = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> zeroinitializer)
250 define i32 @zero_x86_avx2_pmovmskb() {
251 ; CHECK-LABEL: @zero_x86_avx2_pmovmskb(
252 ; CHECK-NEXT: ret i32 0
254 %1 = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> zeroinitializer)
; --- Constant folding: concrete constant vectors ---
; With fully-constant inputs the mask is computed at compile time from the
; lanes' sign bits (e.g. <1.0, -1.0, 100.0, -200.0> -> bits 1 and 3 set ->
; `ret i32 10`).  The MMX variant only folds the bitcast into the call
; operand (shown as a splat constant) and keeps the intrinsic.
; NOTE(review): several vectors below contain `i8 256`, which does not fit
; in an i8 — the LLParser rejects out-of-range integer literals, so these
; lines were likely corrupted in extraction; verify against the upstream
; test.  Trailing `ret`/`}` lines are also missing.
262 define i32 @fold_x86_mmx_pmovmskb() {
263 ; CHECK-LABEL: @fold_x86_mmx_pmovmskb(
264 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.mmx.pmovmskb(<1 x i64> splat (i64 18084223940296448))
265 ; CHECK-NEXT: ret i32 [[TMP1]]
267 %1 = bitcast <8 x i8> <i8 0, i8 255, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 256> to <1 x i64>
268 %2 = call i32 @llvm.x86.mmx.pmovmskb(<1 x i64> %1)
272 define i32 @fold_x86_sse_movmsk_ps() {
273 ; CHECK-LABEL: @fold_x86_sse_movmsk_ps(
274 ; CHECK-NEXT: ret i32 10
276 %1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> <float 1.0, float -1.0, float 100.0, float -200.0>)
280 define i32 @fold_x86_sse2_movmsk_pd() {
281 ; CHECK-LABEL: @fold_x86_sse2_movmsk_pd(
282 ; CHECK-NEXT: ret i32 2
284 %1 = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> <double 1.0, double -1.0>)
288 define i32 @fold_x86_sse2_pmovmskb_128() {
289 ; CHECK-LABEL: @fold_x86_sse2_pmovmskb_128(
290 ; CHECK-NEXT: ret i32 5654
292 %1 = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> <i8 0, i8 255, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 256, i8 0, i8 255, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 256>)
296 define i32 @fold_x86_avx_movmsk_ps_256() {
297 ; CHECK-LABEL: @fold_x86_avx_movmsk_ps_256(
298 ; CHECK-NEXT: ret i32 170
; Note -0.0 counts as a set sign bit (bit 5) in the expected mask 170.
300 %1 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> <float 1.0, float -1.0, float 100.0, float -200.0, float +0.0, float -0.0, float 100000.0, float -5000000.0>)
304 define i32 @fold_x86_avx_movmsk_pd_256() {
305 ; CHECK-LABEL: @fold_x86_avx_movmsk_pd_256(
306 ; CHECK-NEXT: ret i32 10
308 %1 = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> <double 1.0, double -1.0, double 100.0, double -200.0>)
312 define i32 @fold_x86_avx2_pmovmskb() {
313 ; CHECK-LABEL: @fold_x86_avx2_pmovmskb(
; Partially-undef lanes (bytes 1-7) still fold to a single concrete mask.
314 ; CHECK-NEXT: ret i32 370546176
316 %1 = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 255, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 256, i8 0, i8 255, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 256, i8 0, i8 255, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 256>)
; --- sext(<N x i1>) feeding movmsk ---
; When the movmsk operand is a sign-extended i1 vector (possibly through a
; same-element-count bitcast), every lane is all-ones or all-zeros, so the
; sign-bit test is exactly the original i1 vector.  The CHECK lines show the
; whole pattern collapsing to `bitcast <N x i1> to iN` (+ zext to i32 where
; N < 32; the 32-lane avx2 case needs no zext).
; NOTE(review): trailing `ret`/`}` lines are missing from this extracted
; copy.
320 define i32 @sext_sse_movmsk_ps(<4 x i1> %x) {
321 ; CHECK-LABEL: @sext_sse_movmsk_ps(
322 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i1> [[X:%.*]] to i4
323 ; CHECK-NEXT: [[TMP2:%.*]] = zext i4 [[TMP1]] to i32
324 ; CHECK-NEXT: ret i32 [[TMP2]]
326 %sext = sext <4 x i1> %x to <4 x i32>
327 %bc = bitcast <4 x i32> %sext to <4 x float>
328 %r = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %bc)
332 define i32 @sext_sse2_movmsk_pd(<2 x i1> %x) {
333 ; CHECK-LABEL: @sext_sse2_movmsk_pd(
334 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i1> [[X:%.*]] to i2
335 ; CHECK-NEXT: [[TMP2:%.*]] = zext i2 [[TMP1]] to i32
336 ; CHECK-NEXT: ret i32 [[TMP2]]
338 %sext = sext <2 x i1> %x to <2 x i64>
339 %bc = bitcast <2 x i64> %sext to <2 x double>
340 %r = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %bc)
; Integer variant: no intermediate fp bitcast is needed in the input IR.
344 define i32 @sext_sse2_pmovmskb_128(<16 x i1> %x) {
345 ; CHECK-LABEL: @sext_sse2_pmovmskb_128(
346 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i1> [[X:%.*]] to i16
347 ; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[TMP1]] to i32
348 ; CHECK-NEXT: ret i32 [[TMP2]]
350 %sext = sext <16 x i1> %x to <16 x i8>
351 %r = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %sext)
355 define i32 @sext_avx_movmsk_ps_256(<8 x i1> %x) {
356 ; CHECK-LABEL: @sext_avx_movmsk_ps_256(
357 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i1> [[X:%.*]] to i8
358 ; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
359 ; CHECK-NEXT: ret i32 [[TMP2]]
361 %sext = sext <8 x i1> %x to <8 x i32>
362 %bc = bitcast <8 x i32> %sext to <8 x float>
363 %r = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %bc)
367 define i32 @sext_avx_movmsk_pd_256(<4 x i1> %x) {
368 ; CHECK-LABEL: @sext_avx_movmsk_pd_256(
369 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i1> [[X:%.*]] to i4
370 ; CHECK-NEXT: [[TMP2:%.*]] = zext i4 [[TMP1]] to i32
371 ; CHECK-NEXT: ret i32 [[TMP2]]
373 %sext = sext <4 x i1> %x to <4 x i64>
374 %bc = bitcast <4 x i64> %sext to <4 x double>
375 %r = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %bc)
; 32 lanes fill the whole i32, so the bitcast alone is the result.
379 define i32 @sext_avx2_pmovmskb(<32 x i1> %x) {
380 ; CHECK-LABEL: @sext_avx2_pmovmskb(
381 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <32 x i1> [[X:%.*]] to i32
382 ; CHECK-NEXT: ret i32 [[TMP1]]
384 %sext = sext <32 x i1> %x to <32 x i8>
385 %r = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> %sext)
; --- sext patterns that must NOT fold to a plain bitcast ---
; In these cases the sign-extended value reaches the movmsk through a
; bitcast that changes element count (scalar source, more elements, or
; fewer/wider elements), so the lanes seen by movmsk are not simple
; all-ones/all-zeros i1 extensions.  The CHECK lines show the sext is kept
; and the intrinsic is still lowered to the generic bitcast + icmp-slt +
; bitcast-to-i4 + zext sign-bit test instead of collapsing further.
; NOTE(review): trailing `ret`/`}` lines are missing from this extracted
; copy.
389 ; Bitcast from sign-extended scalar.
391 define i32 @sext_sse_movmsk_ps_scalar_source(i1 %x) {
392 ; CHECK-LABEL: @sext_sse_movmsk_ps_scalar_source(
393 ; CHECK-NEXT: [[SEXT:%.*]] = sext i1 [[X:%.*]] to i128
394 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[SEXT]] to <4 x i32>
395 ; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <4 x i32> [[TMP1]], zeroinitializer
396 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i1> [[TMP2]] to i4
397 ; CHECK-NEXT: [[TMP4:%.*]] = zext i4 [[TMP3]] to i32
398 ; CHECK-NEXT: ret i32 [[TMP4]]
400 %sext = sext i1 %x to i128
401 %bc = bitcast i128 %sext to <4 x float>
402 %r = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %bc)
406 ; Bitcast from vector type with more elements.
408 define i32 @sext_sse_movmsk_ps_too_many_elts(<8 x i1> %x) {
409 ; CHECK-LABEL: @sext_sse_movmsk_ps_too_many_elts(
410 ; CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[X:%.*]] to <8 x i16>
411 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[SEXT]] to <4 x i32>
412 ; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <4 x i32> [[TMP1]], zeroinitializer
413 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i1> [[TMP2]] to i4
414 ; CHECK-NEXT: [[TMP4:%.*]] = zext i4 [[TMP3]] to i32
415 ; CHECK-NEXT: ret i32 [[TMP4]]
417 %sext = sext <8 x i1> %x to <8 x i16>
418 %bc = bitcast <8 x i16> %sext to <4 x float>
419 %r = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %bc)
423 ; Handle this by doing a bitcasted sign-bit test after the sext.
425 define i32 @sext_sse_movmsk_ps_must_replicate_bits(<2 x i1> %x) {
426 ; CHECK-LABEL: @sext_sse_movmsk_ps_must_replicate_bits(
427 ; CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[X:%.*]] to <2 x i64>
428 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SEXT]] to <4 x i32>
429 ; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <4 x i32> [[TMP1]], zeroinitializer
430 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i1> [[TMP2]] to i4
431 ; CHECK-NEXT: [[TMP4:%.*]] = zext i4 [[TMP3]] to i32
432 ; CHECK-NEXT: ret i32 [[TMP4]]
434 %sext = sext <2 x i1> %x to <2 x i64>
435 %bc = bitcast <2 x i64> %sext to <4 x float>
436 %r = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %bc)
; Declarations for every x86 movmsk/pmovmskb intrinsic exercised above.
440 declare i32 @llvm.x86.mmx.pmovmskb(<1 x i64>)
442 declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>)
443 declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>)
444 declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>)
446 declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>)
447 declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>)
448 declare i32 @llvm.x86.avx2.pmovmskb(<32 x i8>)