1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512cd,+fast-variable-shuffle | FileCheck %s --check-prefixes=ALL,AVX512CD
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512cd,+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefixes=ALL,AVX512VLCDBW
4 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl,+avx512cd,+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefixes=ALL,X86-AVX512VLCDBW
; test_mm_epi64 - broadcast an 8-bit compare mask into every i64 lane of a
; 128-bit vector.
;
; IR pattern: compare two <8 x i16> inputs for equality, bitcast the resulting
; <8 x i1> to i8, zero-extend it to i64, insert it into lane 0 and splat it
; over <2 x i64> with an all-zero shuffle mask.
;
; Expected lowering per configuration (the check lines below are autogenerated
; and must not be edited by hand):
;  - x86_64 with avx512vl, avx512cd and avx512bw - the compare writes a mask
;    register and a single vpbroadcastmb2q produces the result.
;  - x86_64 with avx512cd only - the compare result is sign-extended to zmm,
;    turned into a mask with vptestmq, moved to a GPR and inserted bytewise.
;  - i686 with avx512vl, avx512cd and avx512bw - the mask is moved to a GPR,
;    zero-extended, moved to xmm and replicated with vpshufb.
6 define <2 x i64> @test_mm_epi64(<8 x i16> %a, <8 x i16> %b) {
7 ; AVX512CD-LABEL: test_mm_epi64:
8 ; AVX512CD: # %bb.0: # %entry
9 ; AVX512CD-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
10 ; AVX512CD-NEXT: vpmovsxwq %xmm0, %zmm0
11 ; AVX512CD-NEXT: vptestmq %zmm0, %zmm0, %k0
12 ; AVX512CD-NEXT: kmovw %k0, %eax
13 ; AVX512CD-NEXT: vpxor %xmm0, %xmm0, %xmm0
14 ; AVX512CD-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
15 ; AVX512CD-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
16 ; AVX512CD-NEXT: vzeroupper
19 ; AVX512VLCDBW-LABEL: test_mm_epi64:
20 ; AVX512VLCDBW: # %bb.0: # %entry
21 ; AVX512VLCDBW-NEXT: vpcmpeqw %xmm1, %xmm0, %k0
22 ; AVX512VLCDBW-NEXT: vpbroadcastmb2q %k0, %xmm0
23 ; AVX512VLCDBW-NEXT: retq
25 ; X86-AVX512VLCDBW-LABEL: test_mm_epi64:
26 ; X86-AVX512VLCDBW: # %bb.0: # %entry
27 ; X86-AVX512VLCDBW-NEXT: vpcmpeqw %xmm1, %xmm0, %k0
28 ; X86-AVX512VLCDBW-NEXT: kmovd %k0, %eax
29 ; X86-AVX512VLCDBW-NEXT: movzbl %al, %eax
30 ; X86-AVX512VLCDBW-NEXT: vmovd %eax, %xmm0
31 ; X86-AVX512VLCDBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3],zero,zero,zero,zero,xmm0[0,1,2,3],zero,zero,zero,zero
32 ; X86-AVX512VLCDBW-NEXT: retl
34 %0 = icmp eq <8 x i16> %a, %b
35 %1 = bitcast <8 x i1> %0 to i8
36 %conv.i = zext i8 %1 to i64
37 %vecinit.i.i = insertelement <2 x i64> undef, i64 %conv.i, i32 0
38 %vecinit1.i.i = shufflevector <2 x i64> %vecinit.i.i, <2 x i64> undef, <2 x i32> zeroinitializer
39 ret <2 x i64> %vecinit1.i.i
; test_mm_epi32 - broadcast a 16-bit compare mask into every i32 lane of a
; 128-bit vector.
;
; IR pattern: compare two <16 x i8> inputs for equality, bitcast the resulting
; <16 x i1> to i16, zero-extend it to i32, insert it into lane 0 and splat it
; over <4 x i32> with an all-zero shuffle mask.
;
; Expected lowering per configuration (autogenerated check lines below):
;  - avx512cd only - vector compare, vpmovmskb to a GPR, then vmovd and
;    vpbroadcastd.
;  - avx512vl, avx512cd and avx512bw (both x86_64 and i686) - the compare
;    writes a mask register and a single vpbroadcastmw2d produces the result.
42 define <4 x i32> @test_mm_epi32(<16 x i8> %a, <16 x i8> %b) {
43 ; AVX512CD-LABEL: test_mm_epi32:
44 ; AVX512CD: # %bb.0: # %entry
45 ; AVX512CD-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
46 ; AVX512CD-NEXT: vpmovmskb %xmm0, %eax
47 ; AVX512CD-NEXT: vmovd %eax, %xmm0
48 ; AVX512CD-NEXT: vpbroadcastd %xmm0, %xmm0
51 ; AVX512VLCDBW-LABEL: test_mm_epi32:
52 ; AVX512VLCDBW: # %bb.0: # %entry
53 ; AVX512VLCDBW-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
54 ; AVX512VLCDBW-NEXT: vpbroadcastmw2d %k0, %xmm0
55 ; AVX512VLCDBW-NEXT: retq
57 ; X86-AVX512VLCDBW-LABEL: test_mm_epi32:
58 ; X86-AVX512VLCDBW: # %bb.0: # %entry
59 ; X86-AVX512VLCDBW-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
60 ; X86-AVX512VLCDBW-NEXT: vpbroadcastmw2d %k0, %xmm0
61 ; X86-AVX512VLCDBW-NEXT: retl
63 %0 = icmp eq <16 x i8> %a, %b
64 %1 = bitcast <16 x i1> %0 to i16
65 %conv.i = zext i16 %1 to i32
66 %vecinit.i.i = insertelement <4 x i32> undef, i32 %conv.i, i32 0
67 %vecinit3.i.i = shufflevector <4 x i32> %vecinit.i.i, <4 x i32> undef, <4 x i32> zeroinitializer
68 ret <4 x i32> %vecinit3.i.i
; test_mm512_epi32 - broadcast a 16-bit compare mask into every i32 lane of a
; 512-bit vector.
;
; IR pattern: compare two <16 x i32> inputs for equality, bitcast the
; resulting <16 x i1> to i16, zero-extend it to i32, insert it into lane 0 and
; splat it over <16 x i32> with an all-zero shuffle mask.
;
; Every run configuration shares one set of checks here - a vpcmpeqd into a
; mask register followed by a single vpbroadcastmw2d into zmm0; only the
; return mnemonic differs between the 32-bit and 64-bit targets.
71 define <16 x i32> @test_mm512_epi32(<16 x i32> %a, <16 x i32> %b) {
72 ; ALL-LABEL: test_mm512_epi32:
73 ; ALL: # %bb.0: # %entry
74 ; ALL-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
75 ; ALL-NEXT: vpbroadcastmw2d %k0, %zmm0
76 ; ALL-NEXT: ret{{[l|q]}}
78 %0 = icmp eq <16 x i32> %a, %b
79 %1 = bitcast <16 x i1> %0 to i16
80 %conv.i = zext i16 %1 to i32
81 %vecinit.i.i = insertelement <16 x i32> undef, i32 %conv.i, i32 0
82 %vecinit15.i.i = shufflevector <16 x i32> %vecinit.i.i, <16 x i32> undef, <16 x i32> zeroinitializer
83 ret <16 x i32> %vecinit15.i.i
; test_mm512_epi64 - broadcast an 8-bit compare mask into every i64 lane of a
; 512-bit vector.
;
; IR pattern: compare two <8 x i32> inputs for equality, bitcast the resulting
; <8 x i1> to i8, zero-extend it to i64, insert it into lane 0 and splat it
; over <8 x i64> with an all-zero shuffle mask.
;
; Expected lowering per configuration (autogenerated check lines below):
;  - x86_64 with avx512cd only - the ymm inputs are treated as zmm (see the
;    kill annotations), compared with a zmm vpcmpeqd, then vpbroadcastmb2q.
;  - x86_64 with avx512vl, avx512cd and avx512bw - ymm vpcmpeqd into a mask
;    register, then vpbroadcastmb2q.
;  - i686 with avx512vl, avx512cd and avx512bw - the mask is moved to a GPR,
;    zero-extended, moved to xmm and broadcast with vpbroadcastq.
86 define <8 x i64> @test_mm512_epi64(<8 x i32> %a, <8 x i32> %b) {
87 ; AVX512CD-LABEL: test_mm512_epi64:
88 ; AVX512CD: # %bb.0: # %entry
89 ; AVX512CD-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
90 ; AVX512CD-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
91 ; AVX512CD-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
92 ; AVX512CD-NEXT: vpbroadcastmb2q %k0, %zmm0
95 ; AVX512VLCDBW-LABEL: test_mm512_epi64:
96 ; AVX512VLCDBW: # %bb.0: # %entry
97 ; AVX512VLCDBW-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
98 ; AVX512VLCDBW-NEXT: vpbroadcastmb2q %k0, %zmm0
99 ; AVX512VLCDBW-NEXT: retq
101 ; X86-AVX512VLCDBW-LABEL: test_mm512_epi64:
102 ; X86-AVX512VLCDBW: # %bb.0: # %entry
103 ; X86-AVX512VLCDBW-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
104 ; X86-AVX512VLCDBW-NEXT: kmovd %k0, %eax
105 ; X86-AVX512VLCDBW-NEXT: movzbl %al, %eax
106 ; X86-AVX512VLCDBW-NEXT: vmovd %eax, %xmm0
107 ; X86-AVX512VLCDBW-NEXT: vpbroadcastq %xmm0, %zmm0
108 ; X86-AVX512VLCDBW-NEXT: retl
110 %0 = icmp eq <8 x i32> %a, %b
111 %1 = bitcast <8 x i1> %0 to i8
112 %conv.i = zext i8 %1 to i64
113 %vecinit.i.i = insertelement <8 x i64> undef, i64 %conv.i, i32 0
114 %vecinit7.i.i = shufflevector <8 x i64> %vecinit.i.i, <8 x i64> undef, <8 x i32> zeroinitializer
115 ret <8 x i64> %vecinit7.i.i
; test_mm256_epi64 - broadcast an 8-bit compare mask into every i64 lane of a
; 256-bit vector.
;
; IR pattern: compare two <8 x i32> inputs for equality, bitcast the resulting
; <8 x i1> to i8, zero-extend it to i64, insert it into lane 0 and splat it
; over <4 x i64> with an all-zero shuffle mask.
;
; Expected lowering per configuration (autogenerated check lines below):
;  - x86_64 with avx512cd only - zmm vpcmpeqd into a mask register, then the
;    mask goes through a GPR (kmovw, movzbl, vmovq) and vpbroadcastq.
;  - x86_64 with avx512vl, avx512cd and avx512bw - ymm vpcmpeqd into a mask
;    register, then a single vpbroadcastmb2q into ymm0.
;  - i686 with avx512vl, avx512cd and avx512bw - the mask goes through a GPR
;    (kmovd, movzbl, vmovd) and vpbroadcastq.
118 define <4 x i64> @test_mm256_epi64(<8 x i32> %a, <8 x i32> %b) {
119 ; AVX512CD-LABEL: test_mm256_epi64:
120 ; AVX512CD: # %bb.0: # %entry
121 ; AVX512CD-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
122 ; AVX512CD-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
123 ; AVX512CD-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
124 ; AVX512CD-NEXT: kmovw %k0, %eax
125 ; AVX512CD-NEXT: movzbl %al, %eax
126 ; AVX512CD-NEXT: vmovq %rax, %xmm0
127 ; AVX512CD-NEXT: vpbroadcastq %xmm0, %ymm0
128 ; AVX512CD-NEXT: retq
130 ; AVX512VLCDBW-LABEL: test_mm256_epi64:
131 ; AVX512VLCDBW: # %bb.0: # %entry
132 ; AVX512VLCDBW-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
133 ; AVX512VLCDBW-NEXT: vpbroadcastmb2q %k0, %ymm0
134 ; AVX512VLCDBW-NEXT: retq
136 ; X86-AVX512VLCDBW-LABEL: test_mm256_epi64:
137 ; X86-AVX512VLCDBW: # %bb.0: # %entry
138 ; X86-AVX512VLCDBW-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
139 ; X86-AVX512VLCDBW-NEXT: kmovd %k0, %eax
140 ; X86-AVX512VLCDBW-NEXT: movzbl %al, %eax
141 ; X86-AVX512VLCDBW-NEXT: vmovd %eax, %xmm0
142 ; X86-AVX512VLCDBW-NEXT: vpbroadcastq %xmm0, %ymm0
143 ; X86-AVX512VLCDBW-NEXT: retl
145 %0 = icmp eq <8 x i32> %a, %b
146 %1 = bitcast <8 x i1> %0 to i8
147 %conv.i = zext i8 %1 to i64
148 %vecinit.i.i = insertelement <4 x i64> undef, i64 %conv.i, i32 0
149 %vecinit3.i.i = shufflevector <4 x i64> %vecinit.i.i, <4 x i64> undef, <4 x i32> zeroinitializer
150 ret <4 x i64> %vecinit3.i.i
; test_mm256_epi32 - broadcast a 16-bit compare mask into every i32 lane of a
; 256-bit vector.
;
; IR pattern: compare two <16 x i16> inputs for equality, bitcast the
; resulting <16 x i1> to i16, zero-extend it to i32, insert it into lane 0 and
; splat it over <8 x i32> with an all-zero shuffle mask.
;
; Expected lowering per configuration (autogenerated check lines below):
;  - x86_64 with avx512cd only - ymm vpcmpeqw, sign-extension to zmm,
;    vptestmd into a mask register, kmovw to a GPR, then the value is rebuilt
;    with four vpinsrw and a vinserti128.
;  - avx512vl, avx512cd and avx512bw (both x86_64 and i686) - the compare
;    writes a mask register and a single vpbroadcastmw2d into ymm0 produces
;    the result.
153 define <8 x i32> @test_mm256_epi32(<16 x i16> %a, <16 x i16> %b) {
154 ; AVX512CD-LABEL: test_mm256_epi32:
155 ; AVX512CD: # %bb.0: # %entry
156 ; AVX512CD-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
157 ; AVX512CD-NEXT: vpmovsxwd %ymm0, %zmm0
158 ; AVX512CD-NEXT: vptestmd %zmm0, %zmm0, %k0
159 ; AVX512CD-NEXT: kmovw %k0, %eax
160 ; AVX512CD-NEXT: vpxor %xmm0, %xmm0, %xmm0
161 ; AVX512CD-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
162 ; AVX512CD-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
163 ; AVX512CD-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
164 ; AVX512CD-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
165 ; AVX512CD-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
166 ; AVX512CD-NEXT: retq
168 ; AVX512VLCDBW-LABEL: test_mm256_epi32:
169 ; AVX512VLCDBW: # %bb.0: # %entry
170 ; AVX512VLCDBW-NEXT: vpcmpeqw %ymm1, %ymm0, %k0
171 ; AVX512VLCDBW-NEXT: vpbroadcastmw2d %k0, %ymm0
172 ; AVX512VLCDBW-NEXT: retq
174 ; X86-AVX512VLCDBW-LABEL: test_mm256_epi32:
175 ; X86-AVX512VLCDBW: # %bb.0: # %entry
176 ; X86-AVX512VLCDBW-NEXT: vpcmpeqw %ymm1, %ymm0, %k0
177 ; X86-AVX512VLCDBW-NEXT: vpbroadcastmw2d %k0, %ymm0
178 ; X86-AVX512VLCDBW-NEXT: retl
180 %0 = icmp eq <16 x i16> %a, %b
181 %1 = bitcast <16 x i1> %0 to i16
182 %conv.i = zext i16 %1 to i32
183 %vecinit.i.i = insertelement <8 x i32> undef, i32 %conv.i, i32 0
184 %vecinit7.i.i = shufflevector <8 x i32> %vecinit.i.i, <8 x i32> undef, <8 x i32> zeroinitializer
185 ret <8 x i32> %vecinit7.i.i