1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512cd,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX512CD
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512cd,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX512CD
4 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512cd,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX512CD
5 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512cd,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX512CD
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512cd,+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX512CDBW
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512cd,+avx512bw,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX512CDBW
8 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512cd,+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX512CDBW
9 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512cd,+avx512bw,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX512CDBW
10 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512cd,+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX512VLCDBW
11 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512cd,+avx512bw,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX512VLCDBW
12 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl,+avx512cd,+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX512VLCDBW
13 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl,+avx512cd,+avx512bw,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX512VLCDBW
; Mask broadcast, 128-bit result with i64 elements.
; Compares two <8 x i16> vectors for equality, reinterprets the <8 x i1>
; result as an i8 bitmask, zero-extends it to i64 and replicates that value
; into both lanes of a <2 x i64>.
; Expected lowering, as recorded in the assertions below:
;   - CD only: the word compare yields a vector, which is sign-extended to
;     512 bits and converted to a mask with vptestmq before vpbroadcastmb2q.
;   - CD+BW: the word compare writes %k0 directly, using zmm registers.
;   - VL+CD+BW: same two-instruction sequence, kept on xmm registers.
15 define <2 x i64> @test_mm_epi64(<8 x i16> %a, <8 x i16> %b) {
16 ; AVX512CD-LABEL: test_mm_epi64:
17 ; AVX512CD: # %bb.0: # %entry
18 ; AVX512CD-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
19 ; AVX512CD-NEXT: vpmovsxwq %xmm0, %zmm0
20 ; AVX512CD-NEXT: vptestmq %zmm0, %zmm0, %k0
21 ; AVX512CD-NEXT: vpbroadcastmb2q %k0, %zmm0
22 ; AVX512CD-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
23 ; AVX512CD-NEXT: vzeroupper
24 ; AVX512CD-NEXT: ret{{[l|q]}}
26 ; AVX512CDBW-LABEL: test_mm_epi64:
27 ; AVX512CDBW: # %bb.0: # %entry
28 ; AVX512CDBW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
29 ; AVX512CDBW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
30 ; AVX512CDBW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
31 ; AVX512CDBW-NEXT: vpbroadcastmb2q %k0, %zmm0
32 ; AVX512CDBW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
33 ; AVX512CDBW-NEXT: vzeroupper
34 ; AVX512CDBW-NEXT: ret{{[l|q]}}
36 ; AVX512VLCDBW-LABEL: test_mm_epi64:
37 ; AVX512VLCDBW: # %bb.0: # %entry
38 ; AVX512VLCDBW-NEXT: vpcmpeqw %xmm1, %xmm0, %k0
39 ; AVX512VLCDBW-NEXT: vpbroadcastmb2q %k0, %xmm0
40 ; AVX512VLCDBW-NEXT: ret{{[l|q]}}
; Per-lane equality -> 8-bit scalar bitmask -> zero-extended to 64 bits.
42 %0 = icmp eq <8 x i16> %a, %b
43 %1 = bitcast <8 x i1> %0 to i8
44 %conv.i = zext i8 %1 to i64
; Place the mask in lane 0, then shuffle with an all-zero index mask so that
; every result lane reads lane 0 (a splat).
45 %vecinit.i.i = insertelement <2 x i64> undef, i64 %conv.i, i32 0
46 %vecinit1.i.i = shufflevector <2 x i64> %vecinit.i.i, <2 x i64> undef, <2 x i32> zeroinitializer
47 ret <2 x i64> %vecinit1.i.i
; Mask broadcast, 128-bit result with i32 elements.
; Compares two <16 x i8> vectors for equality, reinterprets the <16 x i1>
; result as an i16 bitmask, zero-extends it to i32 and replicates that value
; into all four lanes of a <4 x i32>.
; Expected lowering, as recorded in the assertions below:
;   - CD only: no mask-register broadcast is used; the bitmask is extracted
;     with vpmovmskb, moved back with vmovd and splatted with vpbroadcastd.
;   - CD+BW: the byte compare writes %k0 on zmm registers, followed by
;     vpbroadcastmw2d.
;   - VL+CD+BW: same two-instruction sequence, kept on xmm registers.
50 define <4 x i32> @test_mm_epi32(<16 x i8> %a, <16 x i8> %b) {
51 ; AVX512CD-LABEL: test_mm_epi32:
52 ; AVX512CD: # %bb.0: # %entry
53 ; AVX512CD-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
54 ; AVX512CD-NEXT: vpmovmskb %xmm0, %eax
55 ; AVX512CD-NEXT: vmovd %eax, %xmm0
56 ; AVX512CD-NEXT: vpbroadcastd %xmm0, %xmm0
57 ; AVX512CD-NEXT: ret{{[l|q]}}
59 ; AVX512CDBW-LABEL: test_mm_epi32:
60 ; AVX512CDBW: # %bb.0: # %entry
61 ; AVX512CDBW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
62 ; AVX512CDBW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
63 ; AVX512CDBW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
64 ; AVX512CDBW-NEXT: vpbroadcastmw2d %k0, %zmm0
65 ; AVX512CDBW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
66 ; AVX512CDBW-NEXT: vzeroupper
67 ; AVX512CDBW-NEXT: ret{{[l|q]}}
69 ; AVX512VLCDBW-LABEL: test_mm_epi32:
70 ; AVX512VLCDBW: # %bb.0: # %entry
71 ; AVX512VLCDBW-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
72 ; AVX512VLCDBW-NEXT: vpbroadcastmw2d %k0, %xmm0
73 ; AVX512VLCDBW-NEXT: ret{{[l|q]}}
; Per-lane equality -> 16-bit scalar bitmask -> zero-extended to 32 bits.
75 %0 = icmp eq <16 x i8> %a, %b
76 %1 = bitcast <16 x i1> %0 to i16
77 %conv.i = zext i16 %1 to i32
; Place the mask in lane 0, then shuffle with an all-zero index mask so that
; every result lane reads lane 0 (a splat).
78 %vecinit.i.i = insertelement <4 x i32> undef, i32 %conv.i, i32 0
79 %vecinit3.i.i = shufflevector <4 x i32> %vecinit.i.i, <4 x i32> undef, <4 x i32> zeroinitializer
80 ret <4 x i32> %vecinit3.i.i
; Mask broadcast, 512-bit result with i32 elements.
; Compares two <16 x i32> vectors for equality, reinterprets the <16 x i1>
; result as an i16 bitmask, zero-extends it to i32 and replicates that value
; into all sixteen lanes of a <16 x i32>.
; Every RUN configuration shares one set of assertions here: a dword compare
; into %k0 on zmm registers followed by vpbroadcastmw2d.
83 define <16 x i32> @test_mm512_epi32(<16 x i32> %a, <16 x i32> %b) {
84 ; ALL-LABEL: test_mm512_epi32:
85 ; ALL: # %bb.0: # %entry
86 ; ALL-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
87 ; ALL-NEXT: vpbroadcastmw2d %k0, %zmm0
88 ; ALL-NEXT: ret{{[l|q]}}
; Per-lane equality -> 16-bit scalar bitmask -> zero-extended to 32 bits.
90 %0 = icmp eq <16 x i32> %a, %b
91 %1 = bitcast <16 x i1> %0 to i16
92 %conv.i = zext i16 %1 to i32
; Place the mask in lane 0, then shuffle with an all-zero index mask so that
; every result lane reads lane 0 (a splat).
93 %vecinit.i.i = insertelement <16 x i32> undef, i32 %conv.i, i32 0
94 %vecinit15.i.i = shufflevector <16 x i32> %vecinit.i.i, <16 x i32> undef, <16 x i32> zeroinitializer
95 ret <16 x i32> %vecinit15.i.i
; Mask broadcast, 512-bit result with i64 elements.
; Compares two <8 x i32> vectors for equality, reinterprets the <8 x i1>
; result as an i8 bitmask, zero-extends it to i64 and replicates that value
; into all eight lanes of an <8 x i64>.
; Expected lowering, as recorded in the assertions below:
;   - CD only and CD+BW: the ymm inputs are treated as zmm (see the kill
;     annotations) and compared with a zmm vpcmpeqd into %k0, followed by
;     vpbroadcastmb2q.
;   - VL+CD+BW: the compare is done directly on ymm registers; the broadcast
;     still targets zmm0.
98 define <8 x i64> @test_mm512_epi64(<8 x i32> %a, <8 x i32> %b) {
99 ; AVX512CD-LABEL: test_mm512_epi64:
100 ; AVX512CD: # %bb.0: # %entry
101 ; AVX512CD-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
102 ; AVX512CD-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
103 ; AVX512CD-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
104 ; AVX512CD-NEXT: vpbroadcastmb2q %k0, %zmm0
105 ; AVX512CD-NEXT: ret{{[l|q]}}
107 ; AVX512CDBW-LABEL: test_mm512_epi64:
108 ; AVX512CDBW: # %bb.0: # %entry
109 ; AVX512CDBW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
110 ; AVX512CDBW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
111 ; AVX512CDBW-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
112 ; AVX512CDBW-NEXT: vpbroadcastmb2q %k0, %zmm0
113 ; AVX512CDBW-NEXT: ret{{[l|q]}}
115 ; AVX512VLCDBW-LABEL: test_mm512_epi64:
116 ; AVX512VLCDBW: # %bb.0: # %entry
117 ; AVX512VLCDBW-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
118 ; AVX512VLCDBW-NEXT: vpbroadcastmb2q %k0, %zmm0
119 ; AVX512VLCDBW-NEXT: ret{{[l|q]}}
; Per-lane equality -> 8-bit scalar bitmask -> zero-extended to 64 bits.
121 %0 = icmp eq <8 x i32> %a, %b
122 %1 = bitcast <8 x i1> %0 to i8
123 %conv.i = zext i8 %1 to i64
; Place the mask in lane 0, then shuffle with an all-zero index mask so that
; every result lane reads lane 0 (a splat).
124 %vecinit.i.i = insertelement <8 x i64> undef, i64 %conv.i, i32 0
125 %vecinit7.i.i = shufflevector <8 x i64> %vecinit.i.i, <8 x i64> undef, <8 x i32> zeroinitializer
126 ret <8 x i64> %vecinit7.i.i
; Mask broadcast, 256-bit result with i64 elements.
; Compares two <8 x i32> vectors for equality, reinterprets the <8 x i1>
; result as an i8 bitmask, zero-extends it to i64 and replicates that value
; into all four lanes of a <4 x i64>.
; Expected lowering, as recorded in the assertions below:
;   - CD only and CD+BW: the ymm inputs are treated as zmm (see the kill
;     annotations), compared with a zmm vpcmpeqd into %k0, broadcast with
;     vpbroadcastmb2q into zmm0, and the ymm0 part is returned.
;   - VL+CD+BW: compare and broadcast both stay on ymm registers.
129 define <4 x i64> @test_mm256_epi64(<8 x i32> %a, <8 x i32> %b) {
130 ; AVX512CD-LABEL: test_mm256_epi64:
131 ; AVX512CD: # %bb.0: # %entry
132 ; AVX512CD-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
133 ; AVX512CD-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
134 ; AVX512CD-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
135 ; AVX512CD-NEXT: vpbroadcastmb2q %k0, %zmm0
136 ; AVX512CD-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
137 ; AVX512CD-NEXT: ret{{[l|q]}}
139 ; AVX512CDBW-LABEL: test_mm256_epi64:
140 ; AVX512CDBW: # %bb.0: # %entry
141 ; AVX512CDBW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
142 ; AVX512CDBW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
143 ; AVX512CDBW-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
144 ; AVX512CDBW-NEXT: vpbroadcastmb2q %k0, %zmm0
145 ; AVX512CDBW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
146 ; AVX512CDBW-NEXT: ret{{[l|q]}}
148 ; AVX512VLCDBW-LABEL: test_mm256_epi64:
149 ; AVX512VLCDBW: # %bb.0: # %entry
150 ; AVX512VLCDBW-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
151 ; AVX512VLCDBW-NEXT: vpbroadcastmb2q %k0, %ymm0
152 ; AVX512VLCDBW-NEXT: ret{{[l|q]}}
; Per-lane equality -> 8-bit scalar bitmask -> zero-extended to 64 bits.
154 %0 = icmp eq <8 x i32> %a, %b
155 %1 = bitcast <8 x i1> %0 to i8
156 %conv.i = zext i8 %1 to i64
; Place the mask in lane 0, then shuffle with an all-zero index mask so that
; every result lane reads lane 0 (a splat).
157 %vecinit.i.i = insertelement <4 x i64> undef, i64 %conv.i, i32 0
158 %vecinit3.i.i = shufflevector <4 x i64> %vecinit.i.i, <4 x i64> undef, <4 x i32> zeroinitializer
159 ret <4 x i64> %vecinit3.i.i
; Mask broadcast, 256-bit result with i32 elements.
; Compares two <16 x i16> vectors for equality, reinterprets the <16 x i1>
; result as an i16 bitmask, zero-extends it to i32 and replicates that value
; into all eight lanes of an <8 x i32>.
; Expected lowering, as recorded in the assertions below:
;   - CD only: the word compare yields a vector, which is sign-extended to
;     512 bits and converted to a mask with vptestmd before vpbroadcastmw2d.
;   - CD+BW: the word compare writes %k0 directly, using zmm registers.
;   - VL+CD+BW: compare and broadcast both stay on ymm registers.
162 define <8 x i32> @test_mm256_epi32(<16 x i16> %a, <16 x i16> %b) {
163 ; AVX512CD-LABEL: test_mm256_epi32:
164 ; AVX512CD: # %bb.0: # %entry
165 ; AVX512CD-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
166 ; AVX512CD-NEXT: vpmovsxwd %ymm0, %zmm0
167 ; AVX512CD-NEXT: vptestmd %zmm0, %zmm0, %k0
168 ; AVX512CD-NEXT: vpbroadcastmw2d %k0, %zmm0
169 ; AVX512CD-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
170 ; AVX512CD-NEXT: ret{{[l|q]}}
172 ; AVX512CDBW-LABEL: test_mm256_epi32:
173 ; AVX512CDBW: # %bb.0: # %entry
174 ; AVX512CDBW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
175 ; AVX512CDBW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
176 ; AVX512CDBW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
177 ; AVX512CDBW-NEXT: vpbroadcastmw2d %k0, %zmm0
178 ; AVX512CDBW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
179 ; AVX512CDBW-NEXT: ret{{[l|q]}}
181 ; AVX512VLCDBW-LABEL: test_mm256_epi32:
182 ; AVX512VLCDBW: # %bb.0: # %entry
183 ; AVX512VLCDBW-NEXT: vpcmpeqw %ymm1, %ymm0, %k0
184 ; AVX512VLCDBW-NEXT: vpbroadcastmw2d %k0, %ymm0
185 ; AVX512VLCDBW-NEXT: ret{{[l|q]}}
; Per-lane equality -> 16-bit scalar bitmask -> zero-extended to 32 bits.
187 %0 = icmp eq <16 x i16> %a, %b
188 %1 = bitcast <16 x i1> %0 to i16
189 %conv.i = zext i16 %1 to i32
; Place the mask in lane 0, then shuffle with an all-zero index mask so that
; every result lane reads lane 0 (a splat).
190 %vecinit.i.i = insertelement <8 x i32> undef, i32 %conv.i, i32 0
191 %vecinit7.i.i = shufflevector <8 x i32> %vecinit.i.i, <8 x i32> undef, <8 x i32> zeroinitializer
192 ret <8 x i32> %vecinit7.i.i