1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+prefer-256-bit | FileCheck %s --check-prefix=CHECK --check-prefix=AVX256
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,-prefer-256-bit | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512VL
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+prefer-256-bit | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,-prefer-256-bit | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
; Sign-extend an <8 x i1> compare mask to <8 x i16>.
; The IR loads <8 x i32> from %p, tests every lane for equality with zero and
; sign-extends the resulting mask.
; Expected codegen per prefix:
;   AVX256 / AVX512VL: the compare writes mask register %k1, the mask is
;     expanded with a zero-masked 256-bit vmovdqa32 of all-ones, then narrowed
;     with vpmovdw from %ymm0 to %xmm0.
;   AVX512F: the compare writes %ymm0 directly and the result is narrowed
;     with vpmovdw from %zmm0 to %ymm0.
7 define <8 x i16> @testv8i1_sext_v8i16(<8 x i32>* %p) {
8 ; AVX256-LABEL: testv8i1_sext_v8i16:
10 ; AVX256-NEXT: vpxor %xmm0, %xmm0, %xmm0
11 ; AVX256-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
12 ; AVX256-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
13 ; AVX256-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
14 ; AVX256-NEXT: vpmovdw %ymm0, %xmm0
15 ; AVX256-NEXT: vzeroupper
18 ; AVX512VL-LABEL: testv8i1_sext_v8i16:
20 ; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
21 ; AVX512VL-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
22 ; AVX512VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
23 ; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
24 ; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
25 ; AVX512VL-NEXT: vzeroupper
28 ; AVX512F-LABEL: testv8i1_sext_v8i16:
30 ; AVX512F-NEXT: vpxor %xmm0, %xmm0, %xmm0
31 ; AVX512F-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0
32 ; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
33 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
34 ; AVX512F-NEXT: vzeroupper
36 %in = load <8 x i32>, <8 x i32>* %p
37 %cmp = icmp eq <8 x i32> %in, zeroinitializer
38 %ext = sext <8 x i1> %cmp to <8 x i16>
; Sign-extend a concatenated <16 x i1> compare mask to <16 x i8>.
; The IR loads <8 x i32> from %p and from %q, compares each against zero, and
; joins the two <8 x i1> masks with an identity shufflevector (indices 0-15
; across both operands) before the sext.
; Expected codegen per prefix:
;   AVX256: each 8-lane mask is expanded and narrowed to words separately
;     (zero-masked vmovdqa32 + vpmovdw), then the halves are packed to bytes
;     with vpacksswb.
;   AVX512VL: the two masks are merged with kunpckbw and expanded in a single
;     zero-masked 512-bit vpternlogd $255, then narrowed with vpmovdb.
;   AVX512F: same as AVX512VL after the masks are produced, but the masks
;     come from vmovdqa loads followed by vptestnmd on %zmm0.
42 define <16 x i8> @testv16i1_sext_v16i8(<8 x i32>* %p, <8 x i32>* %q) {
43 ; AVX256-LABEL: testv16i1_sext_v16i8:
45 ; AVX256-NEXT: vpxor %xmm0, %xmm0, %xmm0
46 ; AVX256-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
47 ; AVX256-NEXT: vpcmpeqd (%rsi), %ymm0, %k2
48 ; AVX256-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
49 ; AVX256-NEXT: vmovdqa32 %ymm0, %ymm1 {%k2} {z}
50 ; AVX256-NEXT: vpmovdw %ymm1, %xmm1
51 ; AVX256-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
52 ; AVX256-NEXT: vpmovdw %ymm0, %xmm0
53 ; AVX256-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
54 ; AVX256-NEXT: vzeroupper
57 ; AVX512VL-LABEL: testv16i1_sext_v16i8:
59 ; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
60 ; AVX512VL-NEXT: vpcmpeqd (%rdi), %ymm0, %k0
61 ; AVX512VL-NEXT: vpcmpeqd (%rsi), %ymm0, %k1
62 ; AVX512VL-NEXT: kunpckbw %k0, %k1, %k1
63 ; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
64 ; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
65 ; AVX512VL-NEXT: vzeroupper
68 ; AVX512F-LABEL: testv16i1_sext_v16i8:
70 ; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
71 ; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k0
72 ; AVX512F-NEXT: vmovdqa (%rsi), %ymm0
73 ; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k1
74 ; AVX512F-NEXT: kunpckbw %k0, %k1, %k1
75 ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
76 ; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
77 ; AVX512F-NEXT: vzeroupper
79 %in = load <8 x i32>, <8 x i32>* %p
80 %cmp = icmp eq <8 x i32> %in, zeroinitializer
81 %in2 = load <8 x i32>, <8 x i32>* %q
82 %cmp2 = icmp eq <8 x i32> %in2, zeroinitializer
83 %concat = shufflevector <8 x i1> %cmp, <8 x i1> %cmp2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
84 %ext = sext <16 x i1> %concat to <16 x i8>
; Sign-extend a concatenated <16 x i1> compare mask to <16 x i16>.
; The IR loads <8 x i32> from %p and from %q, compares each against zero, and
; joins the two <8 x i1> masks with an identity shufflevector (indices 0-15
; across both operands) before the sext.
; Expected codegen per prefix:
;   AVX256: each 8-lane mask is expanded and narrowed to words separately
;     (zero-masked vmovdqa32 + vpmovdw), then the two 128-bit results are
;     combined with vinserti128.
;   AVX512VL: the two masks are merged with kunpckbw and expanded in a single
;     zero-masked 512-bit vpternlogd $255, then narrowed with vpmovdw.
;   AVX512F: same as AVX512VL after the masks are produced, but the masks
;     come from vmovdqa loads followed by vptestnmd on %zmm0.
88 define <16 x i16> @testv16i1_sext_v16i16(<8 x i32>* %p, <8 x i32>* %q) {
89 ; AVX256-LABEL: testv16i1_sext_v16i16:
91 ; AVX256-NEXT: vpxor %xmm0, %xmm0, %xmm0
92 ; AVX256-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
93 ; AVX256-NEXT: vpcmpeqd (%rsi), %ymm0, %k2
94 ; AVX256-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
95 ; AVX256-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} {z}
96 ; AVX256-NEXT: vpmovdw %ymm1, %xmm1
97 ; AVX256-NEXT: vmovdqa32 %ymm0, %ymm0 {%k2} {z}
98 ; AVX256-NEXT: vpmovdw %ymm0, %xmm0
99 ; AVX256-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
102 ; AVX512VL-LABEL: testv16i1_sext_v16i16:
104 ; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
105 ; AVX512VL-NEXT: vpcmpeqd (%rdi), %ymm0, %k0
106 ; AVX512VL-NEXT: vpcmpeqd (%rsi), %ymm0, %k1
107 ; AVX512VL-NEXT: kunpckbw %k0, %k1, %k1
108 ; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
109 ; AVX512VL-NEXT: vpmovdw %zmm0, %ymm0
110 ; AVX512VL-NEXT: retq
112 ; AVX512F-LABEL: testv16i1_sext_v16i16:
114 ; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
115 ; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k0
116 ; AVX512F-NEXT: vmovdqa (%rsi), %ymm0
117 ; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k1
118 ; AVX512F-NEXT: kunpckbw %k0, %k1, %k1
119 ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
120 ; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
122 %in = load <8 x i32>, <8 x i32>* %p
123 %cmp = icmp eq <8 x i32> %in, zeroinitializer
124 %in2 = load <8 x i32>, <8 x i32>* %q
125 %cmp2 = icmp eq <8 x i32> %in2, zeroinitializer
126 %concat = shufflevector <8 x i1> %cmp, <8 x i1> %cmp2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
127 %ext = sext <16 x i1> %concat to <16 x i16>
; Zero-extend an <8 x i1> compare mask to <8 x i16>.
; The IR loads <8 x i32> from %p, tests every lane for equality with zero and
; zero-extends the resulting mask.
; Expected codegen per prefix:
;   AVX256 / AVX512VL: the compare writes mask register %k1, the mask is
;     expanded with a zero-masked 256-bit vmovdqa32 of all-ones and narrowed
;     with vpmovdw; a final vpsrlw $15 reduces each all-ones word to 1.
;   AVX512F: the compare writes %ymm0 directly, the result is narrowed with
;     vpmovdw from %zmm0, and the same vpsrlw $15 is applied.
131 define <8 x i16> @testv8i1_zext_v8i16(<8 x i32>* %p) {
132 ; AVX256-LABEL: testv8i1_zext_v8i16:
134 ; AVX256-NEXT: vpxor %xmm0, %xmm0, %xmm0
135 ; AVX256-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
136 ; AVX256-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
137 ; AVX256-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
138 ; AVX256-NEXT: vpmovdw %ymm0, %xmm0
139 ; AVX256-NEXT: vpsrlw $15, %xmm0, %xmm0
140 ; AVX256-NEXT: vzeroupper
143 ; AVX512VL-LABEL: testv8i1_zext_v8i16:
145 ; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
146 ; AVX512VL-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
147 ; AVX512VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
148 ; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
149 ; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
150 ; AVX512VL-NEXT: vpsrlw $15, %xmm0, %xmm0
151 ; AVX512VL-NEXT: vzeroupper
152 ; AVX512VL-NEXT: retq
154 ; AVX512F-LABEL: testv8i1_zext_v8i16:
156 ; AVX512F-NEXT: vpxor %xmm0, %xmm0, %xmm0
157 ; AVX512F-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0
158 ; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
159 ; AVX512F-NEXT: vpsrlw $15, %xmm0, %xmm0
160 ; AVX512F-NEXT: vzeroupper
162 %in = load <8 x i32>, <8 x i32>* %p
163 %cmp = icmp eq <8 x i32> %in, zeroinitializer
164 %ext = zext <8 x i1> %cmp to <8 x i16>
; Zero-extend a concatenated <16 x i1> compare mask to <16 x i8>.
; The IR loads <8 x i32> from %p and from %q, compares each against zero, and
; joins the two <8 x i1> masks with an identity shufflevector (indices 0-15
; across both operands) before the zext.
; Expected codegen per prefix:
;   AVX256: each 8-lane mask is expanded, narrowed to words and shifted with
;     vpsrlw $15 separately, then the halves are packed to bytes with
;     vpackuswb.
;   AVX512VL: the two masks are merged with kunpckbw and a zero-masked
;     vpbroadcastd from a RIP-relative constant fills the selected lanes
;     before vpmovdb narrows to bytes.
;   AVX512F: same as AVX512VL after the masks are produced, but the masks
;     come from vmovdqa loads followed by vptestnmd on %zmm0.
168 define <16 x i8> @testv16i1_zext_v16i8(<8 x i32>* %p, <8 x i32>* %q) {
169 ; AVX256-LABEL: testv16i1_zext_v16i8:
171 ; AVX256-NEXT: vpxor %xmm0, %xmm0, %xmm0
172 ; AVX256-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
173 ; AVX256-NEXT: vpcmpeqd (%rsi), %ymm0, %k2
174 ; AVX256-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
175 ; AVX256-NEXT: vmovdqa32 %ymm0, %ymm1 {%k2} {z}
176 ; AVX256-NEXT: vpmovdw %ymm1, %xmm1
177 ; AVX256-NEXT: vpsrlw $15, %xmm1, %xmm1
178 ; AVX256-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
179 ; AVX256-NEXT: vpmovdw %ymm0, %xmm0
180 ; AVX256-NEXT: vpsrlw $15, %xmm0, %xmm0
181 ; AVX256-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
182 ; AVX256-NEXT: vzeroupper
185 ; AVX512VL-LABEL: testv16i1_zext_v16i8:
187 ; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
188 ; AVX512VL-NEXT: vpcmpeqd (%rdi), %ymm0, %k0
189 ; AVX512VL-NEXT: vpcmpeqd (%rsi), %ymm0, %k1
190 ; AVX512VL-NEXT: kunpckbw %k0, %k1, %k1
191 ; AVX512VL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
192 ; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
193 ; AVX512VL-NEXT: vzeroupper
194 ; AVX512VL-NEXT: retq
196 ; AVX512F-LABEL: testv16i1_zext_v16i8:
198 ; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
199 ; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k0
200 ; AVX512F-NEXT: vmovdqa (%rsi), %ymm0
201 ; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k1
202 ; AVX512F-NEXT: kunpckbw %k0, %k1, %k1
203 ; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
204 ; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
205 ; AVX512F-NEXT: vzeroupper
207 %in = load <8 x i32>, <8 x i32>* %p
208 %cmp = icmp eq <8 x i32> %in, zeroinitializer
209 %in2 = load <8 x i32>, <8 x i32>* %q
210 %cmp2 = icmp eq <8 x i32> %in2, zeroinitializer
211 %concat = shufflevector <8 x i1> %cmp, <8 x i1> %cmp2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
212 %ext = zext <16 x i1> %concat to <16 x i8>
; Zero-extend a concatenated <16 x i1> compare mask to <16 x i16>.
; The IR loads <8 x i32> from %p and from %q, compares each against zero, and
; joins the two <8 x i1> masks with an identity shufflevector (indices 0-15
; across both operands) before the zext.
; Expected codegen per prefix:
;   AVX256: each 8-lane mask is expanded and narrowed to words separately,
;     the halves are combined with vinserti128, and one 256-bit vpsrlw $15
;     reduces each all-ones word to 1.
;   AVX512VL: the two masks are merged with kunpckbw and expanded in a single
;     zero-masked 512-bit vpternlogd $255, narrowed with vpmovdw, then
;     shifted with vpsrlw $15.
;   AVX512F: same as AVX512VL after the masks are produced, but the masks
;     come from vmovdqa loads followed by vptestnmd on %zmm0.
216 define <16 x i16> @testv16i1_zext_v16i16(<8 x i32>* %p, <8 x i32>* %q) {
217 ; AVX256-LABEL: testv16i1_zext_v16i16:
219 ; AVX256-NEXT: vpxor %xmm0, %xmm0, %xmm0
220 ; AVX256-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
221 ; AVX256-NEXT: vpcmpeqd (%rsi), %ymm0, %k2
222 ; AVX256-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
223 ; AVX256-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} {z}
224 ; AVX256-NEXT: vpmovdw %ymm1, %xmm1
225 ; AVX256-NEXT: vmovdqa32 %ymm0, %ymm0 {%k2} {z}
226 ; AVX256-NEXT: vpmovdw %ymm0, %xmm0
227 ; AVX256-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
228 ; AVX256-NEXT: vpsrlw $15, %ymm0, %ymm0
231 ; AVX512VL-LABEL: testv16i1_zext_v16i16:
233 ; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
234 ; AVX512VL-NEXT: vpcmpeqd (%rdi), %ymm0, %k0
235 ; AVX512VL-NEXT: vpcmpeqd (%rsi), %ymm0, %k1
236 ; AVX512VL-NEXT: kunpckbw %k0, %k1, %k1
237 ; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
238 ; AVX512VL-NEXT: vpmovdw %zmm0, %ymm0
239 ; AVX512VL-NEXT: vpsrlw $15, %ymm0, %ymm0
240 ; AVX512VL-NEXT: retq
242 ; AVX512F-LABEL: testv16i1_zext_v16i16:
244 ; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
245 ; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k0
246 ; AVX512F-NEXT: vmovdqa (%rsi), %ymm0
247 ; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k1
248 ; AVX512F-NEXT: kunpckbw %k0, %k1, %k1
249 ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
250 ; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
251 ; AVX512F-NEXT: vpsrlw $15, %ymm0, %ymm0
253 %in = load <8 x i32>, <8 x i32>* %p
254 %cmp = icmp eq <8 x i32> %in, zeroinitializer
255 %in2 = load <8 x i32>, <8 x i32>* %q
256 %cmp2 = icmp eq <8 x i32> %in2, zeroinitializer
257 %concat = shufflevector <8 x i1> %cmp, <8 x i1> %cmp2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
258 %ext = zext <16 x i1> %concat to <16 x i16>