1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw -mattr=+avx512dq -mattr=+avx512vl| FileCheck %s
; Widen <2 x i1> to <8 x i1>: the two elements of %a land in lanes 2-3 and every
; other lane is undef. The checks expect the mask to be shifted left by 2.
4 define <8 x i1> @test(<2 x i1> %a) {
7 ; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0
8 ; CHECK-NEXT: vpmovq2m %xmm0, %k0
9 ; CHECK-NEXT: kshiftlb $2, %k0, %k0
10 ; CHECK-NEXT: vpmovm2w %k0, %xmm0
12 %res = shufflevector <2 x i1> %a, <2 x i1> undef, <8 x i32> <i32 undef, i32 undef, i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef>
; Same widening as @test, but the two elements of %a land in lanes 4-5; the
; checks expect the mask to be shifted left by 4.
16 define <8 x i1> @test1(<2 x i1> %a) {
19 ; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0
20 ; CHECK-NEXT: vpmovq2m %xmm0, %k0
21 ; CHECK-NEXT: kshiftlb $4, %k0, %k0
22 ; CHECK-NEXT: vpmovm2w %k0, %xmm0
24 %res = shufflevector <2 x i1> %a, <2 x i1> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 undef, i32 undef>
; Like @test1, but the second shuffle operand is a zero vector and lanes 0-1
; explicitly select one of its elements (mask index 3). %a still lands in
; lanes 4-5, and the expected code is the same single shift left by 4.
28 define <8 x i1> @test2(<2 x i1> %a) {
31 ; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0
32 ; CHECK-NEXT: vpmovq2m %xmm0, %k0
33 ; CHECK-NEXT: kshiftlb $4, %k0, %k0
34 ; CHECK-NEXT: vpmovm2w %k0, %xmm0
36 %res = shufflevector <2 x i1> %a, <2 x i1> zeroinitializer, <8 x i32> <i32 3, i32 3, i32 undef, i32 undef, i32 0, i32 1, i32 undef, i32 undef>
; Concatenate <4 x i1> %a with a zero vector: lanes 0-3 come from %a and lanes
; 4-7 from zeroinitializer. The checks expect no mask shift at all.
40 define <8 x i1> @test3(<4 x i1> %a) {
43 ; CHECK-NEXT: vpslld $31, %xmm0, %xmm0
44 ; CHECK-NEXT: vpmovd2m %xmm0, %k0
45 ; CHECK-NEXT: vpmovm2w %k0, %xmm0
48 %res = shufflevector <4 x i1> %a, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; Concatenate two <4 x i1> vectors into <8 x i1>: %a fills lanes 0-3 and %b
; fills lanes 4-7. The checks expect %b's mask to be shifted left by 4 and
; OR-ed with %a's mask.
52 define <8 x i1> @test4(<4 x i1> %a, <4 x i1>%b) {
55 ; CHECK-NEXT: vpslld $31, %xmm1, %xmm1
56 ; CHECK-NEXT: vpmovd2m %xmm1, %k0
57 ; CHECK-NEXT: vpslld $31, %xmm0, %xmm0
58 ; CHECK-NEXT: vpmovd2m %xmm0, %k1
59 ; CHECK-NEXT: kshiftlb $4, %k0, %k0
60 ; CHECK-NEXT: korb %k0, %k1, %k0
61 ; CHECK-NEXT: vpmovm2w %k0, %xmm0
64 %res = shufflevector <4 x i1> %a, <4 x i1> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; Concatenate two <2 x i1> vectors into <4 x i1>: %a fills lanes 0-1 and %b
; fills lanes 2-3. The checks expect %b's mask to be shifted left by 2 and
; OR-ed with %a's mask.
68 define <4 x i1> @test5(<2 x i1> %a, <2 x i1>%b) {
71 ; CHECK-NEXT: vpsllq $63, %xmm1, %xmm1
72 ; CHECK-NEXT: vpmovq2m %xmm1, %k0
73 ; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0
74 ; CHECK-NEXT: vpmovq2m %xmm0, %k1
75 ; CHECK-NEXT: kshiftlb $2, %k0, %k0
76 ; CHECK-NEXT: korw %k0, %k1, %k0
77 ; CHECK-NEXT: vpmovm2d %k0, %xmm0
80 %res = shufflevector <2 x i1> %a, <2 x i1> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; Same concatenation of two <2 x i1> vectors as @test5, but widened to
; <16 x i1>: lanes 0-3 hold %a followed by %b and the remaining lanes are undef.
84 define <16 x i1> @test6(<2 x i1> %a, <2 x i1>%b) {
87 ; CHECK-NEXT: vpsllq $63, %xmm1, %xmm1
88 ; CHECK-NEXT: vpmovq2m %xmm1, %k0
89 ; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0
90 ; CHECK-NEXT: vpmovq2m %xmm0, %k1
91 ; CHECK-NEXT: kshiftlb $2, %k0, %k0
92 ; CHECK-NEXT: korw %k0, %k1, %k0
93 ; CHECK-NEXT: vpmovm2b %k0, %xmm0
96 %res = shufflevector <2 x i1> %a, <2 x i1> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; Concatenate two <4 x i1> vectors into the low 8 lanes of a <32 x i1> result;
; the remaining lanes are undef. The checks expect the same shift-by-4 and OR
; sequence as @test4, followed by a move into a ymm register.
100 define <32 x i1> @test7(<4 x i1> %a, <4 x i1>%b) {
101 ; CHECK-LABEL: test7:
103 ; CHECK-NEXT: vpslld $31, %xmm1, %xmm1
104 ; CHECK-NEXT: vpmovd2m %xmm1, %k0
105 ; CHECK-NEXT: vpslld $31, %xmm0, %xmm0
106 ; CHECK-NEXT: vpmovd2m %xmm0, %k1
107 ; CHECK-NEXT: kshiftlb $4, %k0, %k0
108 ; CHECK-NEXT: korb %k0, %k1, %k0
109 ; CHECK-NEXT: vpmovm2b %k0, %ymm0
112 %res = shufflevector <4 x i1> %a, <4 x i1> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; Build a <64 x i1> result from two <8 x i1> inputs: %a occupies lanes 0-7,
; %b (mask indices 8-15) occupies a later group of lanes, and all other lanes
; are undef. The checks expect the two masks to be combined with kunpckdq
; rather than a shift and OR.
116 define <64 x i1> @test8(<8 x i1> %a, <8 x i1>%b) {
117 ; CHECK-LABEL: test8:
119 ; CHECK-NEXT: vpsllw $15, %xmm1, %xmm1
120 ; CHECK-NEXT: vpmovw2m %xmm1, %k0
121 ; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0
122 ; CHECK-NEXT: vpmovw2m %xmm0, %k1
123 ; CHECK-NEXT: kunpckdq %k1, %k0, %k0
124 ; CHECK-NEXT: vpmovm2b %k0, %zmm0
127 %res = shufflevector <8 x i1> %a, <8 x i1> %b, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; Extract the upper half (lanes 4-7) of <8 x i1> %a as a <4 x i1> result. The
; mask never selects from %b. The checks expect a mask shift right by 4.
131 define <4 x i1> @test9(<8 x i1> %a, <8 x i1> %b) {
132 ; CHECK-LABEL: test9:
134 ; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0
135 ; CHECK-NEXT: vpmovw2m %xmm0, %k0
136 ; CHECK-NEXT: kshiftrb $4, %k0, %k0
137 ; CHECK-NEXT: vpmovm2d %k0, %xmm0
139 %res = shufflevector <8 x i1> %a, <8 x i1> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; Extract the upper half (lanes 2-3) of <4 x i1> %a as a <2 x i1> result. The
; mask never selects from %b. The checks expect a mask shift right by 2.
143 define <2 x i1> @test10(<4 x i1> %a, <4 x i1> %b) {
144 ; CHECK-LABEL: test10:
146 ; CHECK-NEXT: vpslld $31, %xmm0, %xmm0
147 ; CHECK-NEXT: vpmovd2m %xmm0, %k0
148 ; CHECK-NEXT: kshiftrb $2, %k0, %k0
149 ; CHECK-NEXT: vpmovm2q %k0, %xmm0
151 %res = shufflevector <4 x i1> %a, <4 x i1> %b, <2 x i32> <i32 2, i32 3>
; Place <4 x i1> %a in the upper half (lanes 4-7) of an <8 x i1> result with
; lanes 0-3 undef. The %b argument is not used by the shuffle. The checks
; expect a mask shift left by 4.
155 define <8 x i1> @test11(<4 x i1> %a, <4 x i1>%b) {
156 ; CHECK-LABEL: test11:
158 ; CHECK-NEXT: vpslld $31, %xmm0, %xmm0
159 ; CHECK-NEXT: vpmovd2m %xmm0, %k0
160 ; CHECK-NEXT: kshiftlb $4, %k0, %k0
161 ; CHECK-NEXT: vpmovm2w %k0, %xmm0
163 %res = shufflevector <4 x i1> %a, <4 x i1> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3>
; Insert <2 x i1> %a at lanes 10-11 of a <16 x i1> result. Lanes 0-9 select
; elements of the zeroinitializer operand (mask indices 2 and 3) and lanes
; 12-15 are undef. The checks expect a 16-bit mask shift left by 10.
167 define <16 x i1> @test12(<2 x i1> %a) {
168 ; CHECK-LABEL: test12:
170 ; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0
171 ; CHECK-NEXT: vpmovq2m %xmm0, %k0
172 ; CHECK-NEXT: kshiftlw $10, %k0, %k0
173 ; CHECK-NEXT: vpmovm2b %k0, %xmm0
175 %res = shufflevector <2 x i1> %a, <2 x i1> zeroinitializer, <16 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef>
; <32 x i1> variant of @test12: %a is inserted at lanes 10-11, lanes 0-9 select
; elements of the zeroinitializer operand, and the remaining lanes are undef.
; The checks expect a 32-bit mask shift left by 10.
179 define <32 x i1> @test13(<2 x i1> %a) {
180 ; CHECK-LABEL: test13:
182 ; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0
183 ; CHECK-NEXT: vpmovq2m %xmm0, %k0
184 ; CHECK-NEXT: kshiftld $10, %k0, %k0
185 ; CHECK-NEXT: vpmovm2b %k0, %ymm0
187 %res = shufflevector <2 x i1> %a, <2 x i1> zeroinitializer, <32 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
191 define <64 x i1> @test14(<2 x i1> %a) {
192 ; CHECK-LABEL: test14:
194 ; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0
195 ; CHECK-NEXT: vpmovq2m %xmm0, %k0
196 ; CHECK-NEXT: kshiftlq $10, %k0, %k0
197 ; CHECK-NEXT: vpmovm2b %k0, %zmm0
199 %res = shufflevector <2 x i1> %a, <2 x i1> zeroinitializer, <64 x i32> <i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
203 ; Make sure we can recognize this shuffle as an insertion into a zero vector.
; Lanes 0-1 of the <8 x i1> result come from the compare result %a; lanes 2-7
; all use mask index 2, which selects the constant-false first element of the
; second operand. The widened mask is then bitcast to i8.
204 define i8 @test15(<2 x i64> %x) {
205 ; CHECK-LABEL: test15:
207 ; CHECK-NEXT: vptestnmq %xmm0, %xmm0, %k0
208 ; CHECK-NEXT: kmovd %k0, %eax
209 ; CHECK-NEXT: # kill: def $al killed $al killed $eax
211 %a = icmp eq <2 x i64> %x, zeroinitializer
212 %b = shufflevector <2 x i1> %a, <2 x i1> <i1 false, i1 undef>, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
213 %c = bitcast <8 x i1> %b to i8