; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2,+gfni | FileCheck %s --check-prefixes=GFNISSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+gfni | FileCheck %s --check-prefixes=GFNIAVX,GFNIAVX1OR2,GFNIAVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+gfni | FileCheck %s --check-prefixes=GFNIAVX,GFNIAVX1OR2,GFNIAVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+gfni | FileCheck %s --check-prefixes=GFNIAVX,GFNIAVX512

;
; 128 Bit Vector Shifts
;
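
; Editorial note (summarizing the autogenerated CHECK lines below, not itself
; autogenerated): x86 has no vector shift of i8 elements, so a splat-constant
; byte shift is lowered to the corresponding 16-bit shift (psllw/psrlw) plus a
; pand that clears the bits carried across byte boundaries; the AVX-512 runs
; fold that mask into a broadcast memory operand (vpandd {1toN}).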
define <16 x i8> @splatconstant_shl_v16i8(<16 x i8> %a) nounwind {
; GFNISSE-LABEL: splatconstant_shl_v16i8:
; GFNISSE: # %bb.0:
; GFNISSE-NEXT: psllw $3, %xmm0
; GFNISSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; GFNISSE-NEXT: retq
;
; GFNIAVX1OR2-LABEL: splatconstant_shl_v16i8:
; GFNIAVX1OR2: # %bb.0:
; GFNIAVX1OR2-NEXT: vpsllw $3, %xmm0, %xmm0
; GFNIAVX1OR2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; GFNIAVX1OR2-NEXT: retq
;
; GFNIAVX512-LABEL: splatconstant_shl_v16i8:
; GFNIAVX512: # %bb.0:
; GFNIAVX512-NEXT: vpsllw $3, %xmm0, %xmm0
; GFNIAVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; GFNIAVX512-NEXT: retq
  %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %shift
}

define <16 x i8> @splatconstant_lshr_v16i8(<16 x i8> %a) nounwind {
; GFNISSE-LABEL: splatconstant_lshr_v16i8:
; GFNISSE: # %bb.0:
; GFNISSE-NEXT: psrlw $7, %xmm0
; GFNISSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; GFNISSE-NEXT: retq
;
; GFNIAVX1OR2-LABEL: splatconstant_lshr_v16i8:
; GFNIAVX1OR2: # %bb.0:
; GFNIAVX1OR2-NEXT: vpsrlw $7, %xmm0, %xmm0
; GFNIAVX1OR2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; GFNIAVX1OR2-NEXT: retq
;
; GFNIAVX512-LABEL: splatconstant_lshr_v16i8:
; GFNIAVX512: # %bb.0:
; GFNIAVX512-NEXT: vpsrlw $7, %xmm0, %xmm0
; GFNIAVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; GFNIAVX512-NEXT: retq
  %shift = lshr <16 x i8> %a, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  ret <16 x i8> %shift
}
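
; Summarizing the checks below: there is no byte-element ashr either, so
; ashr-by-C is emulated on top of the logical shift as ((x >>u C) ^ m) - m,
; where m splats the shifted sign bit (0x80 >> C; 0x08 here) and the subtract
; sign-extends the result. The AVX-512 runs fold the pand+pxor pair into a
; single vpternlogd (imm 108 computes (dst & mem) ^ src1).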
define <16 x i8> @splatconstant_ashr_v16i8(<16 x i8> %a) nounwind {
; GFNISSE-LABEL: splatconstant_ashr_v16i8:
; GFNISSE: # %bb.0:
; GFNISSE-NEXT: psrlw $4, %xmm0
; GFNISSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; GFNISSE-NEXT: movdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; GFNISSE-NEXT: pxor %xmm1, %xmm0
; GFNISSE-NEXT: psubb %xmm1, %xmm0
; GFNISSE-NEXT: retq
;
; GFNIAVX1-LABEL: splatconstant_ashr_v16i8:
; GFNIAVX1: # %bb.0:
; GFNIAVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
; GFNIAVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; GFNIAVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; GFNIAVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; GFNIAVX1-NEXT: vpsubb %xmm1, %xmm0, %xmm0
; GFNIAVX1-NEXT: retq
;
; GFNIAVX2-LABEL: splatconstant_ashr_v16i8:
; GFNIAVX2: # %bb.0:
; GFNIAVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
; GFNIAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; GFNIAVX2-NEXT: vpbroadcastb {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; GFNIAVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; GFNIAVX2-NEXT: vpsubb %xmm1, %xmm0, %xmm0
; GFNIAVX2-NEXT: retq
;
; GFNIAVX512-LABEL: splatconstant_ashr_v16i8:
; GFNIAVX512: # %bb.0:
; GFNIAVX512-NEXT: vpsrlw $4, %xmm0, %xmm0
; GFNIAVX512-NEXT: vpbroadcastb {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; GFNIAVX512-NEXT: vpternlogd $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm0
; GFNIAVX512-NEXT: vpsubb %xmm1, %xmm0, %xmm0
; GFNIAVX512-NEXT: retq
  %shift = ashr <16 x i8> %a, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
  ret <16 x i8> %shift
}

;
; 256 Bit Vector Shifts
;
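
; 256-bit note: the GFNISSE runs receive the vector in two xmm registers and
; repeat the 128-bit sequence on each half; GFNIAVX1 splits each ymm with
; vextractf128/vinsertf128 because AVX1 has no 256-bit integer ops; AVX2 and
; AVX-512 shift whole ymm registers.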
define <32 x i8> @splatconstant_shl_v32i8(<32 x i8> %a) nounwind {
; GFNISSE-LABEL: splatconstant_shl_v32i8:
; GFNISSE: # %bb.0:
; GFNISSE-NEXT: psllw $6, %xmm0
; GFNISSE-NEXT: movdqa {{.*#+}} xmm2 = [192,192,192,192,192,192,192,192,192,192,192,192,192,192,192,192]
; GFNISSE-NEXT: pand %xmm2, %xmm0
; GFNISSE-NEXT: psllw $6, %xmm1
; GFNISSE-NEXT: pand %xmm2, %xmm1
; GFNISSE-NEXT: retq
;
; GFNIAVX1-LABEL: splatconstant_shl_v32i8:
; GFNIAVX1: # %bb.0:
; GFNIAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; GFNIAVX1-NEXT: vpsllw $6, %xmm1, %xmm1
; GFNIAVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [192,192,192,192,192,192,192,192,192,192,192,192,192,192,192,192]
; GFNIAVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
; GFNIAVX1-NEXT: vpsllw $6, %xmm0, %xmm0
; GFNIAVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
; GFNIAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; GFNIAVX1-NEXT: retq
;
; GFNIAVX2-LABEL: splatconstant_shl_v32i8:
; GFNIAVX2: # %bb.0:
; GFNIAVX2-NEXT: vpsllw $6, %ymm0, %ymm0
; GFNIAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; GFNIAVX2-NEXT: retq
;
; GFNIAVX512-LABEL: splatconstant_shl_v32i8:
; GFNIAVX512: # %bb.0:
; GFNIAVX512-NEXT: vpsllw $6, %ymm0, %ymm0
; GFNIAVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; GFNIAVX512-NEXT: retq
  %shift = shl <32 x i8> %a, <i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6>
  ret <32 x i8> %shift
}

define <32 x i8> @splatconstant_lshr_v32i8(<32 x i8> %a) nounwind {
; GFNISSE-LABEL: splatconstant_lshr_v32i8:
; GFNISSE: # %bb.0:
; GFNISSE-NEXT: psrlw $1, %xmm0
; GFNISSE-NEXT: movdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; GFNISSE-NEXT: pand %xmm2, %xmm0
; GFNISSE-NEXT: psrlw $1, %xmm1
; GFNISSE-NEXT: pand %xmm2, %xmm1
; GFNISSE-NEXT: retq
;
; GFNIAVX1-LABEL: splatconstant_lshr_v32i8:
; GFNIAVX1: # %bb.0:
; GFNIAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; GFNIAVX1-NEXT: vpsrlw $1, %xmm1, %xmm1
; GFNIAVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; GFNIAVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
; GFNIAVX1-NEXT: vpsrlw $1, %xmm0, %xmm0
; GFNIAVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
; GFNIAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; GFNIAVX1-NEXT: retq
;
; GFNIAVX2-LABEL: splatconstant_lshr_v32i8:
; GFNIAVX2: # %bb.0:
; GFNIAVX2-NEXT: vpsrlw $1, %ymm0, %ymm0
; GFNIAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; GFNIAVX2-NEXT: retq
;
; GFNIAVX512-LABEL: splatconstant_lshr_v32i8:
; GFNIAVX512: # %bb.0:
; GFNIAVX512-NEXT: vpsrlw $1, %ymm0, %ymm0
; GFNIAVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; GFNIAVX512-NEXT: retq
  %shift = lshr <32 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <32 x i8> %shift
}

define <32 x i8> @splatconstant_ashr_v32i8(<32 x i8> %a) nounwind {
; GFNISSE-LABEL: splatconstant_ashr_v32i8:
; GFNISSE: # %bb.0:
; GFNISSE-NEXT: psrlw $2, %xmm0
; GFNISSE-NEXT: movdqa {{.*#+}} xmm2 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; GFNISSE-NEXT: pand %xmm2, %xmm0
; GFNISSE-NEXT: movdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
; GFNISSE-NEXT: pxor %xmm3, %xmm0
; GFNISSE-NEXT: psubb %xmm3, %xmm0
; GFNISSE-NEXT: psrlw $2, %xmm1
; GFNISSE-NEXT: pand %xmm2, %xmm1
; GFNISSE-NEXT: pxor %xmm3, %xmm1
; GFNISSE-NEXT: psubb %xmm3, %xmm1
; GFNISSE-NEXT: retq
;
; GFNIAVX1-LABEL: splatconstant_ashr_v32i8:
; GFNIAVX1: # %bb.0:
; GFNIAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; GFNIAVX1-NEXT: vpsrlw $2, %xmm1, %xmm1
; GFNIAVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; GFNIAVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
; GFNIAVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
; GFNIAVX1-NEXT: vpxor %xmm3, %xmm1, %xmm1
; GFNIAVX1-NEXT: vpsubb %xmm3, %xmm1, %xmm1
; GFNIAVX1-NEXT: vpsrlw $2, %xmm0, %xmm0
; GFNIAVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
; GFNIAVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
; GFNIAVX1-NEXT: vpsubb %xmm3, %xmm0, %xmm0
; GFNIAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; GFNIAVX1-NEXT: retq
;
; GFNIAVX2-LABEL: splatconstant_ashr_v32i8:
; GFNIAVX2: # %bb.0:
; GFNIAVX2-NEXT: vpsrlw $2, %ymm0, %ymm0
; GFNIAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; GFNIAVX2-NEXT: vpbroadcastb {{.*#+}} ymm1 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
; GFNIAVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
; GFNIAVX2-NEXT: vpsubb %ymm1, %ymm0, %ymm0
; GFNIAVX2-NEXT: retq
;
; GFNIAVX512-LABEL: splatconstant_ashr_v32i8:
; GFNIAVX512: # %bb.0:
; GFNIAVX512-NEXT: vpsrlw $2, %ymm0, %ymm0
; GFNIAVX512-NEXT: vpbroadcastb {{.*#+}} ymm1 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
; GFNIAVX512-NEXT: vpternlogd $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm0
; GFNIAVX512-NEXT: vpsubb %ymm1, %ymm0, %ymm0
; GFNIAVX512-NEXT: retq
  %shift = ashr <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <32 x i8> %shift
}

;
; 512 Bit Vector Shifts
;
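
; 512-bit note: GFNISSE works on four xmm registers and GFNIAVX1/GFNIAVX2 on
; two ymm registers, while the avx512bw run handles a full zmm with the mask
; broadcast from memory ({1to16}).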
define <64 x i8> @splatconstant_shl_v64i8(<64 x i8> %a) nounwind {
; GFNISSE-LABEL: splatconstant_shl_v64i8:
; GFNISSE: # %bb.0:
; GFNISSE-NEXT: psllw $5, %xmm0
; GFNISSE-NEXT: movdqa {{.*#+}} xmm4 = [224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224]
; GFNISSE-NEXT: pand %xmm4, %xmm0
; GFNISSE-NEXT: psllw $5, %xmm1
; GFNISSE-NEXT: pand %xmm4, %xmm1
; GFNISSE-NEXT: psllw $5, %xmm2
; GFNISSE-NEXT: pand %xmm4, %xmm2
; GFNISSE-NEXT: psllw $5, %xmm3
; GFNISSE-NEXT: pand %xmm4, %xmm3
; GFNISSE-NEXT: retq
;
; GFNIAVX1-LABEL: splatconstant_shl_v64i8:
; GFNIAVX1: # %bb.0:
; GFNIAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; GFNIAVX1-NEXT: vpsllw $5, %xmm2, %xmm2
; GFNIAVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224]
; GFNIAVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; GFNIAVX1-NEXT: vpsllw $5, %xmm0, %xmm0
; GFNIAVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
; GFNIAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; GFNIAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; GFNIAVX1-NEXT: vpsllw $5, %xmm2, %xmm2
; GFNIAVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; GFNIAVX1-NEXT: vpsllw $5, %xmm1, %xmm1
; GFNIAVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
; GFNIAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; GFNIAVX1-NEXT: retq
;
; GFNIAVX2-LABEL: splatconstant_shl_v64i8:
; GFNIAVX2: # %bb.0:
; GFNIAVX2-NEXT: vpsllw $5, %ymm0, %ymm0
; GFNIAVX2-NEXT: vpbroadcastb {{.*#+}} ymm2 = [224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224,224]
; GFNIAVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; GFNIAVX2-NEXT: vpsllw $5, %ymm1, %ymm1
; GFNIAVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; GFNIAVX2-NEXT: retq
;
; GFNIAVX512-LABEL: splatconstant_shl_v64i8:
; GFNIAVX512: # %bb.0:
; GFNIAVX512-NEXT: vpsllw $5, %zmm0, %zmm0
; GFNIAVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; GFNIAVX512-NEXT: retq
  %shift = shl <64 x i8> %a, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
  ret <64 x i8> %shift
}

define <64 x i8> @splatconstant_lshr_v64i8(<64 x i8> %a) nounwind {
; GFNISSE-LABEL: splatconstant_lshr_v64i8:
; GFNISSE: # %bb.0:
; GFNISSE-NEXT: psrlw $7, %xmm0
; GFNISSE-NEXT: movdqa {{.*#+}} xmm4 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; GFNISSE-NEXT: pand %xmm4, %xmm0
; GFNISSE-NEXT: psrlw $7, %xmm1
; GFNISSE-NEXT: pand %xmm4, %xmm1
; GFNISSE-NEXT: psrlw $7, %xmm2
; GFNISSE-NEXT: pand %xmm4, %xmm2
; GFNISSE-NEXT: psrlw $7, %xmm3
; GFNISSE-NEXT: pand %xmm4, %xmm3
; GFNISSE-NEXT: retq
;
; GFNIAVX1-LABEL: splatconstant_lshr_v64i8:
; GFNIAVX1: # %bb.0:
; GFNIAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; GFNIAVX1-NEXT: vpsrlw $7, %xmm2, %xmm2
; GFNIAVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; GFNIAVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; GFNIAVX1-NEXT: vpsrlw $7, %xmm0, %xmm0
; GFNIAVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
; GFNIAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; GFNIAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; GFNIAVX1-NEXT: vpsrlw $7, %xmm2, %xmm2
; GFNIAVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; GFNIAVX1-NEXT: vpsrlw $7, %xmm1, %xmm1
; GFNIAVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
; GFNIAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; GFNIAVX1-NEXT: retq
;
; GFNIAVX2-LABEL: splatconstant_lshr_v64i8:
; GFNIAVX2: # %bb.0:
; GFNIAVX2-NEXT: vpsrlw $7, %ymm0, %ymm0
; GFNIAVX2-NEXT: vpbroadcastb {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; GFNIAVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; GFNIAVX2-NEXT: vpsrlw $7, %ymm1, %ymm1
; GFNIAVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; GFNIAVX2-NEXT: retq
;
; GFNIAVX512-LABEL: splatconstant_lshr_v64i8:
; GFNIAVX512: # %bb.0:
; GFNIAVX512-NEXT: vpsrlw $7, %zmm0, %zmm0
; GFNIAVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; GFNIAVX512-NEXT: retq
  %shift = lshr <64 x i8> %a, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  ret <64 x i8> %shift
}

define <64 x i8> @splatconstant_ashr_v64i8(<64 x i8> %a) nounwind {
; GFNISSE-LABEL: splatconstant_ashr_v64i8:
; GFNISSE: # %bb.0:
; GFNISSE-NEXT: psrlw $1, %xmm0
; GFNISSE-NEXT: movdqa {{.*#+}} xmm4 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; GFNISSE-NEXT: pand %xmm4, %xmm0
; GFNISSE-NEXT: movdqa {{.*#+}} xmm5 = [64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64]
; GFNISSE-NEXT: pxor %xmm5, %xmm0
; GFNISSE-NEXT: psubb %xmm5, %xmm0
; GFNISSE-NEXT: psrlw $1, %xmm1
; GFNISSE-NEXT: pand %xmm4, %xmm1
; GFNISSE-NEXT: pxor %xmm5, %xmm1
; GFNISSE-NEXT: psubb %xmm5, %xmm1
; GFNISSE-NEXT: psrlw $1, %xmm2
; GFNISSE-NEXT: pand %xmm4, %xmm2
; GFNISSE-NEXT: pxor %xmm5, %xmm2
; GFNISSE-NEXT: psubb %xmm5, %xmm2
; GFNISSE-NEXT: psrlw $1, %xmm3
; GFNISSE-NEXT: pand %xmm4, %xmm3
; GFNISSE-NEXT: pxor %xmm5, %xmm3
; GFNISSE-NEXT: psubb %xmm5, %xmm3
; GFNISSE-NEXT: retq
;
; GFNIAVX1-LABEL: splatconstant_ashr_v64i8:
; GFNIAVX1: # %bb.0:
; GFNIAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; GFNIAVX1-NEXT: vpsrlw $1, %xmm2, %xmm2
; GFNIAVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; GFNIAVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; GFNIAVX1-NEXT: vbroadcastss {{.*#+}} xmm4 = [64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64]
; GFNIAVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
; GFNIAVX1-NEXT: vpsubb %xmm4, %xmm2, %xmm2
; GFNIAVX1-NEXT: vpsrlw $1, %xmm0, %xmm0
; GFNIAVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
; GFNIAVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
; GFNIAVX1-NEXT: vpsubb %xmm4, %xmm0, %xmm0
; GFNIAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; GFNIAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; GFNIAVX1-NEXT: vpsrlw $1, %xmm2, %xmm2
; GFNIAVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; GFNIAVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
; GFNIAVX1-NEXT: vpsubb %xmm4, %xmm2, %xmm2
; GFNIAVX1-NEXT: vpsrlw $1, %xmm1, %xmm1
; GFNIAVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
; GFNIAVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
; GFNIAVX1-NEXT: vpsubb %xmm4, %xmm1, %xmm1
; GFNIAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; GFNIAVX1-NEXT: retq
;
; GFNIAVX2-LABEL: splatconstant_ashr_v64i8:
; GFNIAVX2: # %bb.0:
; GFNIAVX2-NEXT: vpsrlw $1, %ymm0, %ymm0
; GFNIAVX2-NEXT: vpbroadcastb {{.*#+}} ymm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
; GFNIAVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; GFNIAVX2-NEXT: vpbroadcastb {{.*#+}} ymm3 = [64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64]
; GFNIAVX2-NEXT: vpxor %ymm3, %ymm0, %ymm0
; GFNIAVX2-NEXT: vpsubb %ymm3, %ymm0, %ymm0
; GFNIAVX2-NEXT: vpsrlw $1, %ymm1, %ymm1
; GFNIAVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; GFNIAVX2-NEXT: vpxor %ymm3, %ymm1, %ymm1
; GFNIAVX2-NEXT: vpsubb %ymm3, %ymm1, %ymm1
; GFNIAVX2-NEXT: retq
;
; GFNIAVX512-LABEL: splatconstant_ashr_v64i8:
; GFNIAVX512: # %bb.0:
; GFNIAVX512-NEXT: vpsrlw $1, %zmm0, %zmm0
; GFNIAVX512-NEXT: vpbroadcastb {{.*#+}} zmm1 = [64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64,64]
; GFNIAVX512-NEXT: vpternlogd $108, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm0
; GFNIAVX512-NEXT: vpsubb %zmm1, %zmm0, %zmm0
; GFNIAVX512-NEXT: retq
  %shift = ashr <64 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <64 x i8> %shift
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: