1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt < %s -passes=instcombine -mtriple=x86_64-unknown-unknown -S | FileCheck %s
3 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; psrai.w with immediate count 0: the call is expected to fold away, returning %v unchanged.
9 define <8 x i16> @sse2_psrai_w_0(<8 x i16> %v) {
10 ; CHECK-LABEL: @sse2_psrai_w_0(
11 ; CHECK-NEXT: ret <8 x i16> [[V:%.*]]
13 %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 0)
; psrai.w with immediate count 15 (in range for i16 lanes): expected to become a generic ashr by splat 15.
17 define <8 x i16> @sse2_psrai_w_15(<8 x i16> %v) {
18 ; CHECK-LABEL: @sse2_psrai_w_15(
19 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], splat (i16 15)
20 ; CHECK-NEXT: ret <8 x i16> [[TMP1]]
22 %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 15)
; psrai.w with immediate count 64 (out of range for i16 lanes): the expected ashr amount is clamped to 15 (lane width - 1).
26 define <8 x i16> @sse2_psrai_w_64(<8 x i16> %v) {
27 ; CHECK-LABEL: @sse2_psrai_w_64(
28 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], splat (i16 15)
29 ; CHECK-NEXT: ret <8 x i16> [[TMP1]]
31 %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 64)
; psrai.d with immediate count 0: the call is expected to fold away, returning %v unchanged.
35 define <4 x i32> @sse2_psrai_d_0(<4 x i32> %v) {
36 ; CHECK-LABEL: @sse2_psrai_d_0(
37 ; CHECK-NEXT: ret <4 x i32> [[V:%.*]]
39 %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 0)
; psrai.d with immediate count 15 (in range for i32 lanes): expected to become a generic ashr by splat 15.
43 define <4 x i32> @sse2_psrai_d_15(<4 x i32> %v) {
44 ; CHECK-LABEL: @sse2_psrai_d_15(
45 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], splat (i32 15)
46 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
48 %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 15)
; psrai.d with immediate count 64 (out of range for i32 lanes): the expected ashr amount is clamped to 31 (lane width - 1).
52 define <4 x i32> @sse2_psrai_d_64(<4 x i32> %v) {
53 ; CHECK-LABEL: @sse2_psrai_d_64(
54 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], splat (i32 31)
55 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
57 %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 64)
; AVX2 psrai.w with immediate count 0: the call is expected to fold away, returning %v unchanged.
61 define <16 x i16> @avx2_psrai_w_0(<16 x i16> %v) {
62 ; CHECK-LABEL: @avx2_psrai_w_0(
63 ; CHECK-NEXT: ret <16 x i16> [[V:%.*]]
65 %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 0)
; AVX2 psrai.w with immediate count 15 (in range for i16 lanes): expected to become a generic ashr by splat 15.
69 define <16 x i16> @avx2_psrai_w_15(<16 x i16> %v) {
70 ; CHECK-LABEL: @avx2_psrai_w_15(
71 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], splat (i16 15)
72 ; CHECK-NEXT: ret <16 x i16> [[TMP1]]
74 %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 15)
; AVX2 psrai.w with immediate count 64 (out of range for i16 lanes): the expected ashr amount is clamped to 15 (lane width - 1).
78 define <16 x i16> @avx2_psrai_w_64(<16 x i16> %v) {
79 ; CHECK-LABEL: @avx2_psrai_w_64(
80 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], splat (i16 15)
81 ; CHECK-NEXT: ret <16 x i16> [[TMP1]]
83 %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 64)
; AVX2 psrai.d with immediate count 0: the call is expected to fold away, returning %v unchanged.
87 define <8 x i32> @avx2_psrai_d_0(<8 x i32> %v) {
88 ; CHECK-LABEL: @avx2_psrai_d_0(
89 ; CHECK-NEXT: ret <8 x i32> [[V:%.*]]
91 %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 0)
; AVX2 psrai.d with immediate count 15 (in range for i32 lanes): expected to become a generic ashr by splat 15.
95 define <8 x i32> @avx2_psrai_d_15(<8 x i32> %v) {
96 ; CHECK-LABEL: @avx2_psrai_d_15(
97 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], splat (i32 15)
98 ; CHECK-NEXT: ret <8 x i32> [[TMP1]]
100 %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 15)
; AVX2 psrai.d with immediate count 64 (out of range for i32 lanes): the expected ashr amount is clamped to 31 (lane width - 1).
104 define <8 x i32> @avx2_psrai_d_64(<8 x i32> %v) {
105 ; CHECK-LABEL: @avx2_psrai_d_64(
106 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], splat (i32 31)
107 ; CHECK-NEXT: ret <8 x i32> [[TMP1]]
109 %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 64)
; AVX-512 psrai.q.128 with immediate count 0: the call is expected to fold away, returning %v unchanged.
113 define <2 x i64> @avx512_psrai_q_128_0(<2 x i64> %v) {
114 ; CHECK-LABEL: @avx512_psrai_q_128_0(
115 ; CHECK-NEXT: ret <2 x i64> [[V:%.*]]
117 %1 = tail call <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64> %v, i32 0)
; AVX-512 psrai.q.128 with immediate count 15 (in range for i64 lanes): expected to become a generic ashr by splat 15.
121 define <2 x i64> @avx512_psrai_q_128_15(<2 x i64> %v) {
122 ; CHECK-LABEL: @avx512_psrai_q_128_15(
123 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], splat (i64 15)
124 ; CHECK-NEXT: ret <2 x i64> [[TMP1]]
126 %1 = tail call <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64> %v, i32 15)
; AVX-512 psrai.q.128 with immediate count 64 (equal to the i64 lane width, so out of range): the expected ashr amount is clamped to 63.
130 define <2 x i64> @avx512_psrai_q_128_64(<2 x i64> %v) {
131 ; CHECK-LABEL: @avx512_psrai_q_128_64(
132 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], splat (i64 63)
133 ; CHECK-NEXT: ret <2 x i64> [[TMP1]]
135 %1 = tail call <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64> %v, i32 64)
; AVX-512 psrai.q.256 with immediate count 0: the call is expected to fold away, returning %v unchanged.
139 define <4 x i64> @avx512_psrai_q_256_0(<4 x i64> %v) {
140 ; CHECK-LABEL: @avx512_psrai_q_256_0(
141 ; CHECK-NEXT: ret <4 x i64> [[V:%.*]]
143 %1 = tail call <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64> %v, i32 0)
; AVX-512 psrai.q.256 with immediate count 15 (in range for i64 lanes): expected to become a generic ashr by splat 15.
147 define <4 x i64> @avx512_psrai_q_256_15(<4 x i64> %v) {
148 ; CHECK-LABEL: @avx512_psrai_q_256_15(
149 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], splat (i64 15)
150 ; CHECK-NEXT: ret <4 x i64> [[TMP1]]
152 %1 = tail call <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64> %v, i32 15)
; AVX-512 psrai.q.256 with immediate count 64 (equal to the i64 lane width, so out of range): the expected ashr amount is clamped to 63.
156 define <4 x i64> @avx512_psrai_q_256_64(<4 x i64> %v) {
157 ; CHECK-LABEL: @avx512_psrai_q_256_64(
158 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], splat (i64 63)
159 ; CHECK-NEXT: ret <4 x i64> [[TMP1]]
161 %1 = tail call <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64> %v, i32 64)
; AVX-512 psrai.w.512 with immediate count 0: the call is expected to fold away, returning %v unchanged.
165 define <32 x i16> @avx512_psrai_w_512_0(<32 x i16> %v) {
166 ; CHECK-LABEL: @avx512_psrai_w_512_0(
167 ; CHECK-NEXT: ret <32 x i16> [[V:%.*]]
169 %1 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %v, i32 0)
; AVX-512 psrai.w.512 with immediate count 15 (in range for i16 lanes): expected to become a generic ashr by splat 15.
173 define <32 x i16> @avx512_psrai_w_512_15(<32 x i16> %v) {
174 ; CHECK-LABEL: @avx512_psrai_w_512_15(
175 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], splat (i16 15)
176 ; CHECK-NEXT: ret <32 x i16> [[TMP1]]
178 %1 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %v, i32 15)
; AVX-512 psrai.w.512 with immediate count 64 (out of range for i16 lanes): the expected ashr amount is clamped to 15 (lane width - 1).
182 define <32 x i16> @avx512_psrai_w_512_64(<32 x i16> %v) {
183 ; CHECK-LABEL: @avx512_psrai_w_512_64(
184 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], splat (i16 15)
185 ; CHECK-NEXT: ret <32 x i16> [[TMP1]]
187 %1 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %v, i32 64)
; AVX-512 psrai.d.512 with immediate count 0: the call is expected to fold away, returning %v unchanged.
191 define <16 x i32> @avx512_psrai_d_512_0(<16 x i32> %v) {
192 ; CHECK-LABEL: @avx512_psrai_d_512_0(
193 ; CHECK-NEXT: ret <16 x i32> [[V:%.*]]
195 %1 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %v, i32 0)
; AVX-512 psrai.d.512 with immediate count 15 (in range for i32 lanes): expected to become a generic ashr by splat 15.
199 define <16 x i32> @avx512_psrai_d_512_15(<16 x i32> %v) {
200 ; CHECK-LABEL: @avx512_psrai_d_512_15(
201 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], splat (i32 15)
202 ; CHECK-NEXT: ret <16 x i32> [[TMP1]]
204 %1 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %v, i32 15)
; AVX-512 psrai.d.512 with immediate count 64 (out of range for i32 lanes): the expected ashr amount is clamped to 31 (lane width - 1).
208 define <16 x i32> @avx512_psrai_d_512_64(<16 x i32> %v) {
209 ; CHECK-LABEL: @avx512_psrai_d_512_64(
210 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], splat (i32 31)
211 ; CHECK-NEXT: ret <16 x i32> [[TMP1]]
213 %1 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %v, i32 64)
; AVX-512 psrai.q.512 with immediate count 0: the call is expected to fold away, returning %v unchanged.
217 define <8 x i64> @avx512_psrai_q_512_0(<8 x i64> %v) {
218 ; CHECK-LABEL: @avx512_psrai_q_512_0(
219 ; CHECK-NEXT: ret <8 x i64> [[V:%.*]]
221 %1 = tail call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %v, i32 0)
; AVX-512 psrai.q.512 with immediate count 15 (in range for i64 lanes): expected to become a generic ashr by splat 15.
225 define <8 x i64> @avx512_psrai_q_512_15(<8 x i64> %v) {
226 ; CHECK-LABEL: @avx512_psrai_q_512_15(
227 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], splat (i64 15)
228 ; CHECK-NEXT: ret <8 x i64> [[TMP1]]
230 %1 = tail call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %v, i32 15)
; AVX-512 psrai.q.512 with immediate count 64 (equal to the i64 lane width, so out of range): the expected ashr amount is clamped to 63.
234 define <8 x i64> @avx512_psrai_q_512_64(<8 x i64> %v) {
235 ; CHECK-LABEL: @avx512_psrai_q_512_64(
236 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], splat (i64 63)
237 ; CHECK-NEXT: ret <8 x i64> [[TMP1]]
239 %1 = tail call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %v, i32 64)
; psrli.w with immediate count 0: the call is expected to fold away, returning %v unchanged.
247 define <8 x i16> @sse2_psrli_w_0(<8 x i16> %v) {
248 ; CHECK-LABEL: @sse2_psrli_w_0(
249 ; CHECK-NEXT: ret <8 x i16> [[V:%.*]]
251 %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 0)
; psrli.w with immediate count 15 (in range for i16 lanes): expected to become a generic lshr by splat 15.
255 define <8 x i16> @sse2_psrli_w_15(<8 x i16> %v) {
256 ; CHECK-LABEL: @sse2_psrli_w_15(
257 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i16> [[V:%.*]], splat (i16 15)
258 ; CHECK-NEXT: ret <8 x i16> [[TMP1]]
260 %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 15)
; psrli.w with immediate count 64 (out of range for i16 lanes): the logical right shift is expected to fold to an all-zero vector.
264 define <8 x i16> @sse2_psrli_w_64(<8 x i16> %v) {
265 ; CHECK-LABEL: @sse2_psrli_w_64(
266 ; CHECK-NEXT: ret <8 x i16> zeroinitializer
268 %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 64)
; psrli.d with immediate count 0: the call is expected to fold away, returning %v unchanged.
272 define <4 x i32> @sse2_psrli_d_0(<4 x i32> %v) {
273 ; CHECK-LABEL: @sse2_psrli_d_0(
274 ; CHECK-NEXT: ret <4 x i32> [[V:%.*]]
276 %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 0)
; psrli.d with immediate count 15 (in range for i32 lanes): expected to become a generic lshr by splat 15.
280 define <4 x i32> @sse2_psrli_d_15(<4 x i32> %v) {
281 ; CHECK-LABEL: @sse2_psrli_d_15(
282 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[V:%.*]], splat (i32 15)
283 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
285 %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 15)
; psrli.d with immediate count 64 (out of range for i32 lanes): the logical right shift is expected to fold to an all-zero vector.
289 define <4 x i32> @sse2_psrli_d_64(<4 x i32> %v) {
290 ; CHECK-LABEL: @sse2_psrli_d_64(
291 ; CHECK-NEXT: ret <4 x i32> zeroinitializer
293 %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 64)
; psrli.q with immediate count 0: the call is expected to fold away, returning %v unchanged.
297 define <2 x i64> @sse2_psrli_q_0(<2 x i64> %v) {
298 ; CHECK-LABEL: @sse2_psrli_q_0(
299 ; CHECK-NEXT: ret <2 x i64> [[V:%.*]]
301 %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 0)
; psrli.q with immediate count 15 (in range for i64 lanes): expected to become a generic lshr by splat 15.
305 define <2 x i64> @sse2_psrli_q_15(<2 x i64> %v) {
306 ; CHECK-LABEL: @sse2_psrli_q_15(
307 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[V:%.*]], splat (i64 15)
308 ; CHECK-NEXT: ret <2 x i64> [[TMP1]]
310 %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 15)
; psrli.q with immediate count 64 (equal to the i64 lane width, so out of range): expected to fold to an all-zero vector.
314 define <2 x i64> @sse2_psrli_q_64(<2 x i64> %v) {
315 ; CHECK-LABEL: @sse2_psrli_q_64(
316 ; CHECK-NEXT: ret <2 x i64> zeroinitializer
318 %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 64)
; AVX2 psrli.w with immediate count 0: the call is expected to fold away, returning %v unchanged.
322 define <16 x i16> @avx2_psrli_w_0(<16 x i16> %v) {
323 ; CHECK-LABEL: @avx2_psrli_w_0(
324 ; CHECK-NEXT: ret <16 x i16> [[V:%.*]]
326 %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 0)
; AVX2 psrli.w with immediate count 15 (in range for i16 lanes): expected to become a generic lshr by splat 15.
330 define <16 x i16> @avx2_psrli_w_15(<16 x i16> %v) {
331 ; CHECK-LABEL: @avx2_psrli_w_15(
332 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i16> [[V:%.*]], splat (i16 15)
333 ; CHECK-NEXT: ret <16 x i16> [[TMP1]]
335 %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 15)
; AVX2 psrli.w with immediate count 64 (out of range for i16 lanes): the logical right shift is expected to fold to an all-zero vector.
339 define <16 x i16> @avx2_psrli_w_64(<16 x i16> %v) {
340 ; CHECK-LABEL: @avx2_psrli_w_64(
341 ; CHECK-NEXT: ret <16 x i16> zeroinitializer
343 %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 64)
; AVX2 psrli.d with immediate count 0: the call is expected to fold away, returning %v unchanged.
347 define <8 x i32> @avx2_psrli_d_0(<8 x i32> %v) {
348 ; CHECK-LABEL: @avx2_psrli_d_0(
349 ; CHECK-NEXT: ret <8 x i32> [[V:%.*]]
351 %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 0)
; AVX2 psrli.d with immediate count 15 (in range for i32 lanes): expected to become a generic lshr by splat 15.
355 define <8 x i32> @avx2_psrli_d_15(<8 x i32> %v) {
356 ; CHECK-LABEL: @avx2_psrli_d_15(
357 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> [[V:%.*]], splat (i32 15)
358 ; CHECK-NEXT: ret <8 x i32> [[TMP1]]
360 %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 15)
; AVX2 psrli.d with immediate count 64 (out of range for i32 lanes): the logical right shift is expected to fold to an all-zero vector.
364 define <8 x i32> @avx2_psrli_d_64(<8 x i32> %v) {
365 ; CHECK-LABEL: @avx2_psrli_d_64(
366 ; CHECK-NEXT: ret <8 x i32> zeroinitializer
368 %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 64)
; AVX2 psrli.q with immediate count 0: the call is expected to fold away, returning %v unchanged.
372 define <4 x i64> @avx2_psrli_q_0(<4 x i64> %v) {
373 ; CHECK-LABEL: @avx2_psrli_q_0(
374 ; CHECK-NEXT: ret <4 x i64> [[V:%.*]]
376 %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 0)
; AVX2 psrli.q with immediate count 15 (in range for i64 lanes): expected to become a generic lshr by splat 15.
380 define <4 x i64> @avx2_psrli_q_15(<4 x i64> %v) {
381 ; CHECK-LABEL: @avx2_psrli_q_15(
382 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i64> [[V:%.*]], splat (i64 15)
383 ; CHECK-NEXT: ret <4 x i64> [[TMP1]]
385 %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 15)
; AVX2 psrli.q with immediate count 64 (equal to the i64 lane width, so out of range): expected to fold to an all-zero vector.
389 define <4 x i64> @avx2_psrli_q_64(<4 x i64> %v) {
390 ; CHECK-LABEL: @avx2_psrli_q_64(
391 ; CHECK-NEXT: ret <4 x i64> zeroinitializer
393 %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 64)
; AVX-512 psrli.w.512 with immediate count 0: the call is expected to fold away, returning %v unchanged.
397 define <32 x i16> @avx512_psrli_w_512_0(<32 x i16> %v) {
398 ; CHECK-LABEL: @avx512_psrli_w_512_0(
399 ; CHECK-NEXT: ret <32 x i16> [[V:%.*]]
401 %1 = tail call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %v, i32 0)
; AVX-512 psrli.w.512 with immediate count 15 (in range for i16 lanes): expected to become a generic lshr by splat 15.
405 define <32 x i16> @avx512_psrli_w_512_15(<32 x i16> %v) {
406 ; CHECK-LABEL: @avx512_psrli_w_512_15(
407 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <32 x i16> [[V:%.*]], splat (i16 15)
408 ; CHECK-NEXT: ret <32 x i16> [[TMP1]]
410 %1 = tail call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %v, i32 15)
; AVX-512 psrli.w.512 with immediate count 64 (out of range for i16 lanes): the logical right shift is expected to fold to an all-zero vector.
414 define <32 x i16> @avx512_psrli_w_512_64(<32 x i16> %v) {
415 ; CHECK-LABEL: @avx512_psrli_w_512_64(
416 ; CHECK-NEXT: ret <32 x i16> zeroinitializer
418 %1 = tail call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %v, i32 64)
; AVX-512 psrli.d.512 with immediate count 0: the call is expected to fold away, returning %v unchanged.
422 define <16 x i32> @avx512_psrli_d_512_0(<16 x i32> %v) {
423 ; CHECK-LABEL: @avx512_psrli_d_512_0(
424 ; CHECK-NEXT: ret <16 x i32> [[V:%.*]]
426 %1 = tail call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %v, i32 0)
; AVX-512 psrli.d.512 with immediate count 15 (in range for i32 lanes): expected to become a generic lshr by splat 15.
430 define <16 x i32> @avx512_psrli_d_512_15(<16 x i32> %v) {
431 ; CHECK-LABEL: @avx512_psrli_d_512_15(
432 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i32> [[V:%.*]], splat (i32 15)
433 ; CHECK-NEXT: ret <16 x i32> [[TMP1]]
435 %1 = tail call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %v, i32 15)
; AVX-512 psrli.d.512 with immediate count 64 (out of range for i32 lanes): the logical right shift is expected to fold to an all-zero vector.
439 define <16 x i32> @avx512_psrli_d_512_64(<16 x i32> %v) {
440 ; CHECK-LABEL: @avx512_psrli_d_512_64(
441 ; CHECK-NEXT: ret <16 x i32> zeroinitializer
443 %1 = tail call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %v, i32 64)
; AVX-512 psrli.q.512 with immediate count 0: the call is expected to fold away, returning %v unchanged.
447 define <8 x i64> @avx512_psrli_q_512_0(<8 x i64> %v) {
448 ; CHECK-LABEL: @avx512_psrli_q_512_0(
449 ; CHECK-NEXT: ret <8 x i64> [[V:%.*]]
451 %1 = tail call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %v, i32 0)
; AVX-512 psrli.q.512 with immediate count 15 (in range for i64 lanes): expected to become a generic lshr by splat 15.
455 define <8 x i64> @avx512_psrli_q_512_15(<8 x i64> %v) {
456 ; CHECK-LABEL: @avx512_psrli_q_512_15(
457 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i64> [[V:%.*]], splat (i64 15)
458 ; CHECK-NEXT: ret <8 x i64> [[TMP1]]
460 %1 = tail call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %v, i32 15)
; AVX-512 psrli.q.512 with immediate count 64 (equal to the i64 lane width, so out of range): expected to fold to an all-zero vector.
464 define <8 x i64> @avx512_psrli_q_512_64(<8 x i64> %v) {
465 ; CHECK-LABEL: @avx512_psrli_q_512_64(
466 ; CHECK-NEXT: ret <8 x i64> zeroinitializer
468 %1 = tail call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %v, i32 64)
; pslli.w with immediate count 0: the call is expected to fold away, returning %v unchanged.
476 define <8 x i16> @sse2_pslli_w_0(<8 x i16> %v) {
477 ; CHECK-LABEL: @sse2_pslli_w_0(
478 ; CHECK-NEXT: ret <8 x i16> [[V:%.*]]
480 %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 0)
; pslli.w with immediate count 15 (in range for i16 lanes): expected to become a generic shl by splat 15.
484 define <8 x i16> @sse2_pslli_w_15(<8 x i16> %v) {
485 ; CHECK-LABEL: @sse2_pslli_w_15(
486 ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i16> [[V:%.*]], splat (i16 15)
487 ; CHECK-NEXT: ret <8 x i16> [[TMP1]]
489 %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 15)
; pslli.w with immediate count 64 (out of range for i16 lanes): the left shift is expected to fold to an all-zero vector.
493 define <8 x i16> @sse2_pslli_w_64(<8 x i16> %v) {
494 ; CHECK-LABEL: @sse2_pslli_w_64(
495 ; CHECK-NEXT: ret <8 x i16> zeroinitializer
497 %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 64)
; pslli.d with immediate count 0: the call is expected to fold away, returning %v unchanged.
501 define <4 x i32> @sse2_pslli_d_0(<4 x i32> %v) {
502 ; CHECK-LABEL: @sse2_pslli_d_0(
503 ; CHECK-NEXT: ret <4 x i32> [[V:%.*]]
505 %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 0)
; pslli.d with immediate count 15 (in range for i32 lanes): expected to become a generic shl by splat 15.
509 define <4 x i32> @sse2_pslli_d_15(<4 x i32> %v) {
510 ; CHECK-LABEL: @sse2_pslli_d_15(
511 ; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], splat (i32 15)
512 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
514 %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 15)
; pslli.d with immediate count 64 (out of range for i32 lanes): the left shift is expected to fold to an all-zero vector.
518 define <4 x i32> @sse2_pslli_d_64(<4 x i32> %v) {
519 ; CHECK-LABEL: @sse2_pslli_d_64(
520 ; CHECK-NEXT: ret <4 x i32> zeroinitializer
522 %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 64)
; pslli.q with immediate count 0: the call is expected to fold away, returning %v unchanged.
526 define <2 x i64> @sse2_pslli_q_0(<2 x i64> %v) {
527 ; CHECK-LABEL: @sse2_pslli_q_0(
528 ; CHECK-NEXT: ret <2 x i64> [[V:%.*]]
530 %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 0)
; pslli.q with immediate count 15 (in range for i64 lanes): expected to become a generic shl by splat 15.
534 define <2 x i64> @sse2_pslli_q_15(<2 x i64> %v) {
535 ; CHECK-LABEL: @sse2_pslli_q_15(
536 ; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[V:%.*]], splat (i64 15)
537 ; CHECK-NEXT: ret <2 x i64> [[TMP1]]
539 %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 15)
; pslli.q with immediate count 64 (equal to the i64 lane width, so out of range): expected to fold to an all-zero vector.
543 define <2 x i64> @sse2_pslli_q_64(<2 x i64> %v) {
544 ; CHECK-LABEL: @sse2_pslli_q_64(
545 ; CHECK-NEXT: ret <2 x i64> zeroinitializer
547 %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 64)
; AVX2 pslli.w with immediate count 0: the call is expected to fold away, returning %v unchanged.
551 define <16 x i16> @avx2_pslli_w_0(<16 x i16> %v) {
552 ; CHECK-LABEL: @avx2_pslli_w_0(
553 ; CHECK-NEXT: ret <16 x i16> [[V:%.*]]
555 %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 0)
; AVX2 pslli.w with immediate count 15 (in range for i16 lanes): expected to become a generic shl by splat 15.
559 define <16 x i16> @avx2_pslli_w_15(<16 x i16> %v) {
560 ; CHECK-LABEL: @avx2_pslli_w_15(
561 ; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i16> [[V:%.*]], splat (i16 15)
562 ; CHECK-NEXT: ret <16 x i16> [[TMP1]]
564 %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 15)
; AVX2 pslli.w with immediate count 64 (out of range for i16 lanes): the left shift is expected to fold to an all-zero vector.
568 define <16 x i16> @avx2_pslli_w_64(<16 x i16> %v) {
569 ; CHECK-LABEL: @avx2_pslli_w_64(
570 ; CHECK-NEXT: ret <16 x i16> zeroinitializer
572 %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 64)
; AVX2 pslli.d with immediate count 0: the call is expected to fold away, returning %v unchanged.
576 define <8 x i32> @avx2_pslli_d_0(<8 x i32> %v) {
577 ; CHECK-LABEL: @avx2_pslli_d_0(
578 ; CHECK-NEXT: ret <8 x i32> [[V:%.*]]
580 %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 0)
; AVX2 pslli.d with immediate count 15 (in range for i32 lanes): expected to become a generic shl by splat 15.
584 define <8 x i32> @avx2_pslli_d_15(<8 x i32> %v) {
585 ; CHECK-LABEL: @avx2_pslli_d_15(
586 ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[V:%.*]], splat (i32 15)
587 ; CHECK-NEXT: ret <8 x i32> [[TMP1]]
589 %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 15)
; AVX2 pslli.d with immediate count 64 (out of range for i32 lanes): the left shift is expected to fold to an all-zero vector.
593 define <8 x i32> @avx2_pslli_d_64(<8 x i32> %v) {
594 ; CHECK-LABEL: @avx2_pslli_d_64(
595 ; CHECK-NEXT: ret <8 x i32> zeroinitializer
597 %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 64)
; AVX2 pslli.q with immediate count 0: the call is expected to fold away, returning %v unchanged.
601 define <4 x i64> @avx2_pslli_q_0(<4 x i64> %v) {
602 ; CHECK-LABEL: @avx2_pslli_q_0(
603 ; CHECK-NEXT: ret <4 x i64> [[V:%.*]]
605 %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 0)
; AVX2 pslli.q with immediate count 15 (in range for i64 lanes): expected to become a generic shl by splat 15.
609 define <4 x i64> @avx2_pslli_q_15(<4 x i64> %v) {
610 ; CHECK-LABEL: @avx2_pslli_q_15(
611 ; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i64> [[V:%.*]], splat (i64 15)
612 ; CHECK-NEXT: ret <4 x i64> [[TMP1]]
614 %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 15)
; AVX2 pslli.q with immediate count 64 (equal to the i64 lane width, so out of range): expected to fold to an all-zero vector.
618 define <4 x i64> @avx2_pslli_q_64(<4 x i64> %v) {
619 ; CHECK-LABEL: @avx2_pslli_q_64(
620 ; CHECK-NEXT: ret <4 x i64> zeroinitializer
622 %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 64)
; AVX-512 pslli.w.512 with immediate count 0: the call is expected to fold away, returning %v unchanged.
626 define <32 x i16> @avx512_pslli_w_512_0(<32 x i16> %v) {
627 ; CHECK-LABEL: @avx512_pslli_w_512_0(
628 ; CHECK-NEXT: ret <32 x i16> [[V:%.*]]
630 %1 = tail call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %v, i32 0)
; AVX-512 pslli.w.512 with immediate count 15 (in range for i16 lanes): expected to become a generic shl by splat 15.
634 define <32 x i16> @avx512_pslli_w_512_15(<32 x i16> %v) {
635 ; CHECK-LABEL: @avx512_pslli_w_512_15(
636 ; CHECK-NEXT: [[TMP1:%.*]] = shl <32 x i16> [[V:%.*]], splat (i16 15)
637 ; CHECK-NEXT: ret <32 x i16> [[TMP1]]
639 %1 = tail call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %v, i32 15)
; AVX-512 pslli.w.512 with immediate count 64 (out of range for i16 lanes): the left shift is expected to fold to an all-zero vector.
643 define <32 x i16> @avx512_pslli_w_512_64(<32 x i16> %v) {
644 ; CHECK-LABEL: @avx512_pslli_w_512_64(
645 ; CHECK-NEXT: ret <32 x i16> zeroinitializer
647 %1 = tail call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %v, i32 64)
; AVX-512 pslli.d.512 with immediate count 0: the call is expected to fold away, returning %v unchanged.
651 define <16 x i32> @avx512_pslli_d_512_0(<16 x i32> %v) {
652 ; CHECK-LABEL: @avx512_pslli_d_512_0(
653 ; CHECK-NEXT: ret <16 x i32> [[V:%.*]]
655 %1 = tail call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %v, i32 0)
; AVX-512 pslli.d.512 with immediate count 15 (in range for i32 lanes): expected to become a generic shl by splat 15.
659 define <16 x i32> @avx512_pslli_d_512_15(<16 x i32> %v) {
660 ; CHECK-LABEL: @avx512_pslli_d_512_15(
661 ; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i32> [[V:%.*]], splat (i32 15)
662 ; CHECK-NEXT: ret <16 x i32> [[TMP1]]
664 %1 = tail call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %v, i32 15)
; AVX-512 pslli.d.512 with immediate count 64 (out of range for i32 lanes): the left shift is expected to fold to an all-zero vector.
668 define <16 x i32> @avx512_pslli_d_512_64(<16 x i32> %v) {
669 ; CHECK-LABEL: @avx512_pslli_d_512_64(
670 ; CHECK-NEXT: ret <16 x i32> zeroinitializer
672 %1 = tail call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %v, i32 64)
; AVX-512 pslli.q.512 with immediate count 0: the call is expected to fold away, returning %v unchanged.
676 define <8 x i64> @avx512_pslli_q_512_0(<8 x i64> %v) {
677 ; CHECK-LABEL: @avx512_pslli_q_512_0(
678 ; CHECK-NEXT: ret <8 x i64> [[V:%.*]]
680 %1 = tail call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %v, i32 0)
; AVX-512 pslli.q.512 with immediate count 15 (in range for i64 lanes): expected to become a generic shl by splat 15.
684 define <8 x i64> @avx512_pslli_q_512_15(<8 x i64> %v) {
685 ; CHECK-LABEL: @avx512_pslli_q_512_15(
686 ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i64> [[V:%.*]], splat (i64 15)
687 ; CHECK-NEXT: ret <8 x i64> [[TMP1]]
689 %1 = tail call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %v, i32 15)
; AVX-512 pslli.q.512 with immediate count 64 (equal to the i64 lane width, so out of range): expected to fold to an all-zero vector.
693 define <8 x i64> @avx512_pslli_q_512_64(<8 x i64> %v) {
694 ; CHECK-LABEL: @avx512_pslli_q_512_64(
695 ; CHECK-NEXT: ret <8 x i64> zeroinitializer
697 %1 = tail call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %v, i32 64)
702 ; ASHR - Constant Vector
; psra.w with an all-zero count vector: the call is expected to fold away, returning %v unchanged.
705 define <8 x i16> @sse2_psra_w_0(<8 x i16> %v) {
706 ; CHECK-LABEL: @sse2_psra_w_0(
707 ; CHECK-NEXT: ret <8 x i16> [[V:%.*]]
709 %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> zeroinitializer)
; psra.w with a count vector whose low 64 bits (lanes 0-3) equal 15; the upper lanes hold 9999 and are expected to be ignored,
; so the call should become a generic ashr by splat 15.
713 define <8 x i16> @sse2_psra_w_15(<8 x i16> %v) {
714 ; CHECK-LABEL: @sse2_psra_w_15(
715 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], splat (i16 15)
716 ; CHECK-NEXT: ret <8 x i16> [[TMP1]]
718 %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
; psra.w with a splat-of-15 count vector: lanes 0-3 read together as one 64-bit count are far out of range,
; so the expected ashr amount is the clamped maximum for i16 lanes, which is also 15.
722 define <8 x i16> @sse2_psra_w_15_splat(<8 x i16> %v) {
723 ; CHECK-LABEL: @sse2_psra_w_15_splat(
724 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], splat (i16 15)
725 ; CHECK-NEXT: ret <8 x i16> [[TMP1]]
727 %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
; psra.w with a count vector whose low 64 bits equal 64 (out of range for i16 lanes): the expected ashr amount is clamped to 15.
731 define <8 x i16> @sse2_psra_w_64(<8 x i16> %v) {
732 ; CHECK-LABEL: @sse2_psra_w_64(
733 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], splat (i16 15)
734 ; CHECK-NEXT: ret <8 x i16> [[TMP1]]
736 %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
; psra.d with an all-zero count vector: the call is expected to fold away, returning %v unchanged.
740 define <4 x i32> @sse2_psra_d_0(<4 x i32> %v) {
741 ; CHECK-LABEL: @sse2_psra_d_0(
742 ; CHECK-NEXT: ret <4 x i32> [[V:%.*]]
744 %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> zeroinitializer)
; psra.d with a count vector whose low 64 bits (lanes 0-1) equal 15; the upper lanes hold 9999 and are expected to be ignored,
; so the call should become a generic ashr by splat 15.
748 define <4 x i32> @sse2_psra_d_15(<4 x i32> %v) {
749 ; CHECK-LABEL: @sse2_psra_d_15(
750 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], splat (i32 15)
751 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
753 %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
; psra.d with a splat-of-15 count vector: lanes 0-1 read together as one 64-bit count are out of range,
; so the expected ashr amount is clamped to 31 rather than 15.
757 define <4 x i32> @sse2_psra_d_15_splat(<4 x i32> %v) {
758 ; CHECK-LABEL: @sse2_psra_d_15_splat(
759 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], splat (i32 31)
760 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
762 %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
; psra.d with a count vector whose low 64 bits equal 64 (out of range for i32 lanes): the expected ashr amount is clamped to 31.
766 define <4 x i32> @sse2_psra_d_64(<4 x i32> %v) {
767 ; CHECK-LABEL: @sse2_psra_d_64(
768 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], splat (i32 31)
769 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
771 %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
; AVX2 psra.w with an all-zero count vector: the call is expected to fold away, returning %v unchanged.
775 define <16 x i16> @avx2_psra_w_0(<16 x i16> %v) {
776 ; CHECK-LABEL: @avx2_psra_w_0(
777 ; CHECK-NEXT: ret <16 x i16> [[V:%.*]]
779 %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> zeroinitializer)
; AVX2 psra.w with a count vector whose low 64 bits (lanes 0-3) equal 15; the upper lanes hold 9999 and are expected to be ignored,
; so the call should become a generic ashr by splat 15.
783 define <16 x i16> @avx2_psra_w_15(<16 x i16> %v) {
784 ; CHECK-LABEL: @avx2_psra_w_15(
785 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], splat (i16 15)
786 ; CHECK-NEXT: ret <16 x i16> [[TMP1]]
788 %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
; AVX2 psra.w with a splat-of-15 count vector: lanes 0-3 read together as one 64-bit count are far out of range,
; so the expected ashr amount is the clamped maximum for i16 lanes, which is also 15.
792 define <16 x i16> @avx2_psra_w_15_splat(<16 x i16> %v) {
793 ; CHECK-LABEL: @avx2_psra_w_15_splat(
794 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], splat (i16 15)
795 ; CHECK-NEXT: ret <16 x i16> [[TMP1]]
797 %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
; AVX2 psra.w with a count vector whose low 64 bits equal 64 (out of range for i16 lanes): the expected ashr amount is clamped to 15.
801 define <16 x i16> @avx2_psra_w_64(<16 x i16> %v) {
802 ; CHECK-LABEL: @avx2_psra_w_64(
803 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], splat (i16 15)
804 ; CHECK-NEXT: ret <16 x i16> [[TMP1]]
806 %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
; AVX2 psra.d with an all-zero count vector: the call is expected to fold away, returning %v unchanged.
810 define <8 x i32> @avx2_psra_d_0(<8 x i32> %v) {
811 ; CHECK-LABEL: @avx2_psra_d_0(
812 ; CHECK-NEXT: ret <8 x i32> [[V:%.*]]
814 %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> zeroinitializer)
; AVX2 psra.d with a count vector whose low 64 bits (lanes 0-1) equal 15; the upper lanes hold 9999 and are expected to be ignored,
; so the call should become a generic ashr by splat 15.
818 define <8 x i32> @avx2_psra_d_15(<8 x i32> %v) {
819 ; CHECK-LABEL: @avx2_psra_d_15(
820 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], splat (i32 15)
821 ; CHECK-NEXT: ret <8 x i32> [[TMP1]]
823 %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
; AVX2 psra.d with a splat-of-15 count vector: lanes 0-1 read together as one 64-bit count are out of range,
; so the expected ashr amount is clamped to 31 rather than 15.
827 define <8 x i32> @avx2_psra_d_15_splat(<8 x i32> %v) {
828 ; CHECK-LABEL: @avx2_psra_d_15_splat(
829 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], splat (i32 31)
830 ; CHECK-NEXT: ret <8 x i32> [[TMP1]]
832 %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
; AVX2 psra.d with a count vector whose low 64 bits equal 64 (out of range for i32 lanes): the expected ashr amount is clamped to 31.
836 define <8 x i32> @avx2_psra_d_64(<8 x i32> %v) {
837 ; CHECK-LABEL: @avx2_psra_d_64(
838 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], splat (i32 31)
839 ; CHECK-NEXT: ret <8 x i32> [[TMP1]]
841 %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
; AVX-512 psra.q.128 with an all-zero count vector: the call is expected to fold away, returning %v unchanged.
845 define <2 x i64> @avx512_psra_q_128_0(<2 x i64> %v) {
846 ; CHECK-LABEL: @avx512_psra_q_128_0(
847 ; CHECK-NEXT: ret <2 x i64> [[V:%.*]]
849 %1 = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %v, <2 x i64> zeroinitializer)
; AVX-512 psra.q.128 with count vector <15, 9999>: only lane 0 is expected to supply the count, so the call should become ashr by splat 15.
853 define <2 x i64> @avx512_psra_q_128_15(<2 x i64> %v) {
854 ; CHECK-LABEL: @avx512_psra_q_128_15(
855 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], splat (i64 15)
856 ; CHECK-NEXT: ret <2 x i64> [[TMP1]]
858 %1 = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
; AVX-512 psra.q.128 with count vector <64, 9999>: a count of 64 is out of range for i64 lanes, so the expected ashr amount is clamped to 63.
862 define <2 x i64> @avx512_psra_q_128_64(<2 x i64> %v) {
863 ; CHECK-LABEL: @avx512_psra_q_128_64(
864 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], splat (i64 63)
865 ; CHECK-NEXT: ret <2 x i64> [[TMP1]]
867 %1 = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
; AVX-512 psra.q.256 with an all-zero count vector: the call is expected to fold away, returning %v unchanged.
871 define <4 x i64> @avx512_psra_q_256_0(<4 x i64> %v) {
872 ; CHECK-LABEL: @avx512_psra_q_256_0(
873 ; CHECK-NEXT: ret <4 x i64> [[V:%.*]]
875 %1 = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %v, <2 x i64> zeroinitializer)
; AVX-512 psra.q.256 with count vector <15, 9999>: only lane 0 is expected to supply the count, so the call should become ashr by splat 15.
879 define <4 x i64> @avx512_psra_q_256_15(<4 x i64> %v) {
880 ; CHECK-LABEL: @avx512_psra_q_256_15(
881 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], splat (i64 15)
882 ; CHECK-NEXT: ret <4 x i64> [[TMP1]]
884 %1 = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
; AVX-512 psra.q.256 with count vector <64, 9999>: a count of 64 is out of range for i64 lanes, so the expected ashr amount is clamped to 63.
888 define <4 x i64> @avx512_psra_q_256_64(<4 x i64> %v) {
889 ; CHECK-LABEL: @avx512_psra_q_256_64(
890 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], splat (i64 63)
891 ; CHECK-NEXT: ret <4 x i64> [[TMP1]]
893 %1 = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
; AVX-512 psra.w.512 with an all-zero count vector: the call is expected to fold away, returning %v unchanged.
897 define <32 x i16> @avx512_psra_w_512_0(<32 x i16> %v) {
898 ; CHECK-LABEL: @avx512_psra_w_512_0(
899 ; CHECK-NEXT: ret <32 x i16> [[V:%.*]]
901 %1 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> zeroinitializer)
; AVX-512 psra.w.512 with a count vector whose low 64 bits (lanes 0-3) equal 15; the upper lanes hold 9999 and are expected to be ignored,
; so the call should become a generic ashr by splat 15.
905 define <32 x i16> @avx512_psra_w_512_15(<32 x i16> %v) {
906 ; CHECK-LABEL: @avx512_psra_w_512_15(
907 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], splat (i16 15)
908 ; CHECK-NEXT: ret <32 x i16> [[TMP1]]
910 %1 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
; AVX-512 psra.w.512 with a splat-of-15 count vector: lanes 0-3 read together as one 64-bit count are far out of range,
; so the expected ashr amount is the clamped maximum for i16 lanes, which is also 15.
914 define <32 x i16> @avx512_psra_w_512_15_splat(<32 x i16> %v) {
915 ; CHECK-LABEL: @avx512_psra_w_512_15_splat(
916 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], splat (i16 15)
917 ; CHECK-NEXT: ret <32 x i16> [[TMP1]]
919 %1 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
; AVX-512 psra.w.512 with a count vector whose low 64 bits equal 64 (out of range for i16 lanes): the expected ashr amount is clamped to 15.
923 define <32 x i16> @avx512_psra_w_512_64(<32 x i16> %v) {
924 ; CHECK-LABEL: @avx512_psra_w_512_64(
925 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], splat (i16 15)
926 ; CHECK-NEXT: ret <32 x i16> [[TMP1]]
928 %1 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
; AVX-512 psra.d.512 with an all-zero count vector: the call is expected to fold away, returning %v unchanged.
932 define <16 x i32> @avx512_psra_d_512_0(<16 x i32> %v) {
933 ; CHECK-LABEL: @avx512_psra_d_512_0(
934 ; CHECK-NEXT: ret <16 x i32> [[V:%.*]]
936 %1 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> zeroinitializer)
; AVX-512 psra.d.512 with a count vector whose low 64 bits (lanes 0-1) equal 15; the upper lanes hold 9999 and are expected to be ignored,
; so the call should become a generic ashr by splat 15.
940 define <16 x i32> @avx512_psra_d_512_15(<16 x i32> %v) {
941 ; CHECK-LABEL: @avx512_psra_d_512_15(
942 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], splat (i32 15)
943 ; CHECK-NEXT: ret <16 x i32> [[TMP1]]
945 %1 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
; AVX-512 psra.d.512 with a splat-of-15 count vector: lanes 0-1 read together as one 64-bit count are out of range,
; so the expected ashr amount is clamped to 31 rather than 15.
949 define <16 x i32> @avx512_psra_d_512_15_splat(<16 x i32> %v) {
950 ; CHECK-LABEL: @avx512_psra_d_512_15_splat(
951 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], splat (i32 31)
952 ; CHECK-NEXT: ret <16 x i32> [[TMP1]]
954 %1 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
; AVX-512 psra.d.512 with a count vector whose low 64 bits equal 64 (out of range for i32 lanes): the expected ashr amount is clamped to 31.
958 define <16 x i32> @avx512_psra_d_512_64(<16 x i32> %v) {
959 ; CHECK-LABEL: @avx512_psra_d_512_64(
960 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], splat (i32 31)
961 ; CHECK-NEXT: ret <16 x i32> [[TMP1]]
963 %1 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
; AVX-512 psra.q.512 with an all-zero count vector: the call is expected to fold away, returning %v unchanged.
967 define <8 x i64> @avx512_psra_q_512_0(<8 x i64> %v) {
968 ; CHECK-LABEL: @avx512_psra_q_512_0(
969 ; CHECK-NEXT: ret <8 x i64> [[V:%.*]]
971 %1 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> zeroinitializer)
; AVX-512 psra.q.512 with count vector <15, 9999>: only lane 0 is expected to supply the count, so the call should become ashr by splat 15.
975 define <8 x i64> @avx512_psra_q_512_15(<8 x i64> %v) {
976 ; CHECK-LABEL: @avx512_psra_q_512_15(
977 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], splat (i64 15)
978 ; CHECK-NEXT: ret <8 x i64> [[TMP1]]
980 %1 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> <i64 15, i64 9999>)
; AVX-512 psra.q.512 with count vector <64, 9999>: a count of 64 is out of range for i64 lanes, so the expected ashr amount is clamped to 63.
984 define <8 x i64> @avx512_psra_q_512_64(<8 x i64> %v) {
985 ; CHECK-LABEL: @avx512_psra_q_512_64(
986 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], splat (i64 63)
987 ; CHECK-NEXT: ret <8 x i64> [[TMP1]]
989 %1 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> <i64 64, i64 9999>)
994 ; LSHR - Constant Vector
; psrl.w with an all-zero count vector: the call is expected to fold away, returning %v unchanged.
997 define <8 x i16> @sse2_psrl_w_0(<8 x i16> %v) {
998 ; CHECK-LABEL: @sse2_psrl_w_0(
999 ; CHECK-NEXT: ret <8 x i16> [[V:%.*]]
1001 %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> zeroinitializer)
; psrl.w with a count vector whose low 64 bits (lanes 0-3) equal 15; the upper lanes hold 9999 and are expected to be ignored,
; so the call should become a generic lshr by splat 15.
1005 define <8 x i16> @sse2_psrl_w_15(<8 x i16> %v) {
1006 ; CHECK-LABEL: @sse2_psrl_w_15(
1007 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i16> [[V:%.*]], splat (i16 15)
1008 ; CHECK-NEXT: ret <8 x i16> [[TMP1]]
1010 %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
; psrl.w with a splat-of-15 count vector: lanes 0-3 read together as one 64-bit count are far out of range,
; so the logical right shift is expected to fold to an all-zero vector.
1014 define <8 x i16> @sse2_psrl_w_15_splat(<8 x i16> %v) {
1015 ; CHECK-LABEL: @sse2_psrl_w_15_splat(
1016 ; CHECK-NEXT: ret <8 x i16> zeroinitializer
1018 %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
; psrl.w with a count vector whose low 64 bits equal 64 (out of range for i16 lanes): expected to fold to an all-zero vector.
1022 define <8 x i16> @sse2_psrl_w_64(<8 x i16> %v) {
1023 ; CHECK-LABEL: @sse2_psrl_w_64(
1024 ; CHECK-NEXT: ret <8 x i16> zeroinitializer
1026 %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
; psrl.d with an all-zero count vector: the call is expected to fold away, returning %v unchanged.
1030 define <4 x i32> @sse2_psrl_d_0(<4 x i32> %v) {
1031 ; CHECK-LABEL: @sse2_psrl_d_0(
1032 ; CHECK-NEXT: ret <4 x i32> [[V:%.*]]
1034 %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> zeroinitializer)
; psrl.d with a count vector whose low 64 bits (lanes 0-1) equal 15; the upper lanes hold 9999 and are expected to be ignored,
; so the call should become a generic lshr by splat 15.
1038 define <4 x i32> @sse2_psrl_d_15(<4 x i32> %v) {
1039 ; CHECK-LABEL: @sse2_psrl_d_15(
1040 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[V:%.*]], splat (i32 15)
1041 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
1043 %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
; Count operand is 15 in every element; the expected output is zeroinitializer,
; unlike @sse2_psrl_d_15 where element 1 is zero.
; NOTE(review): presumably elements 0 and 1 are read together as one wide count - confirm against the instruction reference.
1047 define <4 x i32> @sse2_psrl_d_15_splat(<4 x i32> %v) {
1048 ; CHECK-LABEL: @sse2_psrl_d_15_splat(
1049 ; CHECK-NEXT: ret <4 x i32> zeroinitializer
1051 %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
; Element 0 of the count operand is 64, larger than the 32-bit element width;
; the expected output is zeroinitializer.
1055 define <4 x i32> @sse2_psrl_d_64(<4 x i32> %v) {
1056 ; CHECK-LABEL: @sse2_psrl_d_64(
1057 ; CHECK-NEXT: ret <4 x i32> zeroinitializer
1059 %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
; All-zero count operand: the intrinsic call is expected to fold away and %v is returned unchanged.
1063 define <2 x i64> @sse2_psrl_q_0(<2 x i64> %v) {
1064 ; CHECK-LABEL: @sse2_psrl_q_0(
1065 ; CHECK-NEXT: ret <2 x i64> [[V:%.*]]
1067 %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> zeroinitializer)
; Count operand is <15, 9999>; the expected output is a plain lshr of %v by splat 15
; (the 9999 in element 1 does not show up in the result).
1071 define <2 x i64> @sse2_psrl_q_15(<2 x i64> %v) {
1072 ; CHECK-LABEL: @sse2_psrl_q_15(
1073 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[V:%.*]], splat (i64 15)
1074 ; CHECK-NEXT: ret <2 x i64> [[TMP1]]
1076 %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
; Element 0 of the count operand is 64, equal to the element width;
; the expected output is zeroinitializer.
1080 define <2 x i64> @sse2_psrl_q_64(<2 x i64> %v) {
1081 ; CHECK-LABEL: @sse2_psrl_q_64(
1082 ; CHECK-NEXT: ret <2 x i64> zeroinitializer
1084 %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
; All-zero count operand: the intrinsic call is expected to fold away and %v is returned unchanged.
1088 define <16 x i16> @avx2_psrl_w_0(<16 x i16> %v) {
1089 ; CHECK-LABEL: @avx2_psrl_w_0(
1090 ; CHECK-NEXT: ret <16 x i16> [[V:%.*]]
1092 %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> zeroinitializer)
; Count operand has 15 in element 0, zeros in elements 1-3 and 9999 in elements 4-7;
; the expected output is a plain lshr of %v by splat 15.
1096 define <16 x i16> @avx2_psrl_w_15(<16 x i16> %v) {
1097 ; CHECK-LABEL: @avx2_psrl_w_15(
1098 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i16> [[V:%.*]], splat (i16 15)
1099 ; CHECK-NEXT: ret <16 x i16> [[TMP1]]
1101 %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
; Count operand is 15 in every element; the expected output is zeroinitializer,
; unlike @avx2_psrl_w_15 where only element 0 is nonzero in the low half.
; NOTE(review): presumably the low elements are read together as one wide count - confirm against the instruction reference.
1105 define <16 x i16> @avx2_psrl_w_15_splat(<16 x i16> %v) {
1106 ; CHECK-LABEL: @avx2_psrl_w_15_splat(
1107 ; CHECK-NEXT: ret <16 x i16> zeroinitializer
1109 %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
; Element 0 of the count operand is 64, larger than the 16-bit element width;
; the expected output is zeroinitializer.
1113 define <16 x i16> @avx2_psrl_w_64(<16 x i16> %v) {
1114 ; CHECK-LABEL: @avx2_psrl_w_64(
1115 ; CHECK-NEXT: ret <16 x i16> zeroinitializer
1117 %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
; All-zero count operand: the intrinsic call is expected to fold away and %v is returned unchanged.
1121 define <8 x i32> @avx2_psrl_d_0(<8 x i32> %v) {
1122 ; CHECK-LABEL: @avx2_psrl_d_0(
1123 ; CHECK-NEXT: ret <8 x i32> [[V:%.*]]
1125 %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> zeroinitializer)
; Count operand is <15, 0, 9999, 9999>; the expected output is a plain lshr of %v by splat 15.
1129 define <8 x i32> @avx2_psrl_d_15(<8 x i32> %v) {
1130 ; CHECK-LABEL: @avx2_psrl_d_15(
1131 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> [[V:%.*]], splat (i32 15)
1132 ; CHECK-NEXT: ret <8 x i32> [[TMP1]]
1134 %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
; Count operand is 15 in every element; the expected output is zeroinitializer,
; unlike @avx2_psrl_d_15 where element 1 is zero.
; NOTE(review): presumably elements 0 and 1 are read together as one wide count - confirm against the instruction reference.
1138 define <8 x i32> @avx2_psrl_d_15_splat(<8 x i32> %v) {
1139 ; CHECK-LABEL: @avx2_psrl_d_15_splat(
1140 ; CHECK-NEXT: ret <8 x i32> zeroinitializer
1142 %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
; Element 0 of the count operand is 64, larger than the 32-bit element width;
; the expected output is zeroinitializer.
1146 define <8 x i32> @avx2_psrl_d_64(<8 x i32> %v) {
1147 ; CHECK-LABEL: @avx2_psrl_d_64(
1148 ; CHECK-NEXT: ret <8 x i32> zeroinitializer
1150 %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
; All-zero count operand: the intrinsic call is expected to fold away and %v is returned unchanged.
1154 define <4 x i64> @avx2_psrl_q_0(<4 x i64> %v) {
1155 ; CHECK-LABEL: @avx2_psrl_q_0(
1156 ; CHECK-NEXT: ret <4 x i64> [[V:%.*]]
1158 %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> zeroinitializer)
; Count operand is <15, 9999>; the expected output is a plain lshr of %v by splat 15
; (the 9999 in element 1 does not show up in the result).
1162 define <4 x i64> @avx2_psrl_q_15(<4 x i64> %v) {
1163 ; CHECK-LABEL: @avx2_psrl_q_15(
1164 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i64> [[V:%.*]], splat (i64 15)
1165 ; CHECK-NEXT: ret <4 x i64> [[TMP1]]
1167 %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
; Element 0 of the count operand is 64, equal to the element width;
; the expected output is zeroinitializer.
1171 define <4 x i64> @avx2_psrl_q_64(<4 x i64> %v) {
1172 ; CHECK-LABEL: @avx2_psrl_q_64(
1173 ; CHECK-NEXT: ret <4 x i64> zeroinitializer
1175 %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
; All-zero count operand: the intrinsic call is expected to fold away and %v is returned unchanged.
1179 define <32 x i16> @avx512_psrl_w_512_0(<32 x i16> %v) {
1180 ; CHECK-LABEL: @avx512_psrl_w_512_0(
1181 ; CHECK-NEXT: ret <32 x i16> [[V:%.*]]
1183 %1 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> zeroinitializer)
; Count operand has 15 in element 0, zeros in elements 1-3 and 9999 in elements 4-7;
; the expected output is a plain lshr of %v by splat 15.
1187 define <32 x i16> @avx512_psrl_w_512_15(<32 x i16> %v) {
1188 ; CHECK-LABEL: @avx512_psrl_w_512_15(
1189 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <32 x i16> [[V:%.*]], splat (i16 15)
1190 ; CHECK-NEXT: ret <32 x i16> [[TMP1]]
1192 %1 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
; Count operand is 15 in every element; the expected output is zeroinitializer,
; unlike @avx512_psrl_w_512_15 where only element 0 is nonzero in the low half.
; NOTE(review): presumably the low elements are read together as one wide count - confirm against the instruction reference.
1196 define <32 x i16> @avx512_psrl_w_512_15_splat(<32 x i16> %v) {
1197 ; CHECK-LABEL: @avx512_psrl_w_512_15_splat(
1198 ; CHECK-NEXT: ret <32 x i16> zeroinitializer
1200 %1 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
; Element 0 of the count operand is 64, larger than the 16-bit element width;
; the expected output is zeroinitializer.
1204 define <32 x i16> @avx512_psrl_w_512_64(<32 x i16> %v) {
1205 ; CHECK-LABEL: @avx512_psrl_w_512_64(
1206 ; CHECK-NEXT: ret <32 x i16> zeroinitializer
1208 %1 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
; All-zero count operand: the intrinsic call is expected to fold away and %v is returned unchanged.
1212 define <16 x i32> @avx512_psrl_d_512_0(<16 x i32> %v) {
1213 ; CHECK-LABEL: @avx512_psrl_d_512_0(
1214 ; CHECK-NEXT: ret <16 x i32> [[V:%.*]]
1216 %1 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> zeroinitializer)
; Count operand is <15, 0, 9999, 9999>; the expected output is a plain lshr of %v by splat 15.
1220 define <16 x i32> @avx512_psrl_d_512_15(<16 x i32> %v) {
1221 ; CHECK-LABEL: @avx512_psrl_d_512_15(
1222 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i32> [[V:%.*]], splat (i32 15)
1223 ; CHECK-NEXT: ret <16 x i32> [[TMP1]]
1225 %1 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
; Count operand is 15 in every element; the expected output is zeroinitializer,
; unlike @avx512_psrl_d_512_15 where element 1 is zero.
; NOTE(review): presumably elements 0 and 1 are read together as one wide count - confirm against the instruction reference.
1229 define <16 x i32> @avx512_psrl_d_512_15_splat(<16 x i32> %v) {
1230 ; CHECK-LABEL: @avx512_psrl_d_512_15_splat(
1231 ; CHECK-NEXT: ret <16 x i32> zeroinitializer
1233 %1 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
; Element 0 of the count operand is 64, larger than the 32-bit element width;
; the expected output is zeroinitializer.
1237 define <16 x i32> @avx512_psrl_d_512_64(<16 x i32> %v) {
1238 ; CHECK-LABEL: @avx512_psrl_d_512_64(
1239 ; CHECK-NEXT: ret <16 x i32> zeroinitializer
1241 %1 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
; All-zero count operand: the intrinsic call is expected to fold away and %v is returned unchanged.
1245 define <8 x i64> @avx512_psrl_q_512_0(<8 x i64> %v) {
1246 ; CHECK-LABEL: @avx512_psrl_q_512_0(
1247 ; CHECK-NEXT: ret <8 x i64> [[V:%.*]]
1249 %1 = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %v, <2 x i64> zeroinitializer)
; Count operand is <15, 9999>; the expected output is a plain lshr of %v by splat 15
; (the 9999 in element 1 does not show up in the result).
1253 define <8 x i64> @avx512_psrl_q_512_15(<8 x i64> %v) {
1254 ; CHECK-LABEL: @avx512_psrl_q_512_15(
1255 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i64> [[V:%.*]], splat (i64 15)
1256 ; CHECK-NEXT: ret <8 x i64> [[TMP1]]
1258 %1 = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %v, <2 x i64> <i64 15, i64 9999>)
; Element 0 of the count operand is 64, equal to the element width;
; the expected output is zeroinitializer.
1262 define <8 x i64> @avx512_psrl_q_512_64(<8 x i64> %v) {
1263 ; CHECK-LABEL: @avx512_psrl_q_512_64(
1264 ; CHECK-NEXT: ret <8 x i64> zeroinitializer
1266 %1 = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %v, <2 x i64> <i64 64, i64 9999>)
1271 ; SHL - Constant Vector
; All-zero count operand: the intrinsic call is expected to fold away and %v is returned unchanged.
1274 define <8 x i16> @sse2_psll_w_0(<8 x i16> %v) {
1275 ; CHECK-LABEL: @sse2_psll_w_0(
1276 ; CHECK-NEXT: ret <8 x i16> [[V:%.*]]
1278 %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> zeroinitializer)
; Count operand has 15 in element 0, zeros in elements 1-3 and 9999 in elements 4-7;
; the expected output is a plain shl of %v by splat 15.
1282 define <8 x i16> @sse2_psll_w_15(<8 x i16> %v) {
1283 ; CHECK-LABEL: @sse2_psll_w_15(
1284 ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i16> [[V:%.*]], splat (i16 15)
1285 ; CHECK-NEXT: ret <8 x i16> [[TMP1]]
1287 %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
; Count operand is 15 in every element; the expected output is zeroinitializer,
; unlike @sse2_psll_w_15 where only element 0 is nonzero in the low half.
; NOTE(review): presumably the low elements are read together as one wide count - confirm against the instruction reference.
1291 define <8 x i16> @sse2_psll_w_15_splat(<8 x i16> %v) {
1292 ; CHECK-LABEL: @sse2_psll_w_15_splat(
1293 ; CHECK-NEXT: ret <8 x i16> zeroinitializer
1295 %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
; Element 0 of the count operand is 64, larger than the 16-bit element width;
; the expected output is zeroinitializer.
1299 define <8 x i16> @sse2_psll_w_64(<8 x i16> %v) {
1300 ; CHECK-LABEL: @sse2_psll_w_64(
1301 ; CHECK-NEXT: ret <8 x i16> zeroinitializer
1303 %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
; All-zero count operand: the intrinsic call is expected to fold away and %v is returned unchanged.
1307 define <4 x i32> @sse2_psll_d_0(<4 x i32> %v) {
1308 ; CHECK-LABEL: @sse2_psll_d_0(
1309 ; CHECK-NEXT: ret <4 x i32> [[V:%.*]]
1311 %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> zeroinitializer)
; Count operand is <15, 0, 9999, 9999>; the expected output is a plain shl of %v by splat 15.
1315 define <4 x i32> @sse2_psll_d_15(<4 x i32> %v) {
1316 ; CHECK-LABEL: @sse2_psll_d_15(
1317 ; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], splat (i32 15)
1318 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
1320 %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
; Count operand is 15 in every element; the expected output is zeroinitializer,
; unlike @sse2_psll_d_15 where element 1 is zero.
; NOTE(review): presumably elements 0 and 1 are read together as one wide count - confirm against the instruction reference.
1324 define <4 x i32> @sse2_psll_d_15_splat(<4 x i32> %v) {
1325 ; CHECK-LABEL: @sse2_psll_d_15_splat(
1326 ; CHECK-NEXT: ret <4 x i32> zeroinitializer
1328 %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
; Element 0 of the count operand is 64, larger than the 32-bit element width;
; the expected output is zeroinitializer.
1332 define <4 x i32> @sse2_psll_d_64(<4 x i32> %v) {
1333 ; CHECK-LABEL: @sse2_psll_d_64(
1334 ; CHECK-NEXT: ret <4 x i32> zeroinitializer
1336 %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
; All-zero count operand: the intrinsic call is expected to fold away and %v is returned unchanged.
1340 define <2 x i64> @sse2_psll_q_0(<2 x i64> %v) {
1341 ; CHECK-LABEL: @sse2_psll_q_0(
1342 ; CHECK-NEXT: ret <2 x i64> [[V:%.*]]
1344 %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> zeroinitializer)
; Count operand is <15, 9999>; the expected output is a plain shl of %v by splat 15
; (the 9999 in element 1 does not show up in the result).
1348 define <2 x i64> @sse2_psll_q_15(<2 x i64> %v) {
1349 ; CHECK-LABEL: @sse2_psll_q_15(
1350 ; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[V:%.*]], splat (i64 15)
1351 ; CHECK-NEXT: ret <2 x i64> [[TMP1]]
1353 %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
; Element 0 of the count operand is 64, equal to the element width;
; the expected output is zeroinitializer.
1357 define <2 x i64> @sse2_psll_q_64(<2 x i64> %v) {
1358 ; CHECK-LABEL: @sse2_psll_q_64(
1359 ; CHECK-NEXT: ret <2 x i64> zeroinitializer
1361 %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
; All-zero count operand: the intrinsic call is expected to fold away and %v is returned unchanged.
1365 define <16 x i16> @avx2_psll_w_0(<16 x i16> %v) {
1366 ; CHECK-LABEL: @avx2_psll_w_0(
1367 ; CHECK-NEXT: ret <16 x i16> [[V:%.*]]
1369 %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> zeroinitializer)
; Count operand has 15 in element 0, zeros in elements 1-3 and 9999 in elements 4-7;
; the expected output is a plain shl of %v by splat 15.
1373 define <16 x i16> @avx2_psll_w_15(<16 x i16> %v) {
1374 ; CHECK-LABEL: @avx2_psll_w_15(
1375 ; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i16> [[V:%.*]], splat (i16 15)
1376 ; CHECK-NEXT: ret <16 x i16> [[TMP1]]
1378 %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
; Count operand is 15 in every element; the expected output is zeroinitializer,
; unlike @avx2_psll_w_15 where only element 0 is nonzero in the low half.
; NOTE(review): presumably the low elements are read together as one wide count - confirm against the instruction reference.
1382 define <16 x i16> @avx2_psll_w_15_splat(<16 x i16> %v) {
1383 ; CHECK-LABEL: @avx2_psll_w_15_splat(
1384 ; CHECK-NEXT: ret <16 x i16> zeroinitializer
1386 %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
; Element 0 of the count operand is 64, larger than the 16-bit element width;
; the expected output is zeroinitializer.
1390 define <16 x i16> @avx2_psll_w_64(<16 x i16> %v) {
1391 ; CHECK-LABEL: @avx2_psll_w_64(
1392 ; CHECK-NEXT: ret <16 x i16> zeroinitializer
1394 %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
; All-zero count operand: the intrinsic call is expected to fold away and %v is returned unchanged.
1398 define <8 x i32> @avx2_psll_d_0(<8 x i32> %v) {
1399 ; CHECK-LABEL: @avx2_psll_d_0(
1400 ; CHECK-NEXT: ret <8 x i32> [[V:%.*]]
1402 %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> zeroinitializer)
; Count operand is <15, 0, 9999, 9999>; the expected output is a plain shl of %v by splat 15.
1406 define <8 x i32> @avx2_psll_d_15(<8 x i32> %v) {
1407 ; CHECK-LABEL: @avx2_psll_d_15(
1408 ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[V:%.*]], splat (i32 15)
1409 ; CHECK-NEXT: ret <8 x i32> [[TMP1]]
1411 %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
; Count operand is 15 in every element; the expected output is zeroinitializer,
; unlike @avx2_psll_d_15 where element 1 is zero.
; NOTE(review): presumably elements 0 and 1 are read together as one wide count - confirm against the instruction reference.
1415 define <8 x i32> @avx2_psll_d_15_splat(<8 x i32> %v) {
1416 ; CHECK-LABEL: @avx2_psll_d_15_splat(
1417 ; CHECK-NEXT: ret <8 x i32> zeroinitializer
1419 %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
; Element 0 of the count operand is 64, larger than the 32-bit element width;
; the expected output is zeroinitializer.
1423 define <8 x i32> @avx2_psll_d_64(<8 x i32> %v) {
1424 ; CHECK-LABEL: @avx2_psll_d_64(
1425 ; CHECK-NEXT: ret <8 x i32> zeroinitializer
1427 %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
; All-zero count operand: the intrinsic call is expected to fold away and %v is returned unchanged.
1431 define <4 x i64> @avx2_psll_q_0(<4 x i64> %v) {
1432 ; CHECK-LABEL: @avx2_psll_q_0(
1433 ; CHECK-NEXT: ret <4 x i64> [[V:%.*]]
1435 %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> zeroinitializer)
; Count operand is <15, 9999>; the expected output is a plain shl of %v by splat 15
; (the 9999 in element 1 does not show up in the result).
1439 define <4 x i64> @avx2_psll_q_15(<4 x i64> %v) {
1440 ; CHECK-LABEL: @avx2_psll_q_15(
1441 ; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i64> [[V:%.*]], splat (i64 15)
1442 ; CHECK-NEXT: ret <4 x i64> [[TMP1]]
1444 %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
; Element 0 of the count operand is 64, equal to the element width;
; the expected output is zeroinitializer.
1448 define <4 x i64> @avx2_psll_q_64(<4 x i64> %v) {
1449 ; CHECK-LABEL: @avx2_psll_q_64(
1450 ; CHECK-NEXT: ret <4 x i64> zeroinitializer
1452 %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
; All-zero count operand: the intrinsic call is expected to fold away and %v is returned unchanged.
1456 define <32 x i16> @avx512_psll_w_512_0(<32 x i16> %v) {
1457 ; CHECK-LABEL: @avx512_psll_w_512_0(
1458 ; CHECK-NEXT: ret <32 x i16> [[V:%.*]]
1460 %1 = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %v, <8 x i16> zeroinitializer)
; Count operand has 15 in element 0, zeros in elements 1-3 and 9999 in elements 4-7;
; the expected output is a plain shl of %v by splat 15.
1464 define <32 x i16> @avx512_psll_w_512_15(<32 x i16> %v) {
1465 ; CHECK-LABEL: @avx512_psll_w_512_15(
1466 ; CHECK-NEXT: [[TMP1:%.*]] = shl <32 x i16> [[V:%.*]], splat (i16 15)
1467 ; CHECK-NEXT: ret <32 x i16> [[TMP1]]
1469 %1 = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
; Count operand is 15 in every element; the expected output is zeroinitializer,
; unlike @avx512_psll_w_512_15 where only element 0 is nonzero in the low half.
; NOTE(review): presumably the low elements are read together as one wide count - confirm against the instruction reference.
; NOTE(review): the name orders "15_512" differently from the other 512-bit splat tests in this file.
1473 define <32 x i16> @avx512_psll_w_15_512_splat(<32 x i16> %v) {
1474 ; CHECK-LABEL: @avx512_psll_w_15_512_splat(
1475 ; CHECK-NEXT: ret <32 x i16> zeroinitializer
1477 %1 = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
; Element 0 of the count operand is 64, larger than the 16-bit element width;
; the expected output is zeroinitializer.
1481 define <32 x i16> @avx512_psll_w_512_64(<32 x i16> %v) {
1482 ; CHECK-LABEL: @avx512_psll_w_512_64(
1483 ; CHECK-NEXT: ret <32 x i16> zeroinitializer
1485 %1 = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
; All-zero count operand: the intrinsic call is expected to fold away and %v is returned unchanged.
1489 define <16 x i32> @avx512_psll_d_512_0(<16 x i32> %v) {
1490 ; CHECK-LABEL: @avx512_psll_d_512_0(
1491 ; CHECK-NEXT: ret <16 x i32> [[V:%.*]]
1493 %1 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> zeroinitializer)
; Count operand is <15, 0, 9999, 9999>; the expected output is a plain shl of %v by splat 15.
1497 define <16 x i32> @avx512_psll_d_512_15(<16 x i32> %v) {
1498 ; CHECK-LABEL: @avx512_psll_d_512_15(
1499 ; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i32> [[V:%.*]], splat (i32 15)
1500 ; CHECK-NEXT: ret <16 x i32> [[TMP1]]
1502 %1 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
; Count operand is 15 in every element; the expected output is zeroinitializer,
; unlike @avx512_psll_d_512_15 where element 1 is zero.
; NOTE(review): presumably elements 0 and 1 are read together as one wide count - confirm against the instruction reference.
1506 define <16 x i32> @avx512_psll_d_512_15_splat(<16 x i32> %v) {
1507 ; CHECK-LABEL: @avx512_psll_d_512_15_splat(
1508 ; CHECK-NEXT: ret <16 x i32> zeroinitializer
1510 %1 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
; Element 0 of the count operand is 64, larger than the 32-bit element width;
; the expected output is zeroinitializer.
1514 define <16 x i32> @avx512_psll_d_512_64(<16 x i32> %v) {
1515 ; CHECK-LABEL: @avx512_psll_d_512_64(
1516 ; CHECK-NEXT: ret <16 x i32> zeroinitializer
1518 %1 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
; All-zero count operand: the intrinsic call is expected to fold away and %v is returned unchanged.
1522 define <8 x i64> @avx512_psll_q_512_0(<8 x i64> %v) {
1523 ; CHECK-LABEL: @avx512_psll_q_512_0(
1524 ; CHECK-NEXT: ret <8 x i64> [[V:%.*]]
1526 %1 = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %v, <2 x i64> zeroinitializer)
; Count operand is <15, 9999>; the expected output is a plain shl of %v by splat 15
; (the 9999 in element 1 does not show up in the result).
1530 define <8 x i64> @avx512_psll_q_512_15(<8 x i64> %v) {
1531 ; CHECK-LABEL: @avx512_psll_q_512_15(
1532 ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i64> [[V:%.*]], splat (i64 15)
1533 ; CHECK-NEXT: ret <8 x i64> [[TMP1]]
1535 %1 = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %v, <2 x i64> <i64 15, i64 9999>)
; Element 0 of the count operand is 64, equal to the element width;
; the expected output is zeroinitializer.
1539 define <8 x i64> @avx512_psll_q_512_64(<8 x i64> %v) {
1540 ; CHECK-LABEL: @avx512_psll_q_512_64(
1541 ; CHECK-NEXT: ret <8 x i64> zeroinitializer
1543 %1 = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %v, <2 x i64> <i64 64, i64 9999>)
1548 ; ASHR - Constant Per-Element Vector
; Per-element counts are all zero: the intrinsic call is expected to fold away and %v is returned unchanged.
1551 define <4 x i32> @avx2_psrav_d_128_0(<4 x i32> %v) {
1552 ; CHECK-LABEL: @avx2_psrav_d_128_0(
1553 ; CHECK-NEXT: ret <4 x i32> [[V:%.*]]
1555 %1 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> zeroinitializer)
; Per-element counts are all zero: the intrinsic call is expected to fold away and %v is returned unchanged.
1559 define <8 x i32> @avx2_psrav_d_256_0(<8 x i32> %v) {
1560 ; CHECK-LABEL: @avx2_psrav_d_256_0(
1561 ; CHECK-NEXT: ret <8 x i32> [[V:%.*]]
1563 %1 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> zeroinitializer)
; Per-element counts are all zero: the intrinsic call is expected to fold away and %v is returned unchanged.
1567 define <16 x i32> @avx512_psrav_d_512_0(<16 x i32> %v) {
1568 ; CHECK-LABEL: @avx512_psrav_d_512_0(
1569 ; CHECK-NEXT: ret <16 x i32> [[V:%.*]]
1571 %1 = tail call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %v, <16 x i32> zeroinitializer)
; Per-element counts <0, 8, 16, 64>: the expected output is an ashr with the same counts,
; except that the out-of-range 64 appears as 31.
1575 define <4 x i32> @avx2_psrav_d_128_var(<4 x i32> %v) {
1576 ; CHECK-LABEL: @avx2_psrav_d_128_var(
1577 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], <i32 0, i32 8, i32 16, i32 31>
1578 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
1580 %1 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 64>)
; Mixed per-element counts with one out-of-range value (32 in element 4): the expected output
; is an ashr with the same counts, except that the 32 appears as 31.
1584 define <8 x i32> @avx2_psrav_d_256_var(<8 x i32> %v) {
1585 ; CHECK-LABEL: @avx2_psrav_d_256_var(
1586 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
1587 ; CHECK-NEXT: ret <8 x i32> [[TMP1]]
1589 %1 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0>)
; Mixed per-element counts with out-of-range values (32 in elements 4 and 12): the expected output
; is an ashr with the same counts, except that each 32 appears as 31.
1593 define <16 x i32> @avx512_psrav_d_512_var(<16 x i32> %v) {
1594 ; CHECK-LABEL: @avx512_psrav_d_512_var(
1595 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
1596 ; CHECK-NEXT: ret <16 x i32> [[TMP1]]
1598 %1 = tail call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %v, <16 x i32> <i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0>)
; Every defined count is out of range (32, 100, -255) and the last element is undef:
; the expected output is an ashr by 31 in the defined elements with the undef element kept as undef.
1602 define <4 x i32> @avx2_psrav_d_128_allbig(<4 x i32> %v) {
1603 ; CHECK-LABEL: @avx2_psrav_d_128_allbig(
1604 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], <i32 31, i32 31, i32 31, i32 undef>
1605 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
1607 %1 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> <i32 32, i32 100, i32 -255, i32 undef>)
; Every defined count is out of range (large positive or negative) and element 0 is undef:
; the expected output is an ashr by 31 in the defined elements with the undef element kept as undef.
1611 define <8 x i32> @avx2_psrav_d_256_allbig(<8 x i32> %v) {
1612 ; CHECK-LABEL: @avx2_psrav_d_256_allbig(
1613 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], <i32 undef, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
1614 ; CHECK-NEXT: ret <8 x i32> [[TMP1]]
1616 %1 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>)
; Every defined count is out of range (large positive or negative) and elements 0 and 8 are undef:
; the expected output is an ashr by 31 in the defined elements with the undef elements kept as undef.
1620 define <16 x i32> @avx512_psrav_d_512_allbig(<16 x i32> %v) {
1621 ; CHECK-LABEL: @avx512_psrav_d_512_allbig(
1622 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], <i32 undef, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 undef, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
1623 ; CHECK-NEXT: ret <16 x i32> [[TMP1]]
1625 %1 = tail call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %v, <16 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555, i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>)
; The count vector is built by inserting undef into element 0 of <0, 8, 16, 64>:
; the expected output is an ashr whose count keeps undef in element 0 and shows the out-of-range 64 as 31.
1629 define <4 x i32> @avx2_psrav_d_128_undef(<4 x i32> %v) {
1630 ; CHECK-LABEL: @avx2_psrav_d_128_undef(
1631 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], <i32 undef, i32 8, i32 16, i32 31>
1632 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
1634 %1 = insertelement <4 x i32> <i32 0, i32 8, i32 16, i32 64>, i32 undef, i32 0
1635 %2 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> %1)
; The count vector is built by inserting undef into element 1 of a constant that also contains a 32:
; the expected output is an ashr whose count keeps undef in element 1 and shows the 32 as 31.
1639 define <8 x i32> @avx2_psrav_d_256_undef(<8 x i32> %v) {
1640 ; CHECK-LABEL: @avx2_psrav_d_256_undef(
1641 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], <i32 0, i32 undef, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
1642 ; CHECK-NEXT: ret <8 x i32> [[TMP1]]
1644 %1 = insertelement <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0>, i32 undef, i32 1
1645 %2 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> %1)
; The count vector is built by inserting undef into element 1 of a constant that also contains two 32s:
; the expected output is an ashr whose count keeps undef in element 1 and shows each 32 as 31.
1649 define <16 x i32> @avx512_psrav_d_512_undef(<16 x i32> %v) {
1650 ; CHECK-LABEL: @avx512_psrav_d_512_undef(
1651 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], <i32 0, i32 undef, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
1652 ; CHECK-NEXT: ret <16 x i32> [[TMP1]]
1654 %1 = insertelement <16 x i32> <i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0>, i32 undef, i32 1
1655 %2 = tail call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %v, <16 x i32> %1)
; Per-element counts are all zero: the intrinsic call is expected to fold away and %v is returned unchanged.
1659 define <2 x i64> @avx512_psrav_q_128_0(<2 x i64> %v) {
1660 ; CHECK-LABEL: @avx512_psrav_q_128_0(
1661 ; CHECK-NEXT: ret <2 x i64> [[V:%.*]]
1663 %1 = tail call <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64> %v, <2 x i64> zeroinitializer)
; Per-element counts are all zero: the intrinsic call is expected to fold away and %v is returned unchanged.
1667 define <4 x i64> @avx512_psrav_q_256_0(<4 x i64> %v) {
1668 ; CHECK-LABEL: @avx512_psrav_q_256_0(
1669 ; CHECK-NEXT: ret <4 x i64> [[V:%.*]]
1671 %1 = tail call <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64> %v, <4 x i64> zeroinitializer)
; Per-element counts <0, 8> are all below the element width: the expected output is an ashr with the same constant counts.
1675 define <2 x i64> @avx512_psrav_q_128_var(<2 x i64> %v) {
1676 ; CHECK-LABEL: @avx512_psrav_q_128_var(
1677 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], <i64 0, i64 8>
1678 ; CHECK-NEXT: ret <2 x i64> [[TMP1]]
1680 %1 = tail call <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64> %v, <2 x i64> <i64 0, i64 8>)
; Per-element counts <0, 8, 16, 31> are all below the element width: the expected output is an ashr with the same constant counts.
1684 define <4 x i64> @avx512_psrav_q_256_var(<4 x i64> %v) {
1685 ; CHECK-LABEL: @avx512_psrav_q_256_var(
1686 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], <i64 0, i64 8, i64 16, i64 31>
1687 ; CHECK-NEXT: ret <4 x i64> [[TMP1]]
1689 %1 = tail call <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 31>)
; Element 0 of the counts is the out-of-range 64 and element 1 is undef:
; the expected output is an ashr by 63 in element 0 with the undef element kept as undef.
1693 define <2 x i64> @avx512_psrav_q_128_allbig(<2 x i64> %v) {
1694 ; CHECK-LABEL: @avx512_psrav_q_128_allbig(
1695 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], <i64 63, i64 undef>
1696 ; CHECK-NEXT: ret <2 x i64> [[TMP1]]
1698 %1 = tail call <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64> %v, <2 x i64> <i64 64, i64 undef>)
; Every defined count is out of range (64, -128, -60) and element 1 is undef:
; the expected output is an ashr by 63 in the defined elements with the undef element kept as undef.
1702 define <4 x i64> @avx512_psrav_q_256_allbig(<4 x i64> %v) {
1703 ; CHECK-LABEL: @avx512_psrav_q_256_allbig(
1704 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], <i64 63, i64 undef, i64 63, i64 63>
1705 ; CHECK-NEXT: ret <4 x i64> [[TMP1]]
1707 %1 = tail call <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64> %v, <4 x i64> <i64 64, i64 undef, i64 -128, i64 -60>)
; The count vector is built by inserting undef into element 0 of <0, 8>:
; the expected output is an ashr whose count is <undef, 8>.
1711 define <2 x i64> @avx512_psrav_q_128_undef(<2 x i64> %v) {
1712 ; CHECK-LABEL: @avx512_psrav_q_128_undef(
1713 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], <i64 undef, i64 8>
1714 ; CHECK-NEXT: ret <2 x i64> [[TMP1]]
1716 %1 = insertelement <2 x i64> <i64 0, i64 8>, i64 undef, i64 0
1717 %2 = tail call <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64> %v, <2 x i64> %1)
; The count vector is built by inserting undef into element 0 of <0, 8, 16, 31>:
; the expected output is an ashr whose count is <undef, 8, 16, 31>.
1721 define <4 x i64> @avx512_psrav_q_256_undef(<4 x i64> %v) {
1722 ; CHECK-LABEL: @avx512_psrav_q_256_undef(
1723 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], <i64 undef, i64 8, i64 16, i64 31>
1724 ; CHECK-NEXT: ret <4 x i64> [[TMP1]]
1726 %1 = insertelement <4 x i64> <i64 0, i64 8, i64 16, i64 31>, i64 undef, i64 0
1727 %2 = tail call <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64> %v, <4 x i64> %1)
; Per-element counts are all zero: the intrinsic call is expected to fold away and %v is returned unchanged.
1731 define <8 x i64> @avx512_psrav_q_512_0(<8 x i64> %v) {
1732 ; CHECK-LABEL: @avx512_psrav_q_512_0(
1733 ; CHECK-NEXT: ret <8 x i64> [[V:%.*]]
1735 %1 = tail call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %v, <8 x i64> zeroinitializer)
; Per-element counts (0, 8, 16, 31 repeated) are all below the element width:
; the expected output is an ashr with the same constant counts.
1739 define <8 x i64> @avx512_psrav_q_512_var(<8 x i64> %v) {
1740 ; CHECK-LABEL: @avx512_psrav_q_512_var(
1741 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>
1742 ; CHECK-NEXT: ret <8 x i64> [[TMP1]]
1744 %1 = tail call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %v, <8 x i64> <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>)
; Every defined count is out of range (64, -128, -60) and elements 1 and 5 are undef:
; the expected output is an ashr by 63 in the defined elements with the undef elements kept as undef.
1748 define <8 x i64> @avx512_psrav_q_512_allbig(<8 x i64> %v) {
1749 ; CHECK-LABEL: @avx512_psrav_q_512_allbig(
1750 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], <i64 63, i64 undef, i64 63, i64 63, i64 63, i64 undef, i64 63, i64 63>
1751 ; CHECK-NEXT: ret <8 x i64> [[TMP1]]
1753 %1 = tail call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %v, <8 x i64> <i64 64, i64 undef, i64 -128, i64 -60, i64 64, i64 undef, i64 -128, i64 -60>)
; The count vector is built by inserting undef into element 0 of an in-range constant:
; the expected output is an ashr with the same counts and undef in element 0.
1757 define <8 x i64> @avx512_psrav_q_512_undef(<8 x i64> %v) {
1758 ; CHECK-LABEL: @avx512_psrav_q_512_undef(
1759 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], <i64 undef, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>
1760 ; CHECK-NEXT: ret <8 x i64> [[TMP1]]
1762 %1 = insertelement <8 x i64> <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>, i64 undef, i64 0
1763 %2 = tail call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %v, <8 x i64> %1)
; Per-element counts are all zero: the intrinsic call is expected to fold away and %v is returned unchanged.
1767 define <8 x i16> @avx512_psrav_w_128_0(<8 x i16> %v) {
1768 ; CHECK-LABEL: @avx512_psrav_w_128_0(
1769 ; CHECK-NEXT: ret <8 x i16> [[V:%.*]]
1771 %1 = tail call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %v, <8 x i16> zeroinitializer)
; Per-element counts 0..7 are all below the element width: the expected output is an ashr with the same constant counts.
1775 define <8 x i16> @avx512_psrav_w_128_var(<8 x i16> %v) {
1776 ; CHECK-LABEL: @avx512_psrav_w_128_var(
1777 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
1778 ; CHECK-NEXT: ret <8 x i16> [[TMP1]]
1780 %1 = tail call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %v, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
; Every defined count is out of range for a 16-bit element (including -1 and -2) and element 7 is undef:
; the expected output is an ashr by 15 in the defined elements with the undef element kept as undef.
1784 define <8 x i16> @avx512_psrav_w_128_allbig(<8 x i16> %v) {
1785 ; CHECK-LABEL: @avx512_psrav_w_128_allbig(
1786 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 undef>
1787 ; CHECK-NEXT: ret <8 x i16> [[TMP1]]
1789 %1 = tail call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %v, <8 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 undef>)
; The count vector is built by inserting undef into element 0 of the in-range constant 0..7:
; the expected output is an ashr with the same counts and undef in element 0.
1793 define <8 x i16> @avx512_psrav_w_128_undef(<8 x i16> %v) {
1794 ; CHECK-LABEL: @avx512_psrav_w_128_undef(
1795 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
1796 ; CHECK-NEXT: ret <8 x i16> [[TMP1]]
1798 %1 = insertelement <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, i16 undef, i64 0
1799 %2 = tail call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %v, <8 x i16> %1)
; Per-element counts are all zero: the intrinsic call is expected to fold away and %v is returned unchanged.
1803 define <16 x i16> @avx512_psrav_w_256_0(<16 x i16> %v) {
1804 ; CHECK-LABEL: @avx512_psrav_w_256_0(
1805 ; CHECK-NEXT: ret <16 x i16> [[V:%.*]]
1807 %1 = tail call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %v, <16 x i16> zeroinitializer)
; Per-element counts 0..15 are all below the element width: the expected output is an ashr with the same constant counts.
1811 define <16 x i16> @avx512_psrav_w_256_var(<16 x i16> %v) {
1812 ; CHECK-LABEL: @avx512_psrav_w_256_var(
1813 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
1814 ; CHECK-NEXT: ret <16 x i16> [[TMP1]]
1816 %1 = tail call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %v, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>)
; Every defined count is out of range for a 16-bit element and element 8 is undef:
; the expected output is an ashr by 15 in the defined elements with the undef element kept as undef.
1820 define <16 x i16> @avx512_psrav_w_256_allbig(<16 x i16> %v) {
1821 ; CHECK-LABEL: @avx512_psrav_w_256_allbig(
1822 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 undef, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
1823 ; CHECK-NEXT: ret <16 x i16> [[TMP1]]
1825 %1 = tail call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %v, <16 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 -7, i16 undef, i16 64, i16 -10, i16 256, i16 16, i16 28, i16 65535, i16 32767>)
; The count vector is built by inserting undef into element 0 of the in-range constant 0..15:
; the expected output is an ashr with the same counts and undef in element 0.
1829 define <16 x i16> @avx512_psrav_w_256_undef(<16 x i16> %v) {
1830 ; CHECK-LABEL: @avx512_psrav_w_256_undef(
1831 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
1832 ; CHECK-NEXT: ret <16 x i16> [[TMP1]]
1834 %1 = insertelement <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, i16 undef, i64 0
1835 %2 = tail call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %v, <16 x i16> %1)
; Per-element counts are all zero: the intrinsic call is expected to fold away and %v is returned unchanged.
1839 define <32 x i16> @avx512_psrav_w_512_0(<32 x i16> %v) {
1840 ; CHECK-LABEL: @avx512_psrav_w_512_0(
1841 ; CHECK-NEXT: ret <32 x i16> [[V:%.*]]
1843 %1 = tail call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %v, <32 x i16> zeroinitializer)
; Per-element counts run 0..15 and then 15..0, all below the element width:
; the expected output is an ashr with the same constant counts.
1847 define <32 x i16> @avx512_psrav_w_512_var(<32 x i16> %v) {
1848 ; CHECK-LABEL: @avx512_psrav_w_512_var(
1849 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>
1850 ; CHECK-NEXT: ret <32 x i16> [[TMP1]]
1852 %1 = tail call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %v, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
; Every defined count is out of range for a 16-bit element and elements 8, 18, 26 and 29 are undef:
; the expected output is an ashr by 15 in the defined elements with the undef elements kept as undef.
1856 define <32 x i16> @avx512_psrav_w_512_allbig(<32 x i16> %v) {
1857 ; CHECK-LABEL: @avx512_psrav_w_512_allbig(
1858 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 undef, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 undef, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 undef, i16 15, i16 15, i16 undef, i16 15, i16 15>
1859 ; CHECK-NEXT: ret <32 x i16> [[TMP1]]
1861 %1 = tail call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %v, <32 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 -7, i16 undef, i16 64, i16 -10, i16 128, i16 16, i16 28, i16 65535, i16 32767, i16 56, i16 -14, i16 undef, i16 16, i16 67, i16 567, i16 -32768, i16 4096, i16 8192, i16 -12345, i16 undef, i16 345, i16 123, i16 undef, i16 1024, i16 54321>)
; The count vector is built by inserting undef into element 0 of an in-range constant (0..15 then 15..0):
; the expected output is an ashr with the same counts and undef in element 0.
1865 define <32 x i16> @avx512_psrav_w_512_undef(<32 x i16> %v) {
1866 ; CHECK-LABEL: @avx512_psrav_w_512_undef(
1867 ; CHECK-NEXT: [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>
1868 ; CHECK-NEXT: ret <32 x i16> [[TMP1]]
1870 %1 = insertelement <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 undef, i64 0
1871 %2 = tail call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %v, <32 x i16> %1)
1876 ; LSHR - Constant Per-Element Vector
1879 define <4 x i32> @avx2_psrlv_d_128_0(<4 x i32> %v) {
1880 ; CHECK-LABEL: @avx2_psrlv_d_128_0(
1881 ; CHECK-NEXT: ret <4 x i32> [[V:%.*]]
1883 %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> zeroinitializer)
1887 define <8 x i32> @avx2_psrlv_d_256_0(<8 x i32> %v) {
1888 ; CHECK-LABEL: @avx2_psrlv_d_256_0(
1889 ; CHECK-NEXT: ret <8 x i32> [[V:%.*]]
1891 %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> zeroinitializer)
1895 define <4 x i32> @avx2_psrlv_d_128_var(<4 x i32> %v) {
1896 ; CHECK-LABEL: @avx2_psrlv_d_128_var(
1897 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[V:%.*]], <i32 0, i32 8, i32 16, i32 31>
1898 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
1900 %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 31>)
1904 define <8 x i32> @avx2_psrlv_d_256_var(<8 x i32> %v) {
1905 ; CHECK-LABEL: @avx2_psrlv_d_256_var(
1906 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> [[V:%.*]], <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
1907 ; CHECK-NEXT: ret <8 x i32> [[TMP1]]
1909 %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>)
1913 define <4 x i32> @avx2_psrlv_d_128_big(<4 x i32> %v) {
1914 ; CHECK-LABEL: @avx2_psrlv_d_128_big(
1915 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> [[V:%.*]], <4 x i32> <i32 0, i32 8, i32 16, i32 64>)
1916 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
1918 %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 64>)
1922 define <8 x i32> @avx2_psrlv_d_256_big(<8 x i32> %v) {
1923 ; CHECK-LABEL: @avx2_psrlv_d_256_big(
1924 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> [[V:%.*]], <8 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
1925 ; CHECK-NEXT: ret <8 x i32> [[TMP1]]
1927 %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
1931 define <4 x i32> @avx2_psrlv_d_128_allbig(<4 x i32> %v) {
1932 ; CHECK-LABEL: @avx2_psrlv_d_128_allbig(
1933 ; CHECK-NEXT: ret <4 x i32> <i32 0, i32 0, i32 0, i32 undef>
1935 %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> <i32 32, i32 100, i32 -255, i32 undef>)
1939 define <8 x i32> @avx2_psrlv_d_256_allbig(<8 x i32> %v) {
1940 ; CHECK-LABEL: @avx2_psrlv_d_256_allbig(
1941 ; CHECK-NEXT: ret <8 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1943 %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>)
1947 define <4 x i32> @avx2_psrlv_d_128_undef(<4 x i32> %v) {
1948 ; CHECK-LABEL: @avx2_psrlv_d_128_undef(
1949 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[V:%.*]], <i32 undef, i32 8, i32 16, i32 31>
1950 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
1952 %1 = insertelement <4 x i32> <i32 0, i32 8, i32 16, i32 31>, i32 undef, i32 0
1953 %2 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> %1)
1957 define <8 x i32> @avx2_psrlv_d_256_undef(<8 x i32> %v) {
1958 ; CHECK-LABEL: @avx2_psrlv_d_256_undef(
1959 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> [[V:%.*]], <i32 0, i32 undef, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>
1960 ; CHECK-NEXT: ret <8 x i32> [[TMP1]]
1962 %1 = insertelement <8 x i32> <i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>, i32 undef, i32 1
1963 %2 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> %1)
1967 define <2 x i64> @avx2_psrlv_q_128_0(<2 x i64> %v) {
1968 ; CHECK-LABEL: @avx2_psrlv_q_128_0(
1969 ; CHECK-NEXT: ret <2 x i64> [[V:%.*]]
1971 %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> zeroinitializer)
1975 define <4 x i64> @avx2_psrlv_q_256_0(<4 x i64> %v) {
1976 ; CHECK-LABEL: @avx2_psrlv_q_256_0(
1977 ; CHECK-NEXT: ret <4 x i64> [[V:%.*]]
1979 %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> zeroinitializer)
1983 define <2 x i64> @avx2_psrlv_q_128_var(<2 x i64> %v) {
1984 ; CHECK-LABEL: @avx2_psrlv_q_128_var(
1985 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[V:%.*]], <i64 0, i64 8>
1986 ; CHECK-NEXT: ret <2 x i64> [[TMP1]]
1988 %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 8>)
1992 define <4 x i64> @avx2_psrlv_q_256_var(<4 x i64> %v) {
1993 ; CHECK-LABEL: @avx2_psrlv_q_256_var(
1994 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i64> [[V:%.*]], <i64 0, i64 8, i64 16, i64 31>
1995 ; CHECK-NEXT: ret <4 x i64> [[TMP1]]
1997 %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 31>)
2001 define <2 x i64> @avx2_psrlv_q_128_big(<2 x i64> %v) {
2002 ; CHECK-LABEL: @avx2_psrlv_q_128_big(
2003 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> [[V:%.*]], <2 x i64> <i64 0, i64 128>)
2004 ; CHECK-NEXT: ret <2 x i64> [[TMP1]]
2006 %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 128>)
2010 define <4 x i64> @avx2_psrlv_q_256_big(<4 x i64> %v) {
2011 ; CHECK-LABEL: @avx2_psrlv_q_256_big(
2012 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> [[V:%.*]], <4 x i64> <i64 0, i64 8, i64 16, i64 64>)
2013 ; CHECK-NEXT: ret <4 x i64> [[TMP1]]
2015 %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 64>)
2019 define <2 x i64> @avx2_psrlv_q_128_allbig(<2 x i64> %v) {
2020 ; CHECK-LABEL: @avx2_psrlv_q_128_allbig(
2021 ; CHECK-NEXT: ret <2 x i64> zeroinitializer
2023 %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> <i64 128, i64 -64>)
2027 define <4 x i64> @avx2_psrlv_q_256_allbig(<4 x i64> %v) {
2028 ; CHECK-LABEL: @avx2_psrlv_q_256_allbig(
2029 ; CHECK-NEXT: ret <4 x i64> <i64 0, i64 undef, i64 0, i64 0>
2031 %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> <i64 64, i64 undef, i64 -128, i64 -60>)
2035 ; The shift amount is 0 (the poison lane could be 0), so we return the unshifted input.
2037 define <2 x i64> @avx2_psrlv_q_128_poison(<2 x i64> %v) {
2038 ; CHECK-LABEL: @avx2_psrlv_q_128_poison(
2039 ; CHECK-NEXT: ret <2 x i64> [[V:%.*]]
2041 %1 = insertelement <2 x i64> <i64 0, i64 8>, i64 poison, i64 1
2042 %2 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> %1)
2046 define <4 x i64> @avx2_psrlv_q_256_poison(<4 x i64> %v) {
2047 ; CHECK-LABEL: @avx2_psrlv_q_256_poison(
2048 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i64> [[V:%.*]], <i64 poison, i64 8, i64 16, i64 31>
2049 ; CHECK-NEXT: ret <4 x i64> [[TMP1]]
2051 %1 = insertelement <4 x i64> <i64 0, i64 8, i64 16, i64 31>, i64 poison, i64 0
2052 %2 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> %1)
; NOTE(review): despite the "avx2_" prefix in its name, this test exercises the
; 512-bit AVX-512 intrinsic @llvm.x86.avx512.psrlv.d.512 with an all-zero shift
; vector; the CHECK lines expect the call to fold to the unshifted input.
; The SHL counterpart is named @avx512_psllv_d_512_0 - consider renaming this
; one to match when the checks are next regenerated.
2056 define <16 x i32> @avx2_psrlv_d_512_0(<16 x i32> %v) {
2057 ; CHECK-LABEL: @avx2_psrlv_d_512_0(
2058 ; CHECK-NEXT: ret <16 x i32> [[V:%.*]]
2060 %1 = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %v, <16 x i32> zeroinitializer)
2064 define <16 x i32> @avx512_psrlv_d_512_var(<16 x i32> %v) {
2065 ; CHECK-LABEL: @avx512_psrlv_d_512_var(
2066 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i32> [[V:%.*]], <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
2067 ; CHECK-NEXT: ret <16 x i32> [[TMP1]]
2069 %1 = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %v, <16 x i32> <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>)
2073 define <16 x i32> @avx512_psrlv_d_512_big(<16 x i32> %v) {
2074 ; CHECK-LABEL: @avx512_psrlv_d_512_big(
2075 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> [[V:%.*]], <16 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
2076 ; CHECK-NEXT: ret <16 x i32> [[TMP1]]
2078 %1 = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %v, <16 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
2082 define <16 x i32> @avx512_psrlv_d_512_allbig(<16 x i32> %v) {
2083 ; CHECK-LABEL: @avx512_psrlv_d_512_allbig(
2084 ; CHECK-NEXT: ret <16 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2086 %1 = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %v, <16 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555, i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>)
2090 define <16 x i32> @avx512_psrlv_d_512_undef(<16 x i32> %v) {
2091 ; CHECK-LABEL: @avx512_psrlv_d_512_undef(
2092 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i32> [[V:%.*]], <i32 0, i32 undef, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>
2093 ; CHECK-NEXT: ret <16 x i32> [[TMP1]]
2095 %1 = insertelement <16 x i32> <i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>, i32 undef, i32 1
2096 %2 = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %v, <16 x i32> %1)
2100 define <8 x i64> @avx512_psrlv_q_512_0(<8 x i64> %v) {
2101 ; CHECK-LABEL: @avx512_psrlv_q_512_0(
2102 ; CHECK-NEXT: ret <8 x i64> [[V:%.*]]
2104 %1 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> zeroinitializer)
2108 define <8 x i64> @avx512_psrlv_q_512_var(<8 x i64> %v) {
2109 ; CHECK-LABEL: @avx512_psrlv_q_512_var(
2110 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i64> [[V:%.*]], <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>
2111 ; CHECK-NEXT: ret <8 x i64> [[TMP1]]
2113 %1 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>)
2117 define <8 x i64> @avx512_psrlv_q_512_big(<8 x i64> %v) {
2118 ; CHECK-LABEL: @avx512_psrlv_q_512_big(
2119 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> [[V:%.*]], <8 x i64> <i64 0, i64 8, i64 16, i64 64, i64 0, i64 8, i64 16, i64 64>)
2120 ; CHECK-NEXT: ret <8 x i64> [[TMP1]]
2122 %1 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> <i64 0, i64 8, i64 16, i64 64, i64 0, i64 8, i64 16, i64 64>)
2126 define <8 x i64> @avx512_psrlv_q_512_allbig(<8 x i64> %v) {
2127 ; CHECK-LABEL: @avx512_psrlv_q_512_allbig(
2128 ; CHECK-NEXT: ret <8 x i64> <i64 0, i64 undef, i64 0, i64 0, i64 0, i64 undef, i64 0, i64 0>
2130 %1 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> <i64 64, i64 undef, i64 -128, i64 -60, i64 64, i64 undef, i64 -128, i64 -60>)
2134 define <8 x i64> @avx512_psrlv_q_512_undef(<8 x i64> %v) {
2135 ; CHECK-LABEL: @avx512_psrlv_q_512_undef(
2136 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i64> [[V:%.*]], <i64 undef, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>
2137 ; CHECK-NEXT: ret <8 x i64> [[TMP1]]
2139 %1 = insertelement <8 x i64> <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>, i64 undef, i64 0
2140 %2 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> %1)
2144 define <8 x i16> @avx512_psrlv_w_128_0(<8 x i16> %v) {
2145 ; CHECK-LABEL: @avx512_psrlv_w_128_0(
2146 ; CHECK-NEXT: ret <8 x i16> [[V:%.*]]
2148 %1 = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %v, <8 x i16> zeroinitializer)
2152 define <8 x i16> @avx512_psrlv_w_128_var(<8 x i16> %v) {
2153 ; CHECK-LABEL: @avx512_psrlv_w_128_var(
2154 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i16> [[V:%.*]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
2155 ; CHECK-NEXT: ret <8 x i16> [[TMP1]]
2157 %1 = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %v, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
2161 define <8 x i16> @avx512_psrlv_w_128_big(<8 x i16> %v) {
2162 ; CHECK-LABEL: @avx512_psrlv_w_128_big(
2163 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> [[V:%.*]], <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 16>)
2164 ; CHECK-NEXT: ret <8 x i16> [[TMP1]]
2166 %1 = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %v, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 16>)
2170 define <8 x i16> @avx512_psrlv_w_128_allbig(<8 x i16> %v) {
2171 ; CHECK-LABEL: @avx512_psrlv_w_128_allbig(
2172 ; CHECK-NEXT: ret <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef>
2174 %1 = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %v, <8 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 undef>)
2178 define <8 x i16> @avx512_psrlv_w_128_undef(<8 x i16> %v) {
2179 ; CHECK-LABEL: @avx512_psrlv_w_128_undef(
2180 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i16> [[V:%.*]], <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
2181 ; CHECK-NEXT: ret <8 x i16> [[TMP1]]
2183 %1 = insertelement <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, i16 undef, i64 0
2184 %2 = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %v, <8 x i16> %1)
2188 define <16 x i16> @avx512_psrlv_w_256_0(<16 x i16> %v) {
2189 ; CHECK-LABEL: @avx512_psrlv_w_256_0(
2190 ; CHECK-NEXT: ret <16 x i16> [[V:%.*]]
2192 %1 = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %v, <16 x i16> zeroinitializer)
2196 define <16 x i16> @avx512_psrlv_w_256_var(<16 x i16> %v) {
2197 ; CHECK-LABEL: @avx512_psrlv_w_256_var(
2198 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i16> [[V:%.*]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
2199 ; CHECK-NEXT: ret <16 x i16> [[TMP1]]
2201 %1 = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %v, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>)
2205 define <16 x i16> @avx512_psrlv_w_256_big(<16 x i16> %v) {
2206 ; CHECK-LABEL: @avx512_psrlv_w_256_big(
2207 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> [[V:%.*]], <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 16>)
2208 ; CHECK-NEXT: ret <16 x i16> [[TMP1]]
2210 %1 = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %v, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 16>)
2214 define <16 x i16> @avx512_psrlv_w_256_allbig(<16 x i16> %v) {
2215 ; CHECK-LABEL: @avx512_psrlv_w_256_allbig(
2216 ; CHECK-NEXT: ret <16 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
2218 %1 = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %v, <16 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 -7, i16 undef, i16 64, i16 -10, i16 256, i16 16, i16 28, i16 65535, i16 32767>)
2222 define <16 x i16> @avx512_psrlv_w_256_undef(<16 x i16> %v) {
2223 ; CHECK-LABEL: @avx512_psrlv_w_256_undef(
2224 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i16> [[V:%.*]], <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
2225 ; CHECK-NEXT: ret <16 x i16> [[TMP1]]
2227 %1 = insertelement <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, i16 undef, i64 0
2228 %2 = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %v, <16 x i16> %1)
2232 define <32 x i16> @avx512_psrlv_w_512_0(<32 x i16> %v) {
2233 ; CHECK-LABEL: @avx512_psrlv_w_512_0(
2234 ; CHECK-NEXT: ret <32 x i16> [[V:%.*]]
2236 %1 = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %v, <32 x i16> zeroinitializer)
2240 define <32 x i16> @avx512_psrlv_w_512_var(<32 x i16> %v) {
2241 ; CHECK-LABEL: @avx512_psrlv_w_512_var(
2242 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <32 x i16> [[V:%.*]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>
2243 ; CHECK-NEXT: ret <32 x i16> [[TMP1]]
2245 %1 = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %v, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
2249 define <32 x i16> @avx512_psrlv_w_512_big(<32 x i16> %v) {
2250 ; CHECK-LABEL: @avx512_psrlv_w_512_big(
2251 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> [[V:%.*]], <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
2252 ; CHECK-NEXT: ret <32 x i16> [[TMP1]]
2254 %1 = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %v, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
2258 define <32 x i16> @avx512_psrlv_w_512_allbig(<32 x i16> %v) {
2259 ; CHECK-LABEL: @avx512_psrlv_w_512_allbig(
2260 ; CHECK-NEXT: ret <32 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 undef, i16 0, i16 0>
2262 %1 = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %v, <32 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 -7, i16 undef, i16 64, i16 -10, i16 128, i16 16, i16 28, i16 65535, i16 32767, i16 56, i16 -14, i16 undef, i16 16, i16 67, i16 567, i16 -32768, i16 4096, i16 8192, i16 -12345, i16 undef, i16 345, i16 123, i16 undef, i16 1024, i16 54321>)
2266 define <32 x i16> @avx512_psrlv_w_512_undef(<32 x i16> %v) {
2267 ; CHECK-LABEL: @avx512_psrlv_w_512_undef(
2268 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <32 x i16> [[V:%.*]], <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>
2269 ; CHECK-NEXT: ret <32 x i16> [[TMP1]]
2271 %1 = insertelement <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 undef, i64 0
2272 %2 = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %v, <32 x i16> %1)
2277 ; SHL - Constant Per-Element Vector
2280 define <4 x i32> @avx2_psllv_d_128_0(<4 x i32> %v) {
2281 ; CHECK-LABEL: @avx2_psllv_d_128_0(
2282 ; CHECK-NEXT: ret <4 x i32> [[V:%.*]]
2284 %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> zeroinitializer)
2288 define <8 x i32> @avx2_psllv_d_256_0(<8 x i32> %v) {
2289 ; CHECK-LABEL: @avx2_psllv_d_256_0(
2290 ; CHECK-NEXT: ret <8 x i32> [[V:%.*]]
2292 %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> zeroinitializer)
2296 define <4 x i32> @avx2_psllv_d_128_var(<4 x i32> %v) {
2297 ; CHECK-LABEL: @avx2_psllv_d_128_var(
2298 ; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], <i32 0, i32 8, i32 16, i32 31>
2299 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
2301 %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 31>)
2305 define <8 x i32> @avx2_psllv_d_256_var(<8 x i32> %v) {
2306 ; CHECK-LABEL: @avx2_psllv_d_256_var(
2307 ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[V:%.*]], <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
2308 ; CHECK-NEXT: ret <8 x i32> [[TMP1]]
2310 %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>)
2314 define <4 x i32> @avx2_psllv_d_128_big(<4 x i32> %v) {
2315 ; CHECK-LABEL: @avx2_psllv_d_128_big(
2316 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> [[V:%.*]], <4 x i32> <i32 0, i32 8, i32 16, i32 64>)
2317 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
2319 %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 64>)
2323 define <8 x i32> @avx2_psllv_d_256_big(<8 x i32> %v) {
2324 ; CHECK-LABEL: @avx2_psllv_d_256_big(
2325 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> [[V:%.*]], <8 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
2326 ; CHECK-NEXT: ret <8 x i32> [[TMP1]]
2328 %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
2332 define <4 x i32> @avx2_psllv_d_128_allbig(<4 x i32> %v) {
2333 ; CHECK-LABEL: @avx2_psllv_d_128_allbig(
2334 ; CHECK-NEXT: ret <4 x i32> <i32 0, i32 0, i32 0, i32 undef>
2336 %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> <i32 32, i32 100, i32 -255, i32 undef>)
2340 define <8 x i32> @avx2_psllv_d_256_allbig(<8 x i32> %v) {
2341 ; CHECK-LABEL: @avx2_psllv_d_256_allbig(
2342 ; CHECK-NEXT: ret <8 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2344 %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>)
2348 define <4 x i32> @avx2_psllv_d_128_undef(<4 x i32> %v) {
2349 ; CHECK-LABEL: @avx2_psllv_d_128_undef(
2350 ; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], <i32 undef, i32 8, i32 16, i32 31>
2351 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
2353 %1 = insertelement <4 x i32> <i32 0, i32 8, i32 16, i32 31>, i32 undef, i32 0
2354 %2 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> %1)
2358 define <8 x i32> @avx2_psllv_d_256_undef(<8 x i32> %v) {
2359 ; CHECK-LABEL: @avx2_psllv_d_256_undef(
2360 ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[V:%.*]], <i32 0, i32 undef, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>
2361 ; CHECK-NEXT: ret <8 x i32> [[TMP1]]
2363 %1 = insertelement <8 x i32> <i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>, i32 undef, i32 1
2364 %2 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> %1)
2368 define <2 x i64> @avx2_psllv_q_128_0(<2 x i64> %v) {
2369 ; CHECK-LABEL: @avx2_psllv_q_128_0(
2370 ; CHECK-NEXT: ret <2 x i64> [[V:%.*]]
2372 %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> zeroinitializer)
2376 define <4 x i64> @avx2_psllv_q_256_0(<4 x i64> %v) {
2377 ; CHECK-LABEL: @avx2_psllv_q_256_0(
2378 ; CHECK-NEXT: ret <4 x i64> [[V:%.*]]
2380 %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> zeroinitializer)
2384 define <2 x i64> @avx2_psllv_q_128_var(<2 x i64> %v) {
2385 ; CHECK-LABEL: @avx2_psllv_q_128_var(
2386 ; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[V:%.*]], <i64 0, i64 8>
2387 ; CHECK-NEXT: ret <2 x i64> [[TMP1]]
2389 %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 8>)
2393 define <4 x i64> @avx2_psllv_q_256_var(<4 x i64> %v) {
2394 ; CHECK-LABEL: @avx2_psllv_q_256_var(
2395 ; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i64> [[V:%.*]], <i64 0, i64 8, i64 16, i64 31>
2396 ; CHECK-NEXT: ret <4 x i64> [[TMP1]]
2398 %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 31>)
2402 define <2 x i64> @avx2_psllv_q_128_big(<2 x i64> %v) {
2403 ; CHECK-LABEL: @avx2_psllv_q_128_big(
2404 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> [[V:%.*]], <2 x i64> <i64 0, i64 128>)
2405 ; CHECK-NEXT: ret <2 x i64> [[TMP1]]
2407 %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 128>)
2411 define <4 x i64> @avx2_psllv_q_256_big(<4 x i64> %v) {
2412 ; CHECK-LABEL: @avx2_psllv_q_256_big(
2413 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> [[V:%.*]], <4 x i64> <i64 0, i64 8, i64 16, i64 64>)
2414 ; CHECK-NEXT: ret <4 x i64> [[TMP1]]
2416 %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 64>)
2420 define <2 x i64> @avx2_psllv_q_128_allbig(<2 x i64> %v) {
2421 ; CHECK-LABEL: @avx2_psllv_q_128_allbig(
2422 ; CHECK-NEXT: ret <2 x i64> zeroinitializer
2424 %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> <i64 128, i64 -64>)
2428 define <4 x i64> @avx2_psllv_q_256_allbig(<4 x i64> %v) {
2429 ; CHECK-LABEL: @avx2_psllv_q_256_allbig(
2430 ; CHECK-NEXT: ret <4 x i64> <i64 0, i64 undef, i64 0, i64 0>
2432 %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> <i64 64, i64 undef, i64 -128, i64 -60>)
2436 ; The shift amount is 0 (the poison lane could be 0), so we return the unshifted input.
2438 define <2 x i64> @avx2_psllv_q_128_poison(<2 x i64> %v) {
2439 ; CHECK-LABEL: @avx2_psllv_q_128_poison(
2440 ; CHECK-NEXT: ret <2 x i64> [[V:%.*]]
2442 %1 = insertelement <2 x i64> <i64 0, i64 8>, i64 poison, i64 1
2443 %2 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> %1)
2447 define <4 x i64> @avx2_psllv_q_256_poison(<4 x i64> %v) {
2448 ; CHECK-LABEL: @avx2_psllv_q_256_poison(
2449 ; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i64> [[V:%.*]], <i64 poison, i64 8, i64 16, i64 31>
2450 ; CHECK-NEXT: ret <4 x i64> [[TMP1]]
2452 %1 = insertelement <4 x i64> <i64 0, i64 8, i64 16, i64 31>, i64 poison, i64 0
2453 %2 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> %1)
2457 define <16 x i32> @avx512_psllv_d_512_0(<16 x i32> %v) {
2458 ; CHECK-LABEL: @avx512_psllv_d_512_0(
2459 ; CHECK-NEXT: ret <16 x i32> [[V:%.*]]
2461 %1 = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %v, <16 x i32> zeroinitializer)
2465 define <16 x i32> @avx512_psllv_d_512_var(<16 x i32> %v) {
2466 ; CHECK-LABEL: @avx512_psllv_d_512_var(
2467 ; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i32> [[V:%.*]], <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
2468 ; CHECK-NEXT: ret <16 x i32> [[TMP1]]
2470 %1 = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %v, <16 x i32> <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>)
2474 define <16 x i32> @avx512_psllv_d_512_big(<16 x i32> %v) {
2475 ; CHECK-LABEL: @avx512_psllv_d_512_big(
2476 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> [[V:%.*]], <16 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
2477 ; CHECK-NEXT: ret <16 x i32> [[TMP1]]
2479 %1 = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %v, <16 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
2483 define <16 x i32> @avx512_psllv_d_512_allbig(<16 x i32> %v) {
2484 ; CHECK-LABEL: @avx512_psllv_d_512_allbig(
2485 ; CHECK-NEXT: ret <16 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2487 %1 = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %v, <16 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555, i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>)
2491 define <16 x i32> @avx512_psllv_d_512_undef(<16 x i32> %v) {
2492 ; CHECK-LABEL: @avx512_psllv_d_512_undef(
2493 ; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i32> [[V:%.*]], <i32 0, i32 undef, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>
2494 ; CHECK-NEXT: ret <16 x i32> [[TMP1]]
2496 %1 = insertelement <16 x i32> <i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>, i32 undef, i32 1
2497 %2 = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %v, <16 x i32> %1)
2501 define <8 x i64> @avx512_psllv_q_512_0(<8 x i64> %v) {
2502 ; CHECK-LABEL: @avx512_psllv_q_512_0(
2503 ; CHECK-NEXT: ret <8 x i64> [[V:%.*]]
2505 %1 = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %v, <8 x i64> zeroinitializer)
2509 define <8 x i64> @avx512_psllv_q_512_var(<8 x i64> %v) {
2510 ; CHECK-LABEL: @avx512_psllv_q_512_var(
2511 ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i64> [[V:%.*]], <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>
2512 ; CHECK-NEXT: ret <8 x i64> [[TMP1]]
2514 %1 = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %v, <8 x i64> <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>)
2518 define <8 x i64> @avx512_psllv_q_512_big(<8 x i64> %v) {
2519 ; CHECK-LABEL: @avx512_psllv_q_512_big(
2520 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> [[V:%.*]], <8 x i64> <i64 0, i64 8, i64 16, i64 64, i64 0, i64 8, i64 16, i64 64>)
2521 ; CHECK-NEXT: ret <8 x i64> [[TMP1]]
2523 %1 = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %v, <8 x i64> <i64 0, i64 8, i64 16, i64 64, i64 0, i64 8, i64 16, i64 64>)
2527 define <8 x i64> @avx512_psllv_q_512_allbig(<8 x i64> %v) {
2528 ; CHECK-LABEL: @avx512_psllv_q_512_allbig(
2529 ; CHECK-NEXT: ret <8 x i64> <i64 0, i64 undef, i64 0, i64 0, i64 0, i64 undef, i64 0, i64 0>
2531 %1 = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %v, <8 x i64> <i64 64, i64 undef, i64 -128, i64 -60, i64 64, i64 undef, i64 -128, i64 -60>)
2535 define <8 x i64> @avx512_psllv_q_512_undef(<8 x i64> %v) {
2536 ; CHECK-LABEL: @avx512_psllv_q_512_undef(
2537 ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i64> [[V:%.*]], <i64 undef, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>
2538 ; CHECK-NEXT: ret <8 x i64> [[TMP1]]
2540 %1 = insertelement <8 x i64> <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>, i64 undef, i64 0
2541 %2 = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %v, <8 x i64> %1)
2545 define <8 x i16> @avx512_psllv_w_128_0(<8 x i16> %v) {
2546 ; CHECK-LABEL: @avx512_psllv_w_128_0(
2547 ; CHECK-NEXT: ret <8 x i16> [[V:%.*]]
2549 %1 = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %v, <8 x i16> zeroinitializer)
2553 define <8 x i16> @avx512_psllv_w_128_var(<8 x i16> %v) {
2554 ; CHECK-LABEL: @avx512_psllv_w_128_var(
2555 ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i16> [[V:%.*]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
2556 ; CHECK-NEXT: ret <8 x i16> [[TMP1]]
2558 %1 = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %v, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
; psllv.w.128 where only the last lane's constant shift amount (16) equals the
; i16 element width; the other lanes are in range. Expected result: the
; intrinsic call is left unchanged.
2562 define <8 x i16> @avx512_psllv_w_128_big(<8 x i16> %v) {
2563 ; CHECK-LABEL: @avx512_psllv_w_128_big(
2564 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> [[V:%.*]], <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 16>)
2565 ; CHECK-NEXT: ret <8 x i16> [[TMP1]]
2567 %1 = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %v, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 16>)
; psllv.w.128 where every constant shift amount is undef or, read as unsigned,
; at least 16. Expected result: a constant vector that is 0 in the
; out-of-range lanes and undef in the undef lane.
2571 define <8 x i16> @avx512_psllv_w_128_allbig(<8 x i16> %v) {
2572 ; CHECK-LABEL: @avx512_psllv_w_128_allbig(
2573 ; CHECK-NEXT: ret <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef>
2575 %1 = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %v, <8 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 undef>)
; psllv.w.128 with in-range constant shift amounts 0..7, except that lane 0 is
; overwritten with undef via insertelement. Expected result: a generic shl
; whose constant amount vector keeps the undef in lane 0.
2579 define <8 x i16> @avx512_psllv_w_128_undef(<8 x i16> %v) {
2580 ; CHECK-LABEL: @avx512_psllv_w_128_undef(
2581 ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i16> [[V:%.*]], <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
2582 ; CHECK-NEXT: ret <8 x i16> [[TMP1]]
2584 %1 = insertelement <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, i16 undef, i64 0
2585 %2 = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %v, <8 x i16> %1)
; psllv.w.256 by an all-zero shift vector. Expected result: the call folds
; away and the unshifted input %v is returned.
2589 define <16 x i16> @avx512_psllv_w_256_0(<16 x i16> %v) {
2590 ; CHECK-LABEL: @avx512_psllv_w_256_0(
2591 ; CHECK-NEXT: ret <16 x i16> [[V:%.*]]
2593 %1 = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %v, <16 x i16> zeroinitializer)
; psllv.w.256 with distinct constant shift amounts 0..15, all below the i16
; element width. Expected result: a generic vector shl by the same constants.
2597 define <16 x i16> @avx512_psllv_w_256_var(<16 x i16> %v) {
2598 ; CHECK-LABEL: @avx512_psllv_w_256_var(
2599 ; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i16> [[V:%.*]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
2600 ; CHECK-NEXT: ret <16 x i16> [[TMP1]]
2602 %1 = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %v, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>)
; psllv.w.256 where only the last lane's constant shift amount (16) equals the
; i16 element width; the other lanes are in range. Expected result: the
; intrinsic call is left unchanged.
2606 define <16 x i16> @avx512_psllv_w_256_big(<16 x i16> %v) {
2607 ; CHECK-LABEL: @avx512_psllv_w_256_big(
2608 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> [[V:%.*]], <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 16>)
2609 ; CHECK-NEXT: ret <16 x i16> [[TMP1]]
2611 %1 = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %v, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 16>)
; psllv.w.256 where every constant shift amount is undef or, read as unsigned,
; at least 16. Expected result: a constant vector that is 0 in the
; out-of-range lanes and undef in the undef lane (lane 8).
2615 define <16 x i16> @avx512_psllv_w_256_allbig(<16 x i16> %v) {
2616 ; CHECK-LABEL: @avx512_psllv_w_256_allbig(
2617 ; CHECK-NEXT: ret <16 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
2619 %1 = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %v, <16 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 -7, i16 undef, i16 64, i16 -10, i16 256, i16 16, i16 28, i16 65535, i16 32767>)
; psllv.w.256 with in-range constant shift amounts 0..15, except that lane 0
; is overwritten with undef via insertelement. Expected result: a generic shl
; whose constant amount vector keeps the undef in lane 0.
2623 define <16 x i16> @avx512_psllv_w_256_undef(<16 x i16> %v) {
2624 ; CHECK-LABEL: @avx512_psllv_w_256_undef(
2625 ; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i16> [[V:%.*]], <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
2626 ; CHECK-NEXT: ret <16 x i16> [[TMP1]]
2628 %1 = insertelement <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, i16 undef, i64 0
2629 %2 = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %v, <16 x i16> %1)
; psllv.w.512 by an all-zero shift vector. Expected result: the call folds
; away and the unshifted input %v is returned.
2633 define <32 x i16> @avx512_psllv_w_512_0(<32 x i16> %v) {
2634 ; CHECK-LABEL: @avx512_psllv_w_512_0(
2635 ; CHECK-NEXT: ret <32 x i16> [[V:%.*]]
2637 %1 = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> zeroinitializer)
; psllv.w.512 with constant shift amounts 0..15 followed by 15..0, all below
; the i16 element width. Expected result: a generic vector shl by the same
; constants.
2641 define <32 x i16> @avx512_psllv_w_512_var(<32 x i16> %v) {
2642 ; CHECK-LABEL: @avx512_psllv_w_512_var(
2643 ; CHECK-NEXT: [[TMP1:%.*]] = shl <32 x i16> [[V:%.*]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>
2644 ; CHECK-NEXT: ret <32 x i16> [[TMP1]]
2646 %1 = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
; psllv.w.512 where a single lane (lane 16) has a constant shift amount of 16,
; equal to the i16 element width; the other lanes are in range. Expected
; result: the intrinsic call is left unchanged.
2650 define <32 x i16> @avx512_psllv_w_512_big(<32 x i16> %v) {
2651 ; CHECK-LABEL: @avx512_psllv_w_512_big(
2652 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> [[V:%.*]], <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
2653 ; CHECK-NEXT: ret <32 x i16> [[TMP1]]
2655 %1 = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
; psllv.w.512 where every constant shift amount is undef or, read as unsigned,
; at least 16. Expected result: a constant vector that is 0 in the
; out-of-range lanes and undef in the four undef lanes (8, 18, 26, 29).
2659 define <32 x i16> @avx512_psllv_w_512_allbig(<32 x i16> %v) {
2660 ; CHECK-LABEL: @avx512_psllv_w_512_allbig(
2661 ; CHECK-NEXT: ret <32 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 undef, i16 0, i16 0>
2663 %1 = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 -7, i16 undef, i16 64, i16 -10, i16 128, i16 16, i16 28, i16 65535, i16 32767, i16 56, i16 -14, i16 undef, i16 16, i16 67, i16 567, i16 -32768, i16 4096, i16 8192, i16 -12345, i16 undef, i16 345, i16 123, i16 undef, i16 1024, i16 54321>)
; psllv.w.512 with in-range constant shift amounts (0..15 then 15..0), except
; that lane 0 is overwritten with undef via insertelement. Expected result: a
; generic shl whose constant amount vector keeps the undef in lane 0.
2667 define <32 x i16> @avx512_psllv_w_512_undef(<32 x i16> %v) {
2668 ; CHECK-LABEL: @avx512_psllv_w_512_undef(
2669 ; CHECK-NEXT: [[TMP1:%.*]] = shl <32 x i16> [[V:%.*]], <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>
2670 ; CHECK-NEXT: ret <32 x i16> [[TMP1]]
2672 %1 = insertelement <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 undef, i64 0
2673 %2 = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> %1)
2678 ; Vector Masked Shift Amounts
; psra.w whose shift-amount vector is masked so that lane 0 is limited to
; 0..15, lanes 1-3 are zero, and lanes 4-7 use an undef mask. Expected result:
; a generic ashr by a splat of the masked lane 0 (unused mask lanes become
; poison).
2681 define <8 x i16> @sse2_psra_w_128_masked(<8 x i16> %v, <8 x i16> %a) {
2682 ; CHECK-LABEL: @sse2_psra_w_128_masked(
2683 ; CHECK-NEXT: [[TMP1:%.*]] = and <8 x i16> [[A:%.*]], <i16 15, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison>
2684 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> zeroinitializer
2685 ; CHECK-NEXT: [[TMP3:%.*]] = ashr <8 x i16> [[V:%.*]], [[TMP2]]
2686 ; CHECK-NEXT: ret <8 x i16> [[TMP3]]
2688 %1 = and <8 x i16> %a, <i16 15, i16 0, i16 0, i16 0, i16 undef, i16 undef, i16 undef, i16 undef>
2689 %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %1)
; avx2.psra.d on <8 x i32> data with a <4 x i32> shift-amount vector masked to
; <31, 0, undef, undef>. Expected result: a generic ashr by lane 0 of the
; masked vector splatted to eight lanes.
2693 define <8 x i32> @avx2_psra_d_256_masked(<8 x i32> %v, <4 x i32> %a) {
2694 ; CHECK-LABEL: @avx2_psra_d_256_masked(
2695 ; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], <i32 31, i32 poison, i32 poison, i32 poison>
2696 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> zeroinitializer
2697 ; CHECK-NEXT: [[TMP3:%.*]] = ashr <8 x i32> [[V:%.*]], [[TMP2]]
2698 ; CHECK-NEXT: ret <8 x i32> [[TMP3]]
2700 %1 = and <4 x i32> %a, <i32 31, i32 0, i32 undef, i32 undef>
2701 %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> %1)
; avx512.psra.q.512 on <8 x i64> data with a <2 x i64> shift-amount vector
; masked to <63, undef>. Expected result: a generic ashr by lane 0 of the
; masked vector splatted to eight lanes.
2705 define <8 x i64> @avx512_psra_q_512_masked(<8 x i64> %v, <2 x i64> %a) {
2706 ; CHECK-LABEL: @avx512_psra_q_512_masked(
2707 ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[A:%.*]], <i64 63, i64 poison>
2708 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <8 x i32> zeroinitializer
2709 ; CHECK-NEXT: [[TMP3:%.*]] = ashr <8 x i64> [[V:%.*]], [[TMP2]]
2710 ; CHECK-NEXT: ret <8 x i64> [[TMP3]]
2712 %1 = and <2 x i64> %a, <i64 63, i64 undef>
2713 %2 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> %1)
; sse2.psrl.d with the shift-amount vector masked to <31, 0, undef, undef>.
; Expected result: a generic lshr by a splat of the masked lane 0.
2717 define <4 x i32> @sse2_psrl_d_128_masked(<4 x i32> %v, <4 x i32> %a) {
2718 ; CHECK-LABEL: @sse2_psrl_d_128_masked(
2719 ; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], <i32 31, i32 poison, i32 poison, i32 poison>
2720 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer
2721 ; CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i32> [[V:%.*]], [[TMP2]]
2722 ; CHECK-NEXT: ret <4 x i32> [[TMP3]]
2724 %1 = and <4 x i32> %a, <i32 31, i32 0, i32 undef, i32 undef>
2725 %2 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> %1)
; avx2.psrl.q on <4 x i64> data with a <2 x i64> shift-amount vector masked to
; <63, undef>. Expected result: a generic lshr by lane 0 of the masked vector
; splatted to four lanes.
2729 define <4 x i64> @avx2_psrl_q_256_masked(<4 x i64> %v, <2 x i64> %a) {
2730 ; CHECK-LABEL: @avx2_psrl_q_256_masked(
2731 ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[A:%.*]], <i64 63, i64 poison>
2732 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <4 x i32> zeroinitializer
2733 ; CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i64> [[V:%.*]], [[TMP2]]
2734 ; CHECK-NEXT: ret <4 x i64> [[TMP3]]
2736 %1 = and <2 x i64> %a, <i64 63, i64 undef>
2737 %2 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> %1)
; avx512.psrl.w.512 on <32 x i16> data with an <8 x i16> shift-amount vector
; masked so lane 0 is limited to 0..15 and lanes 1-3 are zero. Expected
; result: a generic lshr by the masked lane 0 splatted to 32 lanes.
2741 define <32 x i16> @avx512_psrl_w_512_masked(<32 x i16> %v, <8 x i16> %a) {
2742 ; CHECK-LABEL: @avx512_psrl_w_512_masked(
2743 ; CHECK-NEXT: [[TMP1:%.*]] = and <8 x i16> [[A:%.*]], <i16 15, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison>
2744 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <32 x i32> zeroinitializer
2745 ; CHECK-NEXT: [[TMP3:%.*]] = lshr <32 x i16> [[V:%.*]], [[TMP2]]
2746 ; CHECK-NEXT: ret <32 x i16> [[TMP3]]
2748 %1 = and <8 x i16> %a, <i16 15, i16 0, i16 0, i16 0, i16 undef, i16 undef, i16 undef, i16 undef>
2749 %2 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> %1)
; sse2.psll.q with the shift-amount vector masked to <63, undef>. Expected
; result: a generic shl by a splat of the masked lane 0.
2753 define <2 x i64> @sse2_psll_q_128_masked(<2 x i64> %v, <2 x i64> %a) {
2754 ; CHECK-LABEL: @sse2_psll_q_128_masked(
2755 ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[A:%.*]], <i64 63, i64 poison>
2756 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <2 x i32> zeroinitializer
2757 ; CHECK-NEXT: [[TMP3:%.*]] = shl <2 x i64> [[V:%.*]], [[TMP2]]
2758 ; CHECK-NEXT: ret <2 x i64> [[TMP3]]
2760 %1 = and <2 x i64> %a, <i64 63, i64 undef>
2761 %2 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %1)
2765 ; The shift amount is in range (masked with 31 and high 32-bits are zero),
2766 ; so convert to standard IR - https://llvm.org/PR50123
; The 64-bit shift amount is assembled through a <4 x i32> view of %a: lane 0
; is masked with 31 and lane 1 is overwritten with 0 before casting back to
; <2 x i64>. Expected result: a generic shl by a splat of the low i64 of that
; value.
2768 define <2 x i64> @sse2_psll_q_128_masked_bitcast(<2 x i64> %v, <2 x i64> %a) {
2769 ; CHECK-LABEL: @sse2_psll_q_128_masked_bitcast(
2770 ; CHECK-NEXT: [[B:%.*]] = bitcast <2 x i64> [[A:%.*]] to <4 x i32>
2771 ; CHECK-NEXT: [[M:%.*]] = and <4 x i32> [[B]], <i32 31, i32 poison, i32 poison, i32 poison>
2772 ; CHECK-NEXT: [[I:%.*]] = insertelement <4 x i32> [[M]], i32 0, i64 1
2773 ; CHECK-NEXT: [[SHAMT:%.*]] = bitcast <4 x i32> [[I]] to <2 x i64>
2774 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[SHAMT]], <2 x i64> poison, <2 x i32> zeroinitializer
2775 ; CHECK-NEXT: [[R:%.*]] = shl <2 x i64> [[V:%.*]], [[TMP1]]
2776 ; CHECK-NEXT: ret <2 x i64> [[R]]
2778 %b = bitcast <2 x i64> %a to <4 x i32>
2779 %m = and <4 x i32> %b, <i32 31, i32 poison, i32 poison, i32 poison>
2780 %i = insertelement <4 x i32> %m, i32 0, i32 1
2781 %shamt = bitcast <4 x i32> %i to <2 x i64>
2782 %r = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %shamt) #2
2786 ; TODO: This could be recognized as an over-shift.
; Same construction as @sse2_psll_q_128_masked_bitcast, but lane 1 of the
; <4 x i32> view is overwritten with 1 instead of 0. With the little-endian
; datalayout declared at the top of the file, that lane is the upper half of
; the low i64, so the shift amount is no longer known to be below 64.
; Expected result (current behaviour): the intrinsic call is kept.
2788 define <2 x i64> @sse2_psll_q_128_masked_bitcast_overshift(<2 x i64> %v, <2 x i64> %a) {
2789 ; CHECK-LABEL: @sse2_psll_q_128_masked_bitcast_overshift(
2790 ; CHECK-NEXT: [[B:%.*]] = bitcast <2 x i64> [[A:%.*]] to <4 x i32>
2791 ; CHECK-NEXT: [[M:%.*]] = and <4 x i32> [[B]], <i32 31, i32 poison, i32 poison, i32 poison>
2792 ; CHECK-NEXT: [[I:%.*]] = insertelement <4 x i32> [[M]], i32 1, i64 1
2793 ; CHECK-NEXT: [[SHAMT:%.*]] = bitcast <4 x i32> [[I]] to <2 x i64>
2794 ; CHECK-NEXT: [[R:%.*]] = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> [[V:%.*]], <2 x i64> [[SHAMT]])
2795 ; CHECK-NEXT: ret <2 x i64> [[R]]
2797 %b = bitcast <2 x i64> %a to <4 x i32>
2798 %m = and <4 x i32> %b, <i32 31, i32 poison, i32 poison, i32 poison>
2799 %i = insertelement <4 x i32> %m, i32 1, i32 1
2800 %shamt = bitcast <4 x i32> %i to <2 x i64>
2801 %r = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %shamt) #2
; avx2.psll.w on <16 x i16> data with an <8 x i16> shift-amount vector masked
; so lane 0 is limited to 0..15 and lanes 1-3 are zero. Expected result: a
; generic shl by the masked lane 0 splatted to 16 lanes.
2805 define <16 x i16> @avx2_psll_w_256_masked(<16 x i16> %v, <8 x i16> %a) {
2806 ; CHECK-LABEL: @avx2_psll_w_256_masked(
2807 ; CHECK-NEXT: [[TMP1:%.*]] = and <8 x i16> [[A:%.*]], <i16 15, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison>
2808 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <16 x i32> zeroinitializer
2809 ; CHECK-NEXT: [[TMP3:%.*]] = shl <16 x i16> [[V:%.*]], [[TMP2]]
2810 ; CHECK-NEXT: ret <16 x i16> [[TMP3]]
2812 %1 = and <8 x i16> %a, <i16 15, i16 0, i16 0, i16 0, i16 undef, i16 undef, i16 undef, i16 undef>
2813 %2 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> %1)
; avx512.psll.d.512 on <16 x i32> data with a <4 x i32> shift-amount vector
; masked to <31, 0, undef, undef>. Expected result: a generic shl by lane 0 of
; the masked vector splatted to 16 lanes.
2817 define <16 x i32> @avx512_psll_d_512_masked(<16 x i32> %v, <4 x i32> %a) {
2818 ; CHECK-LABEL: @avx512_psll_d_512_masked(
2819 ; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], <i32 31, i32 poison, i32 poison, i32 poison>
2820 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <16 x i32> zeroinitializer
2821 ; CHECK-NEXT: [[TMP3:%.*]] = shl <16 x i32> [[V:%.*]], [[TMP2]]
2822 ; CHECK-NEXT: ret <16 x i32> [[TMP3]]
2824 %1 = and <4 x i32> %a, <i32 31, i32 0, i32 undef, i32 undef>
2825 %2 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> %1)
; sse2.psrai.w with a non-constant i32 shift amount %1.
; NOTE(review): the definition of %1 is not visible in this excerpt; the
; expected output (trunc to i16, then and with 15) suggests it is %a masked
; into the 0..15 range -- confirm against the full file.
; Expected result: a generic ashr by a splat of the masked amount.
2829 define <8 x i16> @sse2_psrai_w_128_masked(<8 x i16> %v, i32 %a) {
2830 ; CHECK-LABEL: @sse2_psrai_w_128_masked(
2831 ; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[A:%.*]] to i16
2832 ; CHECK-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
2833 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[TMP2]], i64 0
2834 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
2835 ; CHECK-NEXT: [[TMP3:%.*]] = ashr <8 x i16> [[V:%.*]], [[DOTSPLAT]]
2836 ; CHECK-NEXT: ret <8 x i16> [[TMP3]]
2839 %2 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 %1)
; avx2.psrai.d with a non-constant i32 shift amount %1.
; NOTE(review): the definition of %1 is not visible in this excerpt; the
; expected output (and with 31) suggests it is %a masked into the 0..31
; range -- confirm against the full file.
; Expected result: a generic ashr by a splat of the masked amount.
2843 define <8 x i32> @avx2_psrai_d_256_masked(<8 x i32> %v, i32 %a) {
2844 ; CHECK-LABEL: @avx2_psrai_d_256_masked(
2845 ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 31
2846 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[TMP1]], i64 0
2847 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
2848 ; CHECK-NEXT: [[TMP2:%.*]] = ashr <8 x i32> [[V:%.*]], [[DOTSPLAT]]
2849 ; CHECK-NEXT: ret <8 x i32> [[TMP2]]
2852 %2 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 %1)
; avx512.psrai.q.512 with a non-constant i32 shift amount %1.
; NOTE(review): the definition of %1 is not visible in this excerpt; the
; expected output (and with 63) suggests it is %a masked into the 0..63
; range -- confirm against the full file.
; Expected result: the masked amount is zero-extended to i64, splatted, and
; used by a generic ashr.
2856 define <8 x i64> @avx512_psrai_q_512_masked(<8 x i64> %v, i32 %a) {
2857 ; CHECK-LABEL: @avx512_psrai_q_512_masked(
2858 ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 63
2859 ; CHECK-NEXT: [[TMP2:%.*]] = zext nneg i32 [[TMP1]] to i64
2860 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[TMP2]], i64 0
2861 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i64> [[DOTSPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
2862 ; CHECK-NEXT: [[TMP3:%.*]] = ashr <8 x i64> [[V:%.*]], [[DOTSPLAT]]
2863 ; CHECK-NEXT: ret <8 x i64> [[TMP3]]
2866 %2 = tail call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %v, i32 %1)
; sse2.psrli.d with a non-constant i32 shift amount %1.
; NOTE(review): the definition of %1 is not visible in this excerpt; the
; expected output (and with 31) suggests it is %a masked into the 0..31
; range -- confirm against the full file.
; Expected result: a generic lshr by a splat of the masked amount.
2870 define <4 x i32> @sse2_psrli_d_128_masked(<4 x i32> %v, i32 %a) {
2871 ; CHECK-LABEL: @sse2_psrli_d_128_masked(
2872 ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 31
2873 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i64 0
2874 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
2875 ; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[V:%.*]], [[DOTSPLAT]]
2876 ; CHECK-NEXT: ret <4 x i32> [[TMP2]]
2879 %2 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 %1)
; avx2.psrli.q with a non-constant i32 shift amount %1.
; NOTE(review): the definition of %1 is not visible in this excerpt; the
; expected output (and with 63) suggests it is %a masked into the 0..63
; range -- confirm against the full file.
; Expected result: the masked amount is zero-extended to i64, splatted, and
; used by a generic lshr.
2883 define <4 x i64> @avx2_psrli_q_256_masked(<4 x i64> %v, i32 %a) {
2884 ; CHECK-LABEL: @avx2_psrli_q_256_masked(
2885 ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 63
2886 ; CHECK-NEXT: [[TMP2:%.*]] = zext nneg i32 [[TMP1]] to i64
2887 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP2]], i64 0
2888 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
2889 ; CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i64> [[V:%.*]], [[DOTSPLAT]]
2890 ; CHECK-NEXT: ret <4 x i64> [[TMP3]]
2893 %2 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 %1)
; avx512.psrli.w.512 with a non-constant i32 shift amount %1.
; NOTE(review): the definition of %1 is not visible in this excerpt; the
; expected output (trunc to i16, then and with 15) suggests it is %a masked
; into the 0..15 range -- confirm against the full file.
; Expected result: a generic lshr by a splat of the masked amount.
2897 define <32 x i16> @avx512_psrli_w_512_masked(<32 x i16> %v, i32 %a) {
2898 ; CHECK-LABEL: @avx512_psrli_w_512_masked(
2899 ; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[A:%.*]] to i16
2900 ; CHECK-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
2901 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <32 x i16> poison, i16 [[TMP2]], i64 0
2902 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <32 x i16> [[DOTSPLATINSERT]], <32 x i16> poison, <32 x i32> zeroinitializer
2903 ; CHECK-NEXT: [[TMP3:%.*]] = lshr <32 x i16> [[V:%.*]], [[DOTSPLAT]]
2904 ; CHECK-NEXT: ret <32 x i16> [[TMP3]]
2907 %2 = tail call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %v, i32 %1)
; sse2.pslli.q with a non-constant i32 shift amount %1.
; NOTE(review): the definition of %1 is not visible in this excerpt; the
; expected output (and with 63) suggests it is %a masked into the 0..63
; range -- confirm against the full file.
; Expected result: the masked amount is zero-extended to i64, splatted, and
; used by a generic shl.
2911 define <2 x i64> @sse2_pslli_q_128_masked(<2 x i64> %v, i32 %a) {
2912 ; CHECK-LABEL: @sse2_pslli_q_128_masked(
2913 ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 63
2914 ; CHECK-NEXT: [[TMP2:%.*]] = zext nneg i32 [[TMP1]] to i64
2915 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i64 0
2916 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
2917 ; CHECK-NEXT: [[TMP3:%.*]] = shl <2 x i64> [[V:%.*]], [[DOTSPLAT]]
2918 ; CHECK-NEXT: ret <2 x i64> [[TMP3]]
2921 %2 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 %1)
; avx2.pslli.w with a non-constant i32 shift amount %1.
; NOTE(review): the definition of %1 is not visible in this excerpt; the
; expected output (trunc to i16, then and with 15) suggests it is %a masked
; into the 0..15 range -- confirm against the full file.
; Expected result: a generic shl by a splat of the masked amount.
2925 define <16 x i16> @avx2_pslli_w_256_masked(<16 x i16> %v, i32 %a) {
2926 ; CHECK-LABEL: @avx2_pslli_w_256_masked(
2927 ; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[A:%.*]] to i16
2928 ; CHECK-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
2929 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[TMP2]], i64 0
2930 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i16> [[DOTSPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
2931 ; CHECK-NEXT: [[TMP3:%.*]] = shl <16 x i16> [[V:%.*]], [[DOTSPLAT]]
2932 ; CHECK-NEXT: ret <16 x i16> [[TMP3]]
2935 %2 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 %1)
; avx512.pslli.d.512 with a non-constant i32 shift amount %1.
; NOTE(review): the definition of %1 is not visible in this excerpt; the
; expected output (and with 31) suggests it is %a masked into the 0..31
; range -- confirm against the full file.
; Expected result: a generic shl by a splat of the masked amount.
2939 define <16 x i32> @avx512_pslli_d_512_masked(<16 x i32> %v, i32 %a) {
2940 ; CHECK-LABEL: @avx512_pslli_d_512_masked(
2941 ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 31
2942 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[TMP1]], i64 0
2943 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i32> [[DOTSPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
2944 ; CHECK-NEXT: [[TMP2:%.*]] = shl <16 x i32> [[V:%.*]], [[DOTSPLAT]]
2945 ; CHECK-NEXT: ret <16 x i32> [[TMP2]]
2948 %2 = tail call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %v, i32 %1)
; avx2.psrav.d with every per-lane shift amount masked with 31. Expected
; result: a generic ashr by the masked vector (mask printed as a splat).
2952 define <4 x i32> @avx2_psrav_d_128_masked(<4 x i32> %v, <4 x i32> %a) {
2953 ; CHECK-LABEL: @avx2_psrav_d_128_masked(
2954 ; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], splat (i32 31)
2955 ; CHECK-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[V:%.*]], [[TMP1]]
2956 ; CHECK-NEXT: ret <4 x i32> [[TMP2]]
2958 %1 = and <4 x i32> %a, <i32 31, i32 31, i32 31, i32 31>
2959 %2 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> %1)
; avx2.psrav.d whose shift amounts come from a shuffle that repeats lanes 2
; and 3 of a vector masked with <undef, undef, 15, 31>. Expected result: a
; generic ashr by the shuffled, masked amounts (undef operands shown as
; poison).
2963 define <4 x i32> @avx2_psrav_d_128_masked_shuffle(<4 x i32> %v, <4 x i32> %a) {
2964 ; CHECK-LABEL: @avx2_psrav_d_128_masked_shuffle(
2965 ; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], <i32 poison, i32 poison, i32 15, i32 31>
2966 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
2967 ; CHECK-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[V:%.*]], [[TMP2]]
2968 ; CHECK-NEXT: ret <4 x i32> [[TMP3]]
2970 %1 = and <4 x i32> %a, <i32 undef, i32 undef, i32 15, i32 31>
2971 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
2972 %3 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> %2)
; avx2.psrav.d.256 with per-lane shift amounts masked by constants that are
; all at most 31. Expected result: a generic ashr by the masked vector.
2976 define <8 x i32> @avx2_psrav_d_256_masked(<8 x i32> %v, <8 x i32> %a) {
2977 ; CHECK-LABEL: @avx2_psrav_d_256_masked(
2978 ; CHECK-NEXT: [[TMP1:%.*]] = and <8 x i32> [[A:%.*]], <i32 0, i32 1, i32 7, i32 15, i32 16, i32 30, i32 31, i32 31>
2979 ; CHECK-NEXT: [[TMP2:%.*]] = ashr <8 x i32> [[V:%.*]], [[TMP1]]
2980 ; CHECK-NEXT: ret <8 x i32> [[TMP2]]
2982 %1 = and <8 x i32> %a, <i32 0, i32 1, i32 7, i32 15, i32 16, i32 30, i32 31, i32 31>
2983 %2 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> %1)
; avx512.psrav.w.512 with per-lane shift amounts masked by the constants 0..7
; repeated four times (all below 16). Expected result: a generic ashr by the
; masked vector.
2987 define <32 x i16> @avx512_psrav_w_512_masked(<32 x i16> %v, <32 x i16> %a) {
2988 ; CHECK-LABEL: @avx512_psrav_w_512_masked(
2989 ; CHECK-NEXT: [[TMP1:%.*]] = and <32 x i16> [[A:%.*]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
2990 ; CHECK-NEXT: [[TMP2:%.*]] = ashr <32 x i16> [[V:%.*]], [[TMP1]]
2991 ; CHECK-NEXT: ret <32 x i16> [[TMP2]]
2993 %1 = and <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
2994 %2 = tail call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %v, <32 x i16> %1)
; avx2.psrlv.q with per-lane shift amounts masked by <32, 63> (both below 64).
; Expected result: a generic lshr by the masked vector.
2998 define <2 x i64> @avx2_psrlv_q_128_masked(<2 x i64> %v, <2 x i64> %a) {
2999 ; CHECK-LABEL: @avx2_psrlv_q_128_masked(
3000 ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[A:%.*]], <i64 32, i64 63>
3001 ; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[V:%.*]], [[TMP1]]
3002 ; CHECK-NEXT: ret <2 x i64> [[TMP2]]
3004 %1 = and <2 x i64> %a, <i64 32, i64 63>
3005 %2 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> %1)
; avx2.psrlv.d.256 with per-lane shift amounts masked by constants that are
; all at most 31. Expected result: a generic lshr by the masked vector.
3009 define <8 x i32> @avx2_psrlv_d_256_masked(<8 x i32> %v, <8 x i32> %a) {
3010 ; CHECK-LABEL: @avx2_psrlv_d_256_masked(
3011 ; CHECK-NEXT: [[TMP1:%.*]] = and <8 x i32> [[A:%.*]], <i32 0, i32 1, i32 7, i32 15, i32 16, i32 30, i32 31, i32 31>
3012 ; CHECK-NEXT: [[TMP2:%.*]] = lshr <8 x i32> [[V:%.*]], [[TMP1]]
3013 ; CHECK-NEXT: ret <8 x i32> [[TMP2]]
3015 %1 = and <8 x i32> %a, <i32 0, i32 1, i32 7, i32 15, i32 16, i32 30, i32 31, i32 31>
3016 %2 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> %1)
; avx512.psrlv.q.512 with per-lane shift amounts masked by constants that are
; all at most 63. Expected result: a generic lshr by the masked vector.
3020 define <8 x i64> @avx512_psrlv_q_512_masked(<8 x i64> %v, <8 x i64> %a) {
3021 ; CHECK-LABEL: @avx512_psrlv_q_512_masked(
3022 ; CHECK-NEXT: [[TMP1:%.*]] = and <8 x i64> [[A:%.*]], <i64 0, i64 1, i64 4, i64 16, i64 32, i64 47, i64 62, i64 63>
3023 ; CHECK-NEXT: [[TMP2:%.*]] = lshr <8 x i64> [[V:%.*]], [[TMP1]]
3024 ; CHECK-NEXT: ret <8 x i64> [[TMP2]]
3026 %1 = and <8 x i64> %a, <i64 0, i64 1, i64 4, i64 16, i64 32, i64 47, i64 62, i64 63>
3027 %2 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> %1)
; avx2.psllv.d with per-lane shift amounts masked by <0, 15, 16, 31> (all at
; most 31). Expected result: a generic shl by the masked vector.
3031 define <4 x i32> @avx2_psllv_d_128_masked(<4 x i32> %v, <4 x i32> %a) {
3032 ; CHECK-LABEL: @avx2_psllv_d_128_masked(
3033 ; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], <i32 0, i32 15, i32 16, i32 31>
3034 ; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i32> [[V:%.*]], [[TMP1]]
3035 ; CHECK-NEXT: ret <4 x i32> [[TMP2]]
3037 %1 = and <4 x i32> %a, <i32 0, i32 15, i32 16, i32 31>
3038 %2 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> %1)
; avx2.psllv.q.256 with per-lane shift amounts masked by <0, 16, 32, 63> (all
; at most 63). Expected result: a generic shl by the masked vector.
3042 define <4 x i64> @avx2_psllv_q_256_masked(<4 x i64> %v, <4 x i64> %a) {
3043 ; CHECK-LABEL: @avx2_psllv_q_256_masked(
3044 ; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i64> [[A:%.*]], <i64 0, i64 16, i64 32, i64 63>
3045 ; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i64> [[V:%.*]], [[TMP1]]
3046 ; CHECK-NEXT: ret <4 x i64> [[TMP2]]
3048 %1 = and <4 x i64> %a, <i64 0, i64 16, i64 32, i64 63>
3049 %2 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> %1)
; avx512.psllv.w.512 with per-lane shift amounts masked by the constants 0..7
; repeated four times (all below 16). Expected result: a generic shl by the
; masked vector.
3053 define <32 x i16> @avx512_psllv_w_512_masked(<32 x i16> %v, <32 x i16> %a) {
3054 ; CHECK-LABEL: @avx512_psllv_w_512_masked(
3055 ; CHECK-NEXT: [[TMP1:%.*]] = and <32 x i16> [[A:%.*]], <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
3056 ; CHECK-NEXT: [[TMP2:%.*]] = shl <32 x i16> [[V:%.*]], [[TMP1]]
3057 ; CHECK-NEXT: ret <32 x i16> [[TMP2]]
3059 %1 = and <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
3060 %2 = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> %1)
3065 ; Vector Demanded Bits
; The shift-amount operand of sse2.psra.w is a shuffle that copies lanes 0-3
; of %a into lanes 4-7. Expected result: the shuffle is dropped and %a is
; passed to the intrinsic directly.
3068 define <8 x i16> @sse2_psra_w_var(<8 x i16> %v, <8 x i16> %a) {
3069 ; CHECK-LABEL: @sse2_psra_w_var(
3070 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> [[V:%.*]], <8 x i16> [[A:%.*]])
3071 ; CHECK-NEXT: ret <8 x i16> [[TMP1]]
3073 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
3074 %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %1)
; The shift-amount operand of sse2.psra.w is a <2 x i64> shuffle that
; duplicates lane 0 of %a, bitcast to <8 x i16>. Expected result: the shuffle
; is dropped and only the bitcast of %a feeds the intrinsic.
3078 define <8 x i16> @sse2_psra_w_var_bc(<8 x i16> %v, <2 x i64> %a) {
3079 ; CHECK-LABEL: @sse2_psra_w_var_bc(
3080 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[A:%.*]] to <8 x i16>
3081 ; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> [[V:%.*]], <8 x i16> [[TMP1]])
3082 ; CHECK-NEXT: ret <8 x i16> [[TMP2]]
3084 %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3085 %2 = bitcast <2 x i64> %1 to <8 x i16>
3086 %3 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %2)
; The shift-amount operand of sse2.psra.d is a shuffle that copies lanes 0-1
; of %a into lanes 2-3. Expected result: the shuffle is dropped and %a is
; passed to the intrinsic directly.
3090 define <4 x i32> @sse2_psra_d_var(<4 x i32> %v, <4 x i32> %a) {
3091 ; CHECK-LABEL: @sse2_psra_d_var(
3092 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> [[V:%.*]], <4 x i32> [[A:%.*]])
3093 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
3095 %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
3096 %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %1)
; The shift-amount operand of sse2.psra.d is an <8 x i16> shuffle that copies
; lanes 0-3 of %a into lanes 4-7, bitcast to <4 x i32>. Expected result: the
; shuffle is dropped and only the bitcast of %a feeds the intrinsic.
3100 define <4 x i32> @sse2_psra_d_var_bc(<4 x i32> %v, <8 x i16> %a) {
3101 ; CHECK-LABEL: @sse2_psra_d_var_bc(
3102 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[A:%.*]] to <4 x i32>
3103 ; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> [[V:%.*]], <4 x i32> [[TMP1]])
3104 ; CHECK-NEXT: ret <4 x i32> [[TMP2]]
3106 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
3107 %2 = bitcast <8 x i16> %1 to <4 x i32>
3108 %3 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %2)
; The <8 x i16> shift-amount operand of avx2.psra.w is a shuffle that copies
; lanes 0-3 of %a into lanes 4-7. Expected result: the shuffle is dropped and
; %a is passed to the intrinsic directly.
3112 define <16 x i16> @avx2_psra_w_var(<16 x i16> %v, <8 x i16> %a) {
3113 ; CHECK-LABEL: @avx2_psra_w_var(
3114 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> [[V:%.*]], <8 x i16> [[A:%.*]])
3115 ; CHECK-NEXT: ret <16 x i16> [[TMP1]]
3117 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
3118 %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> %1)
; The <4 x i32> shift-amount operand of avx2.psra.d is a shuffle that copies
; lanes 0-1 of %a into lanes 2-3. Expected result: the shuffle is dropped and
; %a is passed to the intrinsic directly.
3122 define <8 x i32> @avx2_psra_d_var(<8 x i32> %v, <4 x i32> %a) {
3123 ; CHECK-LABEL: @avx2_psra_d_var(
3124 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> [[V:%.*]], <4 x i32> [[A:%.*]])
3125 ; CHECK-NEXT: ret <8 x i32> [[TMP1]]
3127 %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
3128 %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> %1)
; The shift-amount operand of avx512.psra.q.128 is a shuffle that duplicates
; lane 0 of %a into lane 1. Expected result: the shuffle is dropped and %a is
; passed to the intrinsic directly.
3132 define <2 x i64> @avx512_psra_q_128_var(<2 x i64> %v, <2 x i64> %a) {
3133 ; CHECK-LABEL: @avx512_psra_q_128_var(
3134 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> [[V:%.*]], <2 x i64> [[A:%.*]])
3135 ; CHECK-NEXT: ret <2 x i64> [[TMP1]]
3137 %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3138 %2 = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %v, <2 x i64> %1)
; The <2 x i64> shift-amount operand of avx512.psra.q.256 is a shuffle that
; duplicates lane 0 of %a into lane 1. Expected result: the shuffle is dropped
; and %a is passed to the intrinsic directly.
3142 define <4 x i64> @avx512_psra_q_256_var(<4 x i64> %v, <2 x i64> %a) {
3143 ; CHECK-LABEL: @avx512_psra_q_256_var(
3144 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> [[V:%.*]], <2 x i64> [[A:%.*]])
3145 ; CHECK-NEXT: ret <4 x i64> [[TMP1]]
3147 %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3148 %2 = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %v, <2 x i64> %1)
; The <8 x i16> shift-amount operand of avx512.psra.w.512 is a shuffle that
; copies lanes 0-3 of %a into lanes 4-7. Expected result: the shuffle is
; dropped and %a is passed to the intrinsic directly.
3152 define <32 x i16> @avx512_psra_w_512_var(<32 x i16> %v, <8 x i16> %a) {
3153 ; CHECK-LABEL: @avx512_psra_w_512_var(
3154 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> [[V:%.*]], <8 x i16> [[A:%.*]])
3155 ; CHECK-NEXT: ret <32 x i16> [[TMP1]]
3157 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
3158 %2 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> %1)
; The <4 x i32> shift-amount operand of avx512.psra.d.512 is a shuffle that
; copies lanes 0-1 of %a into lanes 2-3. Expected result: the shuffle is
; dropped and %a is passed to the intrinsic directly.
3162 define <16 x i32> @avx512_psra_d_512_var(<16 x i32> %v, <4 x i32> %a) {
3163 ; CHECK-LABEL: @avx512_psra_d_512_var(
3164 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> [[V:%.*]], <4 x i32> [[A:%.*]])
3165 ; CHECK-NEXT: ret <16 x i32> [[TMP1]]
3167 %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
3168 %2 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> %1)
; The <2 x i64> shift-amount operand of avx512.psra.q.512 is a shuffle that
; duplicates lane 0 of %a into lane 1. Expected result: the shuffle is dropped
; and %a is passed to the intrinsic directly.
3172 define <8 x i64> @avx512_psra_q_512_var(<8 x i64> %v, <2 x i64> %a) {
3173 ; CHECK-LABEL: @avx512_psra_q_512_var(
3174 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> [[V:%.*]], <2 x i64> [[A:%.*]])
3175 ; CHECK-NEXT: ret <8 x i64> [[TMP1]]
3177 %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3178 %2 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> %1)
; The shift-amount operand of sse2.psrl.w is a shuffle that copies lanes 0-3
; of %a into lanes 4-7. Expected result: the shuffle is dropped and %a is
; passed to the intrinsic directly.
3182 define <8 x i16> @sse2_psrl_w_var(<8 x i16> %v, <8 x i16> %a) {
3183 ; CHECK-LABEL: @sse2_psrl_w_var(
3184 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> [[V:%.*]], <8 x i16> [[A:%.*]])
3185 ; CHECK-NEXT: ret <8 x i16> [[TMP1]]
3187 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
3188 %2 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> %1)
; The shift-amount operand of sse2.psrl.d is a shuffle that copies lanes 0-1
; of %a into lanes 2-3. Expected result: the shuffle is dropped and %a is
; passed to the intrinsic directly.
3192 define <4 x i32> @sse2_psrl_d_var(<4 x i32> %v, <4 x i32> %a) {
3193 ; CHECK-LABEL: @sse2_psrl_d_var(
3194 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> [[V:%.*]], <4 x i32> [[A:%.*]])
3195 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
3197 %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
3198 %2 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> %1)
; SSE2 logical right shift of i64 lanes by a count vector.
; The shuffle mask keeps element 0 of %a (its low 64 bits) in place; the
; expected output uses %a directly and drops the shuffle.
3202 define <2 x i64> @sse2_psrl_q_var(<2 x i64> %v, <2 x i64> %a) {
3203 ; CHECK-LABEL: @sse2_psrl_q_var(
3204 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> [[V:%.*]], <2 x i64> [[A:%.*]])
3205 ; CHECK-NEXT: ret <2 x i64> [[TMP1]]
3207 %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3208 %2 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> %1)
; AVX2 logical right shift of i16 lanes by a 128-bit count vector.
; The shuffle mask keeps elements 0-3 of %a (its low 64 bits) in place; the
; expected output uses %a directly and drops the shuffle.
3212 define <16 x i16> @avx2_psrl_w_var(<16 x i16> %v, <8 x i16> %a) {
3213 ; CHECK-LABEL: @avx2_psrl_w_var(
3214 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> [[V:%.*]], <8 x i16> [[A:%.*]])
3215 ; CHECK-NEXT: ret <16 x i16> [[TMP1]]
3217 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
3218 %2 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %1)
; Variant of the AVX2 psrl.w count-shuffle test where the count is shuffled as
; <16 x i8> and bitcast to <8 x i16> afterwards. The mask keeps bytes 0-7 of %a
; (its low 64 bits) in place; the expected output retains only the bitcast of
; %a, so the fold has to look through the bitcast.
3222 define <16 x i16> @avx2_psrl_w_var_bc(<16 x i16> %v, <16 x i8> %a) {
3223 ; CHECK-LABEL: @avx2_psrl_w_var_bc(
3224 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[A:%.*]] to <8 x i16>
3225 ; CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> [[V:%.*]], <8 x i16> [[TMP1]])
3226 ; CHECK-NEXT: ret <16 x i16> [[TMP2]]
3228 %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3229 %2 = bitcast <16 x i8> %1 to <8 x i16>
3230 %3 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %2)
; AVX2 logical right shift of i32 lanes by a 128-bit count vector.
; The shuffle mask keeps elements 0-1 of %a (its low 64 bits) in place; the
; expected output uses %a directly and drops the shuffle.
3234 define <8 x i32> @avx2_psrl_d_var(<8 x i32> %v, <4 x i32> %a) {
3235 ; CHECK-LABEL: @avx2_psrl_d_var(
3236 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> [[V:%.*]], <4 x i32> [[A:%.*]])
3237 ; CHECK-NEXT: ret <8 x i32> [[TMP1]]
3239 %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
3240 %2 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %1)
; Variant of the AVX2 psrl.d count-shuffle test where the count is shuffled as
; <2 x i64> and bitcast to <4 x i32> afterwards. The mask keeps element 0 of %a
; (its low 64 bits) in place; the expected output retains only the bitcast of
; %a.
3244 define <8 x i32> @avx2_psrl_d_var_bc(<8 x i32> %v, <2 x i64> %a) {
3245 ; CHECK-LABEL: @avx2_psrl_d_var_bc(
3246 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[A:%.*]] to <4 x i32>
3247 ; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> [[V:%.*]], <4 x i32> [[TMP1]])
3248 ; CHECK-NEXT: ret <8 x i32> [[TMP2]]
3250 %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3251 %2 = bitcast <2 x i64> %1 to <4 x i32>
3252 %3 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %2)
; AVX2 logical right shift of i64 lanes by a 128-bit count vector.
; The shuffle mask keeps element 0 of %a (its low 64 bits) in place; the
; expected output uses %a directly and drops the shuffle.
3256 define <4 x i64> @avx2_psrl_q_var(<4 x i64> %v, <2 x i64> %a) {
3257 ; CHECK-LABEL: @avx2_psrl_q_var(
3258 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> [[V:%.*]], <2 x i64> [[A:%.*]])
3259 ; CHECK-NEXT: ret <4 x i64> [[TMP1]]
3261 %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3262 %2 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> %1)
; 512-bit logical right shift of i16 lanes by a 128-bit count vector.
; The shuffle mask keeps elements 0-3 of %a (its low 64 bits) in place; the
; expected output uses %a directly and drops the shuffle.
3266 define <32 x i16> @avx512_psrl_w_512_var(<32 x i16> %v, <8 x i16> %a) {
3267 ; CHECK-LABEL: @avx512_psrl_w_512_var(
3268 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> [[V:%.*]], <8 x i16> [[A:%.*]])
3269 ; CHECK-NEXT: ret <32 x i16> [[TMP1]]
3271 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
3272 %2 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> %1)
; Variant of the 512-bit psrl.w count-shuffle test where the count is shuffled
; as <16 x i8> and bitcast to <8 x i16> afterwards. The mask keeps bytes 0-7 of
; %a (its low 64 bits) in place; the expected output retains only the bitcast
; of %a.
3276 define <32 x i16> @avx512_psrl_w_512_var_bc(<32 x i16> %v, <16 x i8> %a) {
3277 ; CHECK-LABEL: @avx512_psrl_w_512_var_bc(
3278 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[A:%.*]] to <8 x i16>
3279 ; CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> [[V:%.*]], <8 x i16> [[TMP1]])
3280 ; CHECK-NEXT: ret <32 x i16> [[TMP2]]
3282 %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3283 %2 = bitcast <16 x i8> %1 to <8 x i16>
3284 %3 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> %2)
; 512-bit logical right shift of i32 lanes by a 128-bit count vector.
; The shuffle mask keeps elements 0-1 of %a (its low 64 bits) in place; the
; expected output uses %a directly and drops the shuffle.
3288 define <16 x i32> @avx512_psrl_d_512_var(<16 x i32> %v, <4 x i32> %a) {
3289 ; CHECK-LABEL: @avx512_psrl_d_512_var(
3290 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> [[V:%.*]], <4 x i32> [[A:%.*]])
3291 ; CHECK-NEXT: ret <16 x i32> [[TMP1]]
3293 %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
3294 %2 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> %1)
; Variant of the 512-bit psrl.d count-shuffle test where the count is shuffled
; as <2 x i64> and bitcast to <4 x i32> afterwards. The mask keeps element 0 of
; %a (its low 64 bits) in place; the expected output retains only the bitcast
; of %a.
3298 define <16 x i32> @avx512_psrl_d_512_var_bc(<16 x i32> %v, <2 x i64> %a) {
3299 ; CHECK-LABEL: @avx512_psrl_d_512_var_bc(
3300 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[A:%.*]] to <4 x i32>
3301 ; CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> [[V:%.*]], <4 x i32> [[TMP1]])
3302 ; CHECK-NEXT: ret <16 x i32> [[TMP2]]
3304 %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3305 %2 = bitcast <2 x i64> %1 to <4 x i32>
3306 %3 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> %2)
; 512-bit logical right shift of i64 lanes by a 128-bit count vector.
; The shuffle mask keeps element 0 of %a (its low 64 bits) in place; the
; expected output uses %a directly and drops the shuffle.
3310 define <8 x i64> @avx512_psrl_q_512_var(<8 x i64> %v, <2 x i64> %a) {
3311 ; CHECK-LABEL: @avx512_psrl_q_512_var(
3312 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> [[V:%.*]], <2 x i64> [[A:%.*]])
3313 ; CHECK-NEXT: ret <8 x i64> [[TMP1]]
3315 %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3316 %2 = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %v, <2 x i64> %1)
; SSE2 left shift of i16 lanes by a count vector.
; The shuffle mask keeps elements 0-3 of %a (its low 64 bits) in place; the
; expected output uses %a directly and drops the shuffle.
3320 define <8 x i16> @sse2_psll_w_var(<8 x i16> %v, <8 x i16> %a) {
3321 ; CHECK-LABEL: @sse2_psll_w_var(
3322 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> [[V:%.*]], <8 x i16> [[A:%.*]])
3323 ; CHECK-NEXT: ret <8 x i16> [[TMP1]]
3325 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
3326 %2 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> %1)
; SSE2 left shift of i32 lanes by a count vector.
; The shuffle mask keeps elements 0-1 of %a (its low 64 bits) in place; the
; expected output uses %a directly and drops the shuffle.
3330 define <4 x i32> @sse2_psll_d_var(<4 x i32> %v, <4 x i32> %a) {
3331 ; CHECK-LABEL: @sse2_psll_d_var(
3332 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> [[V:%.*]], <4 x i32> [[A:%.*]])
3333 ; CHECK-NEXT: ret <4 x i32> [[TMP1]]
3335 %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
3336 %2 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> %1)
; SSE2 left shift of i64 lanes by a count vector.
; The shuffle mask keeps element 0 of %a (its low 64 bits) in place; the
; expected output uses %a directly and drops the shuffle.
3340 define <2 x i64> @sse2_psll_q_var(<2 x i64> %v, <2 x i64> %a) {
3341 ; CHECK-LABEL: @sse2_psll_q_var(
3342 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> [[V:%.*]], <2 x i64> [[A:%.*]])
3343 ; CHECK-NEXT: ret <2 x i64> [[TMP1]]
3345 %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3346 %2 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %1)
; AVX2 left shift of i16 lanes by a 128-bit count vector.
; The shuffle mask keeps elements 0-3 of %a (its low 64 bits) in place; the
; expected output uses %a directly and drops the shuffle.
3350 define <16 x i16> @avx2_psll_w_var(<16 x i16> %v, <8 x i16> %a) {
3351 ; CHECK-LABEL: @avx2_psll_w_var(
3352 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> [[V:%.*]], <8 x i16> [[A:%.*]])
3353 ; CHECK-NEXT: ret <16 x i16> [[TMP1]]
3355 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
3356 %2 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> %1)
; AVX2 left shift of i32 lanes by a 128-bit count vector.
; The shuffle mask keeps elements 0-1 of %a (its low 64 bits) in place; the
; expected output uses %a directly and drops the shuffle.
3360 define <8 x i32> @avx2_psll_d_var(<8 x i32> %v, <4 x i32> %a) {
3361 ; CHECK-LABEL: @avx2_psll_d_var(
3362 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> [[V:%.*]], <4 x i32> [[A:%.*]])
3363 ; CHECK-NEXT: ret <8 x i32> [[TMP1]]
3365 %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
3366 %2 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> %1)
; AVX2 left shift of i64 lanes by a 128-bit count vector.
; The shuffle mask keeps element 0 of %a (its low 64 bits) in place; the
; expected output uses %a directly and drops the shuffle.
3370 define <4 x i64> @avx2_psll_q_var(<4 x i64> %v, <2 x i64> %a) {
3371 ; CHECK-LABEL: @avx2_psll_q_var(
3372 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> [[V:%.*]], <2 x i64> [[A:%.*]])
3373 ; CHECK-NEXT: ret <4 x i64> [[TMP1]]
3375 %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3376 %2 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> %1)
; 512-bit left shift of i16 lanes by a 128-bit count vector.
; The shuffle mask keeps elements 0-3 of %a (its low 64 bits) in place; the
; expected output uses %a directly and drops the shuffle.
3380 define <32 x i16> @avx512_psll_w_512_var(<32 x i16> %v, <8 x i16> %a) {
3381 ; CHECK-LABEL: @avx512_psll_w_512_var(
3382 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> [[V:%.*]], <8 x i16> [[A:%.*]])
3383 ; CHECK-NEXT: ret <32 x i16> [[TMP1]]
3385 %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
3386 %2 = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %v, <8 x i16> %1)
; 512-bit left shift of i32 lanes by a 128-bit count vector.
; The shuffle mask keeps elements 0-1 of %a (its low 64 bits) in place; the
; expected output uses %a directly and drops the shuffle.
3390 define <16 x i32> @avx512_psll_d_512_var(<16 x i32> %v, <4 x i32> %a) {
3391 ; CHECK-LABEL: @avx512_psll_d_512_var(
3392 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> [[V:%.*]], <4 x i32> [[A:%.*]])
3393 ; CHECK-NEXT: ret <16 x i32> [[TMP1]]
3395 %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
3396 %2 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> %1)
; 512-bit left shift of i64 lanes by a 128-bit count vector.
; The shuffle mask keeps element 0 of %a (its low 64 bits) in place; the
; expected output uses %a directly and drops the shuffle.
3400 define <8 x i64> @avx512_psll_q_512_var(<8 x i64> %v, <2 x i64> %a) {
3401 ; CHECK-LABEL: @avx512_psll_q_512_var(
3402 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> [[V:%.*]], <2 x i64> [[A:%.*]])
3403 ; CHECK-NEXT: ret <8 x i64> [[TMP1]]
3405 %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
3406 %2 = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %v, <2 x i64> %1)
; Chain of three arithmetic right shifts that all shift by zero: immediate 0,
; a count vector whose low 64 bits (elements 0-3) are zero while element 4 is
; 7, and immediate 0 again. Expected to fold to the unmodified argument.
3414 define <8 x i16> @test_sse2_psra_w_0(<8 x i16> %A) {
3415 ; CHECK-LABEL: @test_sse2_psra_w_0(
3416 ; CHECK-NEXT: ret <8 x i16> [[A:%.*]]
3418 %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %A, i32 0)
3419 %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
3420 %3 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %2, i32 0)
; Constant folding of an arithmetic right-shift chain on i16 lanes: immediate 3,
; count vector with 3 in element 0 (element 4 holds 7 and does not affect the
; expected result), then immediate 2 -- 8 bits in total.
; The input constant is 0x1000200040008000 per i64, i.e. i16 lanes
; 0x8000, 0x4000, 0x2000, 0x1000, which become -128, 64, 32, 16.
3424 define <8 x i16> @test_sse2_psra_w_8() {
3425 ; CHECK-LABEL: @test_sse2_psra_w_8(
3426 ; CHECK-NEXT: ret <8 x i16> <i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16>
3428 %1 = bitcast <2 x i64> <i64 1152956690052710400, i64 1152956690052710400> to <8 x i16>
3429 %2 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %1, i32 3)
3430 %3 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %2, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
3431 %4 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %3, i32 2)
; Chain of three arithmetic right shifts that all shift by zero: immediate 0,
; a count vector whose low 64 bits (elements 0-1) are zero while element 2 is
; 7, and immediate 0 again. Expected to fold to the unmodified argument.
; Each call consumes the previous result so the vector-count shift is live and
; its fold is actually exercised (matching the sibling *_psra_*_0 tests).
3435 define <4 x i32> @test_sse2_psra_d_0(<4 x i32> %A) {
3436 ; CHECK-LABEL: @test_sse2_psra_d_0(
3437 ; CHECK-NEXT: ret <4 x i32> [[A:%.*]]
3439 %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %A, i32 0)
3440 %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>)
3441 %3 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %2, i32 0)
; Constant folding of an arithmetic right-shift chain on i32 lanes: immediate 3,
; count vector with 3 in elements 0-1's 64-bit slot (element 2 holds 7 and does
; not affect the expected result), then immediate 2 -- 8 bits in total.
; The i32 lanes 0x40008000 and 0x10002000 become 4194432 and 1048608.
3445 define <4 x i32> @sse2_psra_d_8() {
3446 ; CHECK-LABEL: @sse2_psra_d_8(
3447 ; CHECK-NEXT: ret <4 x i32> <i32 4194432, i32 1048608, i32 4194432, i32 1048608>
3449 %1 = bitcast <2 x i64> <i64 1152956690052710400, i64 1152956690052710400> to <4 x i32>
3450 %2 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %1, i32 3)
3451 %3 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %2, <4 x i32> <i32 3, i32 0, i32 7, i32 0>)
3452 %4 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %3, i32 2)
; AVX2 version of the shift-by-zero chain on i16 lanes: immediate 0, a count
; vector whose low 64 bits are zero (element 4 is 7), immediate 0.
; Expected to fold to the unmodified argument.
3456 define <16 x i16> @test_avx2_psra_w_0(<16 x i16> %A) {
3457 ; CHECK-LABEL: @test_avx2_psra_w_0(
3458 ; CHECK-NEXT: ret <16 x i16> [[A:%.*]]
3460 %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %A, i32 0)
3461 %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
3462 %3 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %2, i32 0)
; AVX2 constant folding of an arithmetic right-shift chain on i16 lanes
; (immediate 3, count vector with 3 in element 0, immediate 2 -- 8 bits total).
; The i16 lanes 0x8000, 0x4000, 0x2000, 0x1000 become -128, 64, 32, 16.
; NOTE: the %A parameter is not used by the body.
3466 define <16 x i16> @test_avx2_psra_w_8(<16 x i16> %A) {
3467 ; CHECK-LABEL: @test_avx2_psra_w_8(
3468 ; CHECK-NEXT: ret <16 x i16> <i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16>
3470 %1 = bitcast <4 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <16 x i16>
3471 %2 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %1, i32 3)
3472 %3 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %2, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
3473 %4 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %3, i32 2)
; AVX2 version of the shift-by-zero chain on i32 lanes: immediate 0, a count
; vector whose low 64 bits are zero (element 2 is 7), immediate 0.
; Expected to fold to the unmodified argument.
3477 define <8 x i32> @test_avx2_psra_d_0(<8 x i32> %A) {
3478 ; CHECK-LABEL: @test_avx2_psra_d_0(
3479 ; CHECK-NEXT: ret <8 x i32> [[A:%.*]]
3481 %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %A, i32 0)
3482 %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>)
3483 %3 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %2, i32 0)
; AVX2 constant folding of an arithmetic right-shift chain on i32 lanes
; (immediate 3, count vector with 3 in its low 64 bits, immediate 2 -- 8 bits
; total). The i32 lanes 0x40008000 and 0x10002000 become 4194432 and 1048608.
3487 define <8 x i32> @test_avx2_psra_d_8() {
3488 ; CHECK-LABEL: @test_avx2_psra_d_8(
3489 ; CHECK-NEXT: ret <8 x i32> <i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608>
3491 %1 = bitcast <4 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <8 x i32>
3492 %2 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %1, i32 3)
3493 %3 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %2, <4 x i32> <i32 3, i32 0, i32 7, i32 0>)
3494 %4 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %3, i32 2)
; AVX-512 version of the shift-by-zero chain on i16 lanes: immediate 0, a count
; vector whose low 64 bits are zero (element 4 is 7), immediate 0.
; Expected to fold to the unmodified argument.
3498 define <32 x i16> @test_avx512_psra_w_512_0(<32 x i16> %A) {
3499 ; CHECK-LABEL: @test_avx512_psra_w_512_0(
3500 ; CHECK-NEXT: ret <32 x i16> [[A:%.*]]
3502 %1 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %A, i32 0)
3503 %2 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
3504 %3 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %2, i32 0)
; AVX-512 constant folding of an arithmetic right-shift chain on i16 lanes
; (immediate 3, count vector with 3 in element 0, immediate 2 -- 8 bits total).
; The i16 lanes 0x8000, 0x4000, 0x2000, 0x1000 become -128, 64, 32, 16.
; NOTE: the %A parameter is not used by the body.
3508 define <32 x i16> @test_avx512_psra_w_512_8(<32 x i16> %A) {
3509 ; CHECK-LABEL: @test_avx512_psra_w_512_8(
3510 ; CHECK-NEXT: ret <32 x i16> <i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16>
3512 %1 = bitcast <8 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <32 x i16>
3513 %2 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %1, i32 3)
3514 %3 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %2, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
3515 %4 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %3, i32 2)
; AVX-512 version of the shift-by-zero chain on i32 lanes: immediate 0, a count
; vector whose low 64 bits are zero (element 2 is 7), immediate 0.
; Expected to fold to the unmodified argument.
3519 define <16 x i32> @test_avx512_psra_d_512_0(<16 x i32> %A) {
3520 ; CHECK-LABEL: @test_avx512_psra_d_512_0(
3521 ; CHECK-NEXT: ret <16 x i32> [[A:%.*]]
3523 %1 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %A, i32 0)
3524 %2 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>)
3525 %3 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %2, i32 0)
; AVX-512 constant folding of an arithmetic right-shift chain on i32 lanes
; (immediate 3, count vector with 3 in its low 64 bits, immediate 2 -- 8 bits
; total). The i32 lanes 0x40008000 and 0x10002000 become 4194432 and 1048608.
3529 define <16 x i32> @test_avx512_psra_d_512_8() {
3530 ; CHECK-LABEL: @test_avx512_psra_d_512_8(
3531 ; CHECK-NEXT: ret <16 x i32> <i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608>
3533 %1 = bitcast <8 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <16 x i32>
3534 %2 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %1, i32 3)
3535 %3 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %2, <4 x i32> <i32 3, i32 0, i32 7, i32 0>)
3536 %4 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %3, i32 2)
; Constant folding through six chained SSE2 left shifts, each by 1:
; psll.w / psll.d / psll.q take the count vector <i64 1, i64 0> (bitcast to the
; matching element type), then pslli.w / pslli.d / pslli.q take the scalar %S.
; %S is a no-op `bitcast i32 1 to i32`, used only to name the constant.
; Net effect is a shift by 6: i16 lanes 1..8 become 64..512, which is what the
; two expected i64 constants encode.
3544 define <2 x i64> @test_sse2_1() {
3545 ; CHECK-LABEL: @test_sse2_1(
3546 ; CHECK-NEXT: ret <2 x i64> <i64 72058418680037440, i64 144117112246370624>
3548 %S = bitcast i32 1 to i32
3549 %1 = zext i32 %S to i64
3550 %2 = insertelement <2 x i64> undef, i64 %1, i32 0
3551 %3 = insertelement <2 x i64> %2, i64 0, i32 1
3552 %4 = bitcast <2 x i64> %3 to <8 x i16>
3553 %5 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> %4)
3554 %6 = bitcast <8 x i16> %5 to <4 x i32>
3555 %7 = bitcast <2 x i64> %3 to <4 x i32>
3556 %8 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %6, <4 x i32> %7)
3557 %9 = bitcast <4 x i32> %8 to <2 x i64>
3558 %10 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %9, <2 x i64> %3)
3559 %11 = bitcast <2 x i64> %10 to <8 x i16>
3560 %12 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %11, i32 %S)
3561 %13 = bitcast <8 x i16> %12 to <4 x i32>
3562 %14 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %13, i32 %S)
3563 %15 = bitcast <4 x i32> %14 to <2 x i64>
3564 %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S)
; AVX2 version of the six-step left-shift-by-1 constant-folding chain
; (psll.w/d/q with count vector <i64 1, i64 0>, then pslli.w/d/q with %S = 1).
; The input holds 1, 2, 3, 4 in the low i16 of each i64 lane with zeros above,
; so the net shift by 6 yields the i64 values 64, 128, 192, 256.
3568 define <4 x i64> @test_avx2_1() {
3569 ; CHECK-LABEL: @test_avx2_1(
3570 ; CHECK-NEXT: ret <4 x i64> <i64 64, i64 128, i64 192, i64 256>
3572 %S = bitcast i32 1 to i32
3573 %1 = zext i32 %S to i64
3574 %2 = insertelement <2 x i64> undef, i64 %1, i32 0
3575 %3 = insertelement <2 x i64> %2, i64 0, i32 1
3576 %4 = bitcast <2 x i64> %3 to <8 x i16>
3577 %5 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> <i16 1, i16 0, i16 0, i16 0, i16 2, i16 0, i16 0, i16 0, i16 3, i16 0, i16 0, i16 0, i16 4, i16 0, i16 0, i16 0>, <8 x i16> %4)
3578 %6 = bitcast <16 x i16> %5 to <8 x i32>
3579 %7 = bitcast <2 x i64> %3 to <4 x i32>
3580 %8 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %6, <4 x i32> %7)
3581 %9 = bitcast <8 x i32> %8 to <4 x i64>
3582 %10 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %9, <2 x i64> %3)
3583 %11 = bitcast <4 x i64> %10 to <16 x i16>
3584 %12 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %11, i32 %S)
3585 %13 = bitcast <16 x i16> %12 to <8 x i32>
3586 %14 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %13, i32 %S)
3587 %15 = bitcast <8 x i32> %14 to <4 x i64>
3588 %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S)
; Same SSE2 left-shift chain as the shift-by-1 variant, but with %S = 128,
; which is larger than every element width used (16, 32, 64 bits).
; Expected to constant-fold to an all-zero vector.
3592 define <2 x i64> @test_sse2_0() {
3593 ; CHECK-LABEL: @test_sse2_0(
3594 ; CHECK-NEXT: ret <2 x i64> zeroinitializer
3596 %S = bitcast i32 128 to i32
3597 %1 = zext i32 %S to i64
3598 %2 = insertelement <2 x i64> undef, i64 %1, i32 0
3599 %3 = insertelement <2 x i64> %2, i64 0, i32 1
3600 %4 = bitcast <2 x i64> %3 to <8 x i16>
3601 %5 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> %4)
3602 %6 = bitcast <8 x i16> %5 to <4 x i32>
3603 %7 = bitcast <2 x i64> %3 to <4 x i32>
3604 %8 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %6, <4 x i32> %7)
3605 %9 = bitcast <4 x i32> %8 to <2 x i64>
3606 %10 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %9, <2 x i64> %3)
3607 %11 = bitcast <2 x i64> %10 to <8 x i16>
3608 %12 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %11, i32 %S)
3609 %13 = bitcast <8 x i16> %12 to <4 x i32>
3610 %14 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %13, i32 %S)
3611 %15 = bitcast <4 x i32> %14 to <2 x i64>
3612 %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S)
; Same AVX2 left-shift chain as the shift-by-1 variant, but with %S = 128,
; which is larger than every element width used (16, 32, 64 bits).
; Expected to constant-fold to an all-zero vector.
3616 define <4 x i64> @test_avx2_0() {
3617 ; CHECK-LABEL: @test_avx2_0(
3618 ; CHECK-NEXT: ret <4 x i64> zeroinitializer
3620 %S = bitcast i32 128 to i32
3621 %1 = zext i32 %S to i64
3622 %2 = insertelement <2 x i64> undef, i64 %1, i32 0
3623 %3 = insertelement <2 x i64> %2, i64 0, i32 1
3624 %4 = bitcast <2 x i64> %3 to <8 x i16>
3625 %5 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> <i16 1, i16 0, i16 0, i16 0, i16 2, i16 0, i16 0, i16 0, i16 3, i16 0, i16 0, i16 0, i16 4, i16 0, i16 0, i16 0>, <8 x i16> %4)
3626 %6 = bitcast <16 x i16> %5 to <8 x i32>
3627 %7 = bitcast <2 x i64> %3 to <4 x i32>
3628 %8 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %6, <4 x i32> %7)
3629 %9 = bitcast <8 x i32> %8 to <4 x i64>
3630 %10 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %9, <2 x i64> %3)
3631 %11 = bitcast <4 x i64> %10 to <16 x i16>
3632 %12 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %11, i32 %S)
3633 %13 = bitcast <16 x i16> %12 to <8 x i32>
3634 %14 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %13, i32 %S)
3635 %15 = bitcast <8 x i32> %14 to <4 x i64>
3636 %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S)
; Constant folding through six chained SSE2 logical right shifts, each by 1:
; psrl.w / psrl.d / psrl.q take the count vector <i64 1, i64 0>, then
; psrli.w / psrli.d / psrli.q take the scalar %S = 1.
; The input packs a value in every i16 lane, so the wider (d/q) shifts move
; bits across i16 boundaries; the expected constants reflect that.
3639 define <2 x i64> @test_sse2_psrl_1() {
3640 ; CHECK-LABEL: @test_sse2_psrl_1(
3641 ; CHECK-NEXT: ret <2 x i64> <i64 562954248421376, i64 9007267974742020>
3643 %S = bitcast i32 1 to i32
3644 %1 = zext i32 %S to i64
3645 %2 = insertelement <2 x i64> undef, i64 %1, i32 0
3646 %3 = insertelement <2 x i64> %2, i64 0, i32 1
3647 %4 = bitcast <2 x i64> %3 to <8 x i16>
3648 %5 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> <i16 16, i16 32, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048>, <8 x i16> %4)
3649 %6 = bitcast <8 x i16> %5 to <4 x i32>
3650 %7 = bitcast <2 x i64> %3 to <4 x i32>
3651 %8 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %6, <4 x i32> %7)
3652 %9 = bitcast <4 x i32> %8 to <2 x i64>
3653 %10 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %9, <2 x i64> %3)
3654 %11 = bitcast <2 x i64> %10 to <8 x i16>
3655 %12 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %11, i32 %S)
3656 %13 = bitcast <8 x i16> %12 to <4 x i32>
3657 %14 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %13, i32 %S)
3658 %15 = bitcast <4 x i32> %14 to <2 x i64>
3659 %16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S)
; AVX2 version of the six-step logical-right-shift-by-1 constant-folding chain
; (psrl.w/d/q with count vector <i64 1, i64 0>, then psrli.w/d/q with %S = 1).
; The input holds 1024, 2048, 4096, 8192 in the low i16 of each i64 lane, so
; the net shift by 6 yields the i64 values 16, 32, 64, 128.
3663 define <4 x i64> @test_avx2_psrl_1() {
3664 ; CHECK-LABEL: @test_avx2_psrl_1(
3665 ; CHECK-NEXT: ret <4 x i64> <i64 16, i64 32, i64 64, i64 128>
3667 %S = bitcast i32 1 to i32
3668 %1 = zext i32 %S to i64
3669 %2 = insertelement <2 x i64> undef, i64 %1, i32 0
3670 %3 = insertelement <2 x i64> %2, i64 0, i32 1
3671 %4 = bitcast <2 x i64> %3 to <8 x i16>
3672 %5 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> <i16 1024, i16 0, i16 0, i16 0, i16 2048, i16 0, i16 0, i16 0, i16 4096, i16 0, i16 0, i16 0, i16 8192, i16 0, i16 0, i16 0>, <8 x i16> %4)
3673 %6 = bitcast <16 x i16> %5 to <8 x i32>
3674 %7 = bitcast <2 x i64> %3 to <4 x i32>
3675 %8 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %6, <4 x i32> %7)
3676 %9 = bitcast <8 x i32> %8 to <4 x i64>
3677 %10 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %9, <2 x i64> %3)
3678 %11 = bitcast <4 x i64> %10 to <16 x i16>
3679 %12 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %11, i32 %S)
3680 %13 = bitcast <16 x i16> %12 to <8 x i32>
3681 %14 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %13, i32 %S)
3682 %15 = bitcast <8 x i32> %14 to <4 x i64>
3683 %16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S)
; SSE2 logical-right-shift chain with %S = 128, which is larger than every
; element width used (16, 32, 64 bits).
; Expected to constant-fold to an all-zero vector.
3687 define <2 x i64> @test_sse2_psrl_0() {
3688 ; CHECK-LABEL: @test_sse2_psrl_0(
3689 ; CHECK-NEXT: ret <2 x i64> zeroinitializer
3691 %S = bitcast i32 128 to i32
3692 %1 = zext i32 %S to i64
3693 %2 = insertelement <2 x i64> undef, i64 %1, i32 0
3694 %3 = insertelement <2 x i64> %2, i64 0, i32 1
3695 %4 = bitcast <2 x i64> %3 to <8 x i16>
3696 %5 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> <i16 32, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048, i16 4096>, <8 x i16> %4)
3697 %6 = bitcast <8 x i16> %5 to <4 x i32>
3698 %7 = bitcast <2 x i64> %3 to <4 x i32>
3699 %8 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %6, <4 x i32> %7)
3700 %9 = bitcast <4 x i32> %8 to <2 x i64>
3701 %10 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %9, <2 x i64> %3)
3702 %11 = bitcast <2 x i64> %10 to <8 x i16>
3703 %12 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %11, i32 %S)
3704 %13 = bitcast <8 x i16> %12 to <4 x i32>
3705 %14 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %13, i32 %S)
3706 %15 = bitcast <4 x i32> %14 to <2 x i64>
3707 %16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S)
; AVX2 logical-right-shift chain with %S = 128, which is larger than every
; element width used (16, 32, 64 bits).
; Expected to constant-fold to an all-zero vector.
3711 define <4 x i64> @test_avx2_psrl_0() {
3712 ; CHECK-LABEL: @test_avx2_psrl_0(
3713 ; CHECK-NEXT: ret <4 x i64> zeroinitializer
3715 %S = bitcast i32 128 to i32
3716 %1 = zext i32 %S to i64
3717 %2 = insertelement <2 x i64> undef, i64 %1, i32 0
3718 %3 = insertelement <2 x i64> %2, i64 0, i32 1
3719 %4 = bitcast <2 x i64> %3 to <8 x i16>
3720 %5 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> <i16 1024, i16 0, i16 0, i16 0, i16 2048, i16 0, i16 0, i16 0, i16 4096, i16 0, i16 0, i16 0, i16 8192, i16 0, i16 0, i16 0>, <8 x i16> %4)
3721 %6 = bitcast <16 x i16> %5 to <8 x i32>
3722 %7 = bitcast <2 x i64> %3 to <4 x i32>
3723 %8 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %6, <4 x i32> %7)
3724 %9 = bitcast <8 x i32> %8 to <4 x i64>
3725 %10 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %9, <2 x i64> %3)
3726 %11 = bitcast <4 x i64> %10 to <16 x i16>
3727 %12 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %11, i32 %S)
3728 %13 = bitcast <16 x i16> %12 to <8 x i32>
3729 %14 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %13, i32 %S)
3730 %15 = bitcast <8 x i32> %14 to <4 x i64>
3731 %16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S)
; Left-shift intrinsics, grouped AVX-512 (512-bit), AVX2 (256-bit), SSE2
; (128-bit). In each group the pslli.* forms take an i32 count and the psll.*
; forms take a 128-bit count vector.
3735 declare <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64>, i32) #1
3736 declare <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32>, i32) #1
3737 declare <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16>, i32) #1
3738 declare <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64>, <2 x i64>) #1
3739 declare <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32>, <4 x i32>) #1
3740 declare <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16>, <8 x i16>) #1
3741 declare <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64>, i32) #1
3742 declare <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32>, i32) #1
3743 declare <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16>, i32) #1
3744 declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) #1
3745 declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) #1
3746 declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) #1
3747 declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) #1
3748 declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) #1
3749 declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) #1
3750 declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) #1
3751 declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) #1
3752 declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) #1
; Logical right-shift intrinsics, grouped AVX-512 (512-bit), AVX2 (256-bit),
; SSE2 (128-bit). In each group the psrli.* forms take an i32 count and the
; psrl.* forms take a 128-bit count vector.
3754 declare <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64>, i32) #1
3755 declare <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32>, i32) #1
3756 declare <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16>, i32) #1
3757 declare <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64>, <2 x i64>) #1
3758 declare <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32>, <4 x i32>) #1
3759 declare <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16>, <8 x i16>) #1
3760 declare <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64>, i32) #1
3761 declare <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32>, i32) #1
3762 declare <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16>, i32) #1
3763 declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) #1
3764 declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) #1
3765 declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) #1
3766 declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) #1
3767 declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) #1
3768 declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) #1
3769 declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) #1
3770 declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) #1
3771 declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) #1
; Arithmetic right-shift intrinsics, grouped by vector width (512, 256, 128
; bits). The psrai.* forms take an i32 count and the psra.* forms take a
; 128-bit count vector. Note that the 64-bit-element (q) forms at 256 and 128
; bits live in the avx512 namespace rather than avx2/sse2.
3773 declare <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64>, i32) #1
3774 declare <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32>, i32) #1
3775 declare <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16>, i32) #1
3776 declare <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64>, <2 x i64>) #1
3777 declare <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32>, <4 x i32>) #1
3778 declare <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16>, <8 x i16>) #1
3779 declare <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64>, i32) #1
3780 declare <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32>, i32) #1
3781 declare <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16>, i32) #1
3782 declare <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64>, <2 x i64>) #1
3783 declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) #1
3784 declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) #1
3785 declare <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64>, i32) #1
3786 declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) #1
3787 declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) #1
3788 declare <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64>, <2 x i64>) #1
3789 declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) #1
3790 declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) #1
; psrav / psrlv / psllv intrinsics. Unlike the psra/psrl/psll forms, the count
; operand here has the same vector type as the data operand.
; Listed first for i32/i64 elements, then for i16 elements (avx512 *.w.* forms).
3792 declare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) #1
3793 declare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) #1
3794 declare <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32>, <16 x i32>) #1
3795 declare <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64>, <2 x i64>) #1
3796 declare <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64>, <4 x i64>) #1
3797 declare <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64>, <8 x i64>) #1
3799 declare <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32>, <4 x i32>) #1
3800 declare <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32>, <8 x i32>) #1
3801 declare <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64>, <2 x i64>) #1
3802 declare <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64>, <4 x i64>) #1
3803 declare <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32>, <16 x i32>) #1
3804 declare <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64>, <8 x i64>) #1
3806 declare <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32>, <4 x i32>) #1
3807 declare <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32>, <8 x i32>) #1
3808 declare <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64>, <2 x i64>) #1
3809 declare <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64>, <4 x i64>) #1
3810 declare <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32>, <16 x i32>) #1
3811 declare <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64>, <8 x i64>) #1
3813 declare <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16>, <8 x i16>) #1
3814 declare <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16>, <16 x i16>) #1
3815 declare <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16>, <32 x i16>) #1
3816 declare <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16>, <8 x i16>) #1
3817 declare <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16>, <16 x i16>) #1
3818 declare <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16>, <32 x i16>) #1
3819 declare <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16>, <8 x i16>) #1
3820 declare <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16>, <16 x i16>) #1
3821 declare <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16>, <32 x i16>) #1
; Attribute group shared by every intrinsic declaration above.
3823 attributes #1 = { nounwind readnone }