1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64
; Per-element variable shl of <4 x i32>: both targets are expected to emit a
; single vpsllvd on xmm registers.
5 define <4 x i32> @variable_shl0(<4 x i32> %x, <4 x i32> %y) {
6 ; X32-LABEL: variable_shl0:
8 ; X32-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
11 ; X64-LABEL: variable_shl0:
13 ; X64-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
15 %k = shl <4 x i32> %x, %y
; Per-element variable shl of <8 x i32>: both targets are expected to emit a
; single vpsllvd on ymm registers.
19 define <8 x i32> @variable_shl1(<8 x i32> %x, <8 x i32> %y) {
20 ; X32-LABEL: variable_shl1:
22 ; X32-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
25 ; X64-LABEL: variable_shl1:
27 ; X64-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
29 %k = shl <8 x i32> %x, %y
; Per-element variable shl of <2 x i64>: both targets are expected to emit a
; single vpsllvq on xmm registers.
33 define <2 x i64> @variable_shl2(<2 x i64> %x, <2 x i64> %y) {
34 ; X32-LABEL: variable_shl2:
36 ; X32-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
39 ; X64-LABEL: variable_shl2:
41 ; X64-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
43 %k = shl <2 x i64> %x, %y
; Per-element variable shl of <4 x i64>: both targets are expected to emit a
; single vpsllvq on ymm registers.
47 define <4 x i64> @variable_shl3(<4 x i64> %x, <4 x i64> %y) {
48 ; X32-LABEL: variable_shl3:
50 ; X32-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
53 ; X64-LABEL: variable_shl3:
55 ; X64-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
57 %k = shl <4 x i64> %x, %y
; Per-element variable lshr of <4 x i32>: both targets are expected to emit a
; single vpsrlvd on xmm registers.
61 define <4 x i32> @variable_srl0(<4 x i32> %x, <4 x i32> %y) {
62 ; X32-LABEL: variable_srl0:
64 ; X32-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
67 ; X64-LABEL: variable_srl0:
69 ; X64-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
71 %k = lshr <4 x i32> %x, %y
; Per-element variable lshr of <8 x i32>: both targets are expected to emit a
; single vpsrlvd on ymm registers.
75 define <8 x i32> @variable_srl1(<8 x i32> %x, <8 x i32> %y) {
76 ; X32-LABEL: variable_srl1:
78 ; X32-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
81 ; X64-LABEL: variable_srl1:
83 ; X64-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
85 %k = lshr <8 x i32> %x, %y
; Per-element variable lshr of <2 x i64>: both targets are expected to emit a
; single vpsrlvq on xmm registers.
89 define <2 x i64> @variable_srl2(<2 x i64> %x, <2 x i64> %y) {
90 ; X32-LABEL: variable_srl2:
92 ; X32-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
95 ; X64-LABEL: variable_srl2:
97 ; X64-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0
99 %k = lshr <2 x i64> %x, %y
; Per-element variable lshr of <4 x i64>: both targets are expected to emit a
; single vpsrlvq on ymm registers.
103 define <4 x i64> @variable_srl3(<4 x i64> %x, <4 x i64> %y) {
104 ; X32-LABEL: variable_srl3:
106 ; X32-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
109 ; X64-LABEL: variable_srl3:
111 ; X64-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0
113 %k = lshr <4 x i64> %x, %y
; Per-element variable ashr of <4 x i32>: both targets are expected to emit a
; single vpsravd on xmm registers.
117 define <4 x i32> @variable_sra0(<4 x i32> %x, <4 x i32> %y) {
118 ; X32-LABEL: variable_sra0:
120 ; X32-NEXT: vpsravd %xmm1, %xmm0, %xmm0
123 ; X64-LABEL: variable_sra0:
125 ; X64-NEXT: vpsravd %xmm1, %xmm0, %xmm0
127 %k = ashr <4 x i32> %x, %y
; Per-element variable ashr of <8 x i32>: both targets are expected to emit a
; single vpsravd on ymm registers.
131 define <8 x i32> @variable_sra1(<8 x i32> %x, <8 x i32> %y) {
132 ; X32-LABEL: variable_sra1:
134 ; X32-NEXT: vpsravd %ymm1, %ymm0, %ymm0
137 ; X64-LABEL: variable_sra1:
139 ; X64-NEXT: vpsravd %ymm1, %ymm0, %ymm0
141 %k = ashr <8 x i32> %x, %y
; shl of <8 x i32> by a splat constant 2: expected to become the immediate
; form vpslld $2 on a ymm register for both targets.
147 define <8 x i32> @vshift00(<8 x i32> %a) nounwind readnone {
148 ; X32-LABEL: vshift00:
150 ; X32-NEXT: vpslld $2, %ymm0, %ymm0
153 ; X64-LABEL: vshift00:
155 ; X64-NEXT: vpslld $2, %ymm0, %ymm0
157 %s = shl <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
; shl of <16 x i16> by a splat constant 2: expected to become the immediate
; form vpsllw $2 on a ymm register for both targets.
161 define <16 x i16> @vshift01(<16 x i16> %a) nounwind readnone {
162 ; X32-LABEL: vshift01:
164 ; X32-NEXT: vpsllw $2, %ymm0, %ymm0
167 ; X64-LABEL: vshift01:
169 ; X64-NEXT: vpsllw $2, %ymm0, %ymm0
171 %s = shl <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
; shl of <4 x i64> by a splat constant 2: expected to become the immediate
; form vpsllq $2 on a ymm register for both targets.
175 define <4 x i64> @vshift02(<4 x i64> %a) nounwind readnone {
176 ; X32-LABEL: vshift02:
178 ; X32-NEXT: vpsllq $2, %ymm0, %ymm0
181 ; X64-LABEL: vshift02:
183 ; X64-NEXT: vpsllq $2, %ymm0, %ymm0
185 %s = shl <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
189 ;;; Logical Shift right
; lshr of <8 x i32> by a splat constant 2: expected to become the immediate
; form vpsrld $2 on a ymm register for both targets.
191 define <8 x i32> @vshift03(<8 x i32> %a) nounwind readnone {
192 ; X32-LABEL: vshift03:
194 ; X32-NEXT: vpsrld $2, %ymm0, %ymm0
197 ; X64-LABEL: vshift03:
199 ; X64-NEXT: vpsrld $2, %ymm0, %ymm0
201 %s = lshr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
; lshr of <16 x i16> by a splat constant 2: expected to become the immediate
; form vpsrlw $2 on a ymm register for both targets.
205 define <16 x i16> @vshift04(<16 x i16> %a) nounwind readnone {
206 ; X32-LABEL: vshift04:
208 ; X32-NEXT: vpsrlw $2, %ymm0, %ymm0
211 ; X64-LABEL: vshift04:
213 ; X64-NEXT: vpsrlw $2, %ymm0, %ymm0
215 %s = lshr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
; lshr of <4 x i64> by a splat constant 2: expected to become the immediate
; form vpsrlq $2 on a ymm register for both targets.
219 define <4 x i64> @vshift05(<4 x i64> %a) nounwind readnone {
220 ; X32-LABEL: vshift05:
222 ; X32-NEXT: vpsrlq $2, %ymm0, %ymm0
225 ; X64-LABEL: vshift05:
227 ; X64-NEXT: vpsrlq $2, %ymm0, %ymm0
229 %s = lshr <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
233 ;;; Arithmetic Shift right
; ashr of <8 x i32> by a splat constant 2: expected to become the immediate
; form vpsrad $2 on a ymm register for both targets.
235 define <8 x i32> @vshift06(<8 x i32> %a) nounwind readnone {
236 ; X32-LABEL: vshift06:
238 ; X32-NEXT: vpsrad $2, %ymm0, %ymm0
241 ; X64-LABEL: vshift06:
243 ; X64-NEXT: vpsrad $2, %ymm0, %ymm0
245 %s = ashr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
; ashr of <16 x i16> by a splat constant 2: expected to become the immediate
; form vpsraw $2 on a ymm register for both targets.
249 define <16 x i16> @vshift07(<16 x i16> %a) nounwind readnone {
250 ; X32-LABEL: vshift07:
252 ; X32-NEXT: vpsraw $2, %ymm0, %ymm0
255 ; X64-LABEL: vshift07:
257 ; X64-NEXT: vpsraw $2, %ymm0, %ymm0
259 %s = ashr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
; Variable ashr of <4 x i32> where the shift amounts are loaded from %y.
; The load is expected to fold into vpsravd's memory operand: (%eax) after the
; pointer is fetched from the stack on i686, (%rdi) directly on x86_64.
263 define <4 x i32> @variable_sra0_load(<4 x i32> %x, <4 x i32>* %y) {
264 ; X32-LABEL: variable_sra0_load:
266 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
267 ; X32-NEXT: vpsravd (%eax), %xmm0, %xmm0
270 ; X64-LABEL: variable_sra0_load:
272 ; X64-NEXT: vpsravd (%rdi), %xmm0, %xmm0
274 %y1 = load <4 x i32>, <4 x i32>* %y
275 %k = ashr <4 x i32> %x, %y1
; Variable ashr of <8 x i32> where the shift amounts are loaded from %y.
; The load is expected to fold into vpsravd's memory operand: (%eax) after the
; pointer is fetched from the stack on i686, (%rdi) directly on x86_64.
279 define <8 x i32> @variable_sra1_load(<8 x i32> %x, <8 x i32>* %y) {
280 ; X32-LABEL: variable_sra1_load:
282 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
283 ; X32-NEXT: vpsravd (%eax), %ymm0, %ymm0
286 ; X64-LABEL: variable_sra1_load:
288 ; X64-NEXT: vpsravd (%rdi), %ymm0, %ymm0
290 %y1 = load <8 x i32>, <8 x i32>* %y
291 %k = ashr <8 x i32> %x, %y1
; Variable shl of <4 x i32> where the shift amounts are loaded from %y.
; The load is expected to fold into vpsllvd's memory operand: (%eax) after the
; pointer is fetched from the stack on i686, (%rdi) directly on x86_64.
295 define <4 x i32> @variable_shl0_load(<4 x i32> %x, <4 x i32>* %y) {
296 ; X32-LABEL: variable_shl0_load:
298 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
299 ; X32-NEXT: vpsllvd (%eax), %xmm0, %xmm0
302 ; X64-LABEL: variable_shl0_load:
304 ; X64-NEXT: vpsllvd (%rdi), %xmm0, %xmm0
306 %y1 = load <4 x i32>, <4 x i32>* %y
307 %k = shl <4 x i32> %x, %y1
; Variable shl of <8 x i32> where the shift amounts are loaded from %y.
; The load is expected to fold into vpsllvd's memory operand: (%eax) after the
; pointer is fetched from the stack on i686, (%rdi) directly on x86_64.
311 define <8 x i32> @variable_shl1_load(<8 x i32> %x, <8 x i32>* %y) {
312 ; X32-LABEL: variable_shl1_load:
314 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
315 ; X32-NEXT: vpsllvd (%eax), %ymm0, %ymm0
318 ; X64-LABEL: variable_shl1_load:
320 ; X64-NEXT: vpsllvd (%rdi), %ymm0, %ymm0
322 %y1 = load <8 x i32>, <8 x i32>* %y
323 %k = shl <8 x i32> %x, %y1
; Variable shl of <2 x i64> where the shift amounts are loaded from %y.
; The load is expected to fold into vpsllvq's memory operand: (%eax) after the
; pointer is fetched from the stack on i686, (%rdi) directly on x86_64.
327 define <2 x i64> @variable_shl2_load(<2 x i64> %x, <2 x i64>* %y) {
328 ; X32-LABEL: variable_shl2_load:
330 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
331 ; X32-NEXT: vpsllvq (%eax), %xmm0, %xmm0
334 ; X64-LABEL: variable_shl2_load:
336 ; X64-NEXT: vpsllvq (%rdi), %xmm0, %xmm0
338 %y1 = load <2 x i64>, <2 x i64>* %y
339 %k = shl <2 x i64> %x, %y1
; Variable shl of <4 x i64> where the shift amounts are loaded from %y.
; The load is expected to fold into vpsllvq's memory operand: (%eax) after the
; pointer is fetched from the stack on i686, (%rdi) directly on x86_64.
343 define <4 x i64> @variable_shl3_load(<4 x i64> %x, <4 x i64>* %y) {
344 ; X32-LABEL: variable_shl3_load:
346 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
347 ; X32-NEXT: vpsllvq (%eax), %ymm0, %ymm0
350 ; X64-LABEL: variable_shl3_load:
352 ; X64-NEXT: vpsllvq (%rdi), %ymm0, %ymm0
354 %y1 = load <4 x i64>, <4 x i64>* %y
355 %k = shl <4 x i64> %x, %y1
; Variable lshr of <4 x i32> where the shift amounts are loaded from %y.
; The load is expected to fold into vpsrlvd's memory operand: (%eax) after the
; pointer is fetched from the stack on i686, (%rdi) directly on x86_64.
359 define <4 x i32> @variable_srl0_load(<4 x i32> %x, <4 x i32>* %y) {
360 ; X32-LABEL: variable_srl0_load:
362 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
363 ; X32-NEXT: vpsrlvd (%eax), %xmm0, %xmm0
366 ; X64-LABEL: variable_srl0_load:
368 ; X64-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0
370 %y1 = load <4 x i32>, <4 x i32>* %y
371 %k = lshr <4 x i32> %x, %y1
; Variable lshr of <8 x i32> where the shift amounts are loaded from %y.
; The load is expected to fold into vpsrlvd's memory operand: (%eax) after the
; pointer is fetched from the stack on i686, (%rdi) directly on x86_64.
375 define <8 x i32> @variable_srl1_load(<8 x i32> %x, <8 x i32>* %y) {
376 ; X32-LABEL: variable_srl1_load:
378 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
379 ; X32-NEXT: vpsrlvd (%eax), %ymm0, %ymm0
382 ; X64-LABEL: variable_srl1_load:
384 ; X64-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0
386 %y1 = load <8 x i32>, <8 x i32>* %y
387 %k = lshr <8 x i32> %x, %y1
; Variable lshr of <2 x i64> where the shift amounts are loaded from %y.
; The load is expected to fold into vpsrlvq's memory operand: (%eax) after the
; pointer is fetched from the stack on i686, (%rdi) directly on x86_64.
391 define <2 x i64> @variable_srl2_load(<2 x i64> %x, <2 x i64>* %y) {
392 ; X32-LABEL: variable_srl2_load:
394 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
395 ; X32-NEXT: vpsrlvq (%eax), %xmm0, %xmm0
398 ; X64-LABEL: variable_srl2_load:
400 ; X64-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0
402 %y1 = load <2 x i64>, <2 x i64>* %y
403 %k = lshr <2 x i64> %x, %y1
; Variable lshr of <4 x i64> where the shift amounts are loaded from %y.
; The load is expected to fold into vpsrlvq's memory operand: (%eax) after the
; pointer is fetched from the stack on i686, (%rdi) directly on x86_64.
407 define <4 x i64> @variable_srl3_load(<4 x i64> %x, <4 x i64>* %y) {
408 ; X32-LABEL: variable_srl3_load:
410 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
411 ; X32-NEXT: vpsrlvq (%eax), %ymm0, %ymm0
414 ; X64-LABEL: variable_srl3_load:
416 ; X64-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0
418 %y1 = load <4 x i64>, <4 x i64>* %y
419 %k = lshr <4 x i64> %x, %y1
; shl of <32 x i8> by a splat constant 3. The expected sequence is a 16-bit
; vpsllw $3 followed by a vpand against an LCPI-labelled operand (presumably a
; constant-pool mask that clears bits shifted in from the neighbouring byte;
; the mask value itself is not checked here).
423 define <32 x i8> @shl9(<32 x i8> %A) nounwind {
426 ; X32-NEXT: vpsllw $3, %ymm0, %ymm0
427 ; X32-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
432 ; X64-NEXT: vpsllw $3, %ymm0, %ymm0
433 ; X64-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
435 %B = shl <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
; lshr of <32 x i8> by a splat constant 3. The expected sequence is a 16-bit
; vpsrlw $3 followed by a vpand against an LCPI-labelled operand (presumably a
; constant-pool mask that clears bits shifted in from the neighbouring byte;
; the mask value itself is not checked here).
439 define <32 x i8> @shr9(<32 x i8> %A) nounwind {
442 ; X32-NEXT: vpsrlw $3, %ymm0, %ymm0
443 ; X32-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
448 ; X64-NEXT: vpsrlw $3, %ymm0, %ymm0
449 ; X64-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
451 %B = lshr <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
; ashr of <32 x i8> by a splat constant 7 (every byte becomes all-ones or zero
; according to its sign bit). The expected lowering uses no shift at all: it
; zeroes a register with vpxor and compares 0 > x per byte with vpcmpgtb.
455 define <32 x i8> @sra_v32i8_7(<32 x i8> %A) nounwind {
456 ; X32-LABEL: sra_v32i8_7:
458 ; X32-NEXT: vpxor %xmm1, %xmm1, %xmm1
459 ; X32-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
462 ; X64-LABEL: sra_v32i8_7:
464 ; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
465 ; X64-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
467 %B = ashr <32 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
; ashr of <32 x i8> by a splat constant 3. The expected sequence performs a
; logical shift (vpsrlw $3 plus a vpand against an LCPI-labelled operand) and
; then restores the sign with vpxor/vpsubb against a splat of 16, which is
; where the original sign bit (0x80) lands after shifting right by 3.
471 define <32 x i8> @sra_v32i8(<32 x i8> %A) nounwind {
472 ; X32-LABEL: sra_v32i8:
474 ; X32-NEXT: vpsrlw $3, %ymm0, %ymm0
475 ; X32-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
476 ; X32-NEXT: vmovdqa {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
477 ; X32-NEXT: vpxor %ymm1, %ymm0, %ymm0
478 ; X32-NEXT: vpsubb %ymm1, %ymm0, %ymm0
481 ; X64-LABEL: sra_v32i8:
483 ; X64-NEXT: vpsrlw $3, %ymm0, %ymm0
484 ; X64-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
485 ; X64-NEXT: vmovdqa {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
486 ; X64-NEXT: vpxor %ymm1, %ymm0, %ymm0
487 ; X64-NEXT: vpsubb %ymm1, %ymm0, %ymm0
489 %B = ashr <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
; trunc <16 x i16> to i8 elements followed by sext back to i16 (an in-register
; sign extension of the low byte). Expected lowering is a shift pair:
; vpsllw $8 then vpsraw $8.
493 define <16 x i16> @sext_v16i16(<16 x i16> %a) nounwind {
494 ; X32-LABEL: sext_v16i16:
496 ; X32-NEXT: vpsllw $8, %ymm0, %ymm0
497 ; X32-NEXT: vpsraw $8, %ymm0, %ymm0
500 ; X64-LABEL: sext_v16i16:
502 ; X64-NEXT: vpsllw $8, %ymm0, %ymm0
503 ; X64-NEXT: vpsraw $8, %ymm0, %ymm0
505 %b = trunc <16 x i16> %a to <16 x i8>
506 %c = sext <16 x i8> %b to <16 x i16>
; trunc <8 x i32> to i16 elements followed by sext back to i32 (an in-register
; sign extension of the low half). Expected lowering is a shift pair:
; vpslld $16 then vpsrad $16.
510 define <8 x i32> @sext_v8i32(<8 x i32> %a) nounwind {
511 ; X32-LABEL: sext_v8i32:
513 ; X32-NEXT: vpslld $16, %ymm0, %ymm0
514 ; X32-NEXT: vpsrad $16, %ymm0, %ymm0
517 ; X64-LABEL: sext_v8i32:
519 ; X64-NEXT: vpslld $16, %ymm0, %ymm0
520 ; X64-NEXT: vpsrad $16, %ymm0, %ymm0
522 %b = trunc <8 x i32> %a to <8 x i16>
523 %c = sext <8 x i16> %b to <8 x i32>
; Per-element variable shl of <8 x i16>. The expected sequence widens both
; operands to 32-bit lanes with vpmovzxwd, shifts with vpsllvd on ymm, then
; narrows back to 16-bit lanes via vpshufb + vpermq and ends with vzeroupper.
527 define <8 x i16> @variable_shl16(<8 x i16> %lhs, <8 x i16> %rhs) {
528 ; X32-LABEL: variable_shl16:
530 ; X32-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
531 ; X32-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
532 ; X32-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
533 ; X32-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
534 ; X32-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
535 ; X32-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
536 ; X32-NEXT: vzeroupper
539 ; X64-LABEL: variable_shl16:
541 ; X64-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
542 ; X64-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
543 ; X64-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
544 ; X64-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
545 ; X64-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
546 ; X64-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
547 ; X64-NEXT: vzeroupper
549 %res = shl <8 x i16> %lhs, %rhs
; Per-element variable ashr of <8 x i16>. The expected sequence zero-extends
; the shift amounts (vpmovzxwd) and sign-extends the value (vpmovsxwd) to
; 32-bit lanes, shifts with vpsravd, then narrows back with vextracti128 +
; vpackssdw and ends with vzeroupper.
553 define <8 x i16> @variable_ashr16(<8 x i16> %lhs, <8 x i16> %rhs) {
554 ; X32-LABEL: variable_ashr16:
556 ; X32-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
557 ; X32-NEXT: vpmovsxwd %xmm0, %ymm0
558 ; X32-NEXT: vpsravd %ymm1, %ymm0, %ymm0
559 ; X32-NEXT: vextracti128 $1, %ymm0, %xmm1
560 ; X32-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
561 ; X32-NEXT: vzeroupper
564 ; X64-LABEL: variable_ashr16:
566 ; X64-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
567 ; X64-NEXT: vpmovsxwd %xmm0, %ymm0
568 ; X64-NEXT: vpsravd %ymm1, %ymm0, %ymm0
569 ; X64-NEXT: vextracti128 $1, %ymm0, %xmm1
570 ; X64-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
571 ; X64-NEXT: vzeroupper
573 %res = ashr <8 x i16> %lhs, %rhs
; Per-element variable lshr of <8 x i16>. The expected sequence zero-extends
; both operands to 32-bit lanes with vpmovzxwd, shifts with vpsrlvd, then
; narrows back with vextracti128 + vpackusdw and ends with vzeroupper.
577 define <8 x i16> @variable_lshr16(<8 x i16> %lhs, <8 x i16> %rhs) {
578 ; X32-LABEL: variable_lshr16:
580 ; X32-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
581 ; X32-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
582 ; X32-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
583 ; X32-NEXT: vextracti128 $1, %ymm0, %xmm1
584 ; X32-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
585 ; X32-NEXT: vzeroupper
588 ; X64-LABEL: variable_lshr16:
590 ; X64-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
591 ; X64-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
592 ; X64-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
593 ; X64-NEXT: vextracti128 $1, %ymm0, %xmm1
594 ; X64-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
595 ; X64-NEXT: vzeroupper
597 %res = lshr <8 x i16> %lhs, %rhs