1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=ALL,AVX1
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=ALL,AVX2OR512VL,AVX2,AVX2-SLOW
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX2OR512VL,AVX2,AVX2-FAST,AVX2-FAST-ALL
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX2OR512VL,AVX2,AVX2-FAST,AVX2-FAST-PERLANE
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw | FileCheck %s --check-prefixes=ALL,AVX2OR512VL,AVX512VL,AVX512VLBW,AVX512VLBW-SLOW
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX2OR512VL,AVX512VL,AVX512VLBW,AVX512VLBW-FAST
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX2OR512VL,AVX512VL,AVX512VLBW,AVX512VLBW-FAST
9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512vbmi | FileCheck %s --check-prefixes=ALL,AVX2OR512VL,AVX512VL,AVX512VLVBMI,AVX512VLVBMI-SLOW
10 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512vbmi,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX2OR512VL,AVX512VL,AVX512VLVBMI,AVX512VLVBMI-FAST-ALL
11 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512vbmi,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=ALL,AVX2OR512VL,AVX512VL,AVX512VLVBMI,AVX512VLVBMI-FAST-PERLANE
12 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefixes=ALL,XOP,XOPAVX1
13 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=ALL,XOP,XOPAVX2
15 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
16 ; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
18 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
19 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
20 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
23 ; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
24 ; AVX2OR512VL: # %bb.0:
25 ; AVX2OR512VL-NEXT: vpbroadcastb %xmm0, %ymm0
26 ; AVX2OR512VL-NEXT: retq
28 ; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
30 ; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
31 ; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
32 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
35 ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
37 ; XOPAVX2-NEXT: vpbroadcastb %xmm0, %ymm0
39 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
40 ret <32 x i8> %shuffle
43 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00(<32 x i8> %a, <32 x i8> %b) {
44 ; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
46 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
47 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
48 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
49 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
52 ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
54 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
55 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
58 ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
59 ; AVX512VLBW: # %bb.0:
60 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
61 ; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
62 ; AVX512VLBW-NEXT: retq
64 ; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
65 ; AVX512VLVBMI-SLOW: # %bb.0:
66 ; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
67 ; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
68 ; AVX512VLVBMI-SLOW-NEXT: retq
70 ; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
71 ; AVX512VLVBMI-FAST-ALL: # %bb.0:
72 ; AVX512VLVBMI-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
73 ; AVX512VLVBMI-FAST-ALL-NEXT: vpermb %ymm0, %ymm1, %ymm0
74 ; AVX512VLVBMI-FAST-ALL-NEXT: retq
76 ; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
77 ; AVX512VLVBMI-FAST-PERLANE: # %bb.0:
78 ; AVX512VLVBMI-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
79 ; AVX512VLVBMI-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
80 ; AVX512VLVBMI-FAST-PERLANE-NEXT: retq
82 ; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
84 ; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
85 ; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
86 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
87 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
90 ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
92 ; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
93 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
95 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
96 ret <32 x i8> %shuffle
99 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00(<32 x i8> %a, <32 x i8> %b) {
100 ; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
102 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
103 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
104 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
105 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
108 ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
110 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
111 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
114 ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
115 ; AVX512VLBW: # %bb.0:
116 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
117 ; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
118 ; AVX512VLBW-NEXT: retq
120 ; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
121 ; AVX512VLVBMI-SLOW: # %bb.0:
122 ; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
123 ; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
124 ; AVX512VLVBMI-SLOW-NEXT: retq
126 ; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
127 ; AVX512VLVBMI-FAST-ALL: # %bb.0:
128 ; AVX512VLVBMI-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
129 ; AVX512VLVBMI-FAST-ALL-NEXT: vpermb %ymm0, %ymm1, %ymm0
130 ; AVX512VLVBMI-FAST-ALL-NEXT: retq
132 ; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
133 ; AVX512VLVBMI-FAST-PERLANE: # %bb.0:
134 ; AVX512VLVBMI-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
135 ; AVX512VLVBMI-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
136 ; AVX512VLVBMI-FAST-PERLANE-NEXT: retq
138 ; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
140 ; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
141 ; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
142 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
143 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
146 ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
148 ; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
149 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
151 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
152 ret <32 x i8> %shuffle
155 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00(<32 x i8> %a, <32 x i8> %b) {
156 ; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
158 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
159 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
160 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0]
161 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
164 ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
166 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0]
167 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
170 ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
171 ; AVX512VLBW: # %bb.0:
172 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0]
173 ; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
174 ; AVX512VLBW-NEXT: retq
176 ; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
177 ; AVX512VLVBMI-SLOW: # %bb.0:
178 ; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0]
179 ; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
180 ; AVX512VLVBMI-SLOW-NEXT: retq
182 ; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
183 ; AVX512VLVBMI-FAST-ALL: # %bb.0:
184 ; AVX512VLVBMI-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0]
185 ; AVX512VLVBMI-FAST-ALL-NEXT: vpermb %ymm0, %ymm1, %ymm0
186 ; AVX512VLVBMI-FAST-ALL-NEXT: retq
188 ; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
189 ; AVX512VLVBMI-FAST-PERLANE: # %bb.0:
190 ; AVX512VLVBMI-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0]
191 ; AVX512VLVBMI-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
192 ; AVX512VLVBMI-FAST-PERLANE-NEXT: retq
194 ; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
196 ; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
197 ; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
198 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0]
199 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
202 ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
204 ; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0]
205 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
207 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
208 ret <32 x i8> %shuffle
211 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
212 ; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
214 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
215 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
216 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0]
217 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
220 ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
222 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0]
223 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
226 ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
227 ; AVX512VLBW: # %bb.0:
228 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0]
229 ; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
230 ; AVX512VLBW-NEXT: retq
232 ; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
233 ; AVX512VLVBMI-SLOW: # %bb.0:
234 ; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0]
235 ; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
236 ; AVX512VLVBMI-SLOW-NEXT: retq
238 ; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
239 ; AVX512VLVBMI-FAST-ALL: # %bb.0:
240 ; AVX512VLVBMI-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0]
241 ; AVX512VLVBMI-FAST-ALL-NEXT: vpermb %ymm0, %ymm1, %ymm0
242 ; AVX512VLVBMI-FAST-ALL-NEXT: retq
244 ; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
245 ; AVX512VLVBMI-FAST-PERLANE: # %bb.0:
246 ; AVX512VLVBMI-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0]
247 ; AVX512VLVBMI-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
248 ; AVX512VLVBMI-FAST-PERLANE-NEXT: retq
250 ; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
252 ; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
253 ; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
254 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0]
255 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
258 ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
260 ; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0]
261 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
263 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
264 ret <32 x i8> %shuffle
267 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
268 ; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
270 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
271 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
272 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0]
273 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
276 ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
278 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0]
279 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
282 ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
283 ; AVX512VLBW: # %bb.0:
284 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0]
285 ; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
286 ; AVX512VLBW-NEXT: retq
288 ; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
289 ; AVX512VLVBMI-SLOW: # %bb.0:
290 ; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0]
291 ; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
292 ; AVX512VLVBMI-SLOW-NEXT: retq
294 ; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
295 ; AVX512VLVBMI-FAST-ALL: # %bb.0:
296 ; AVX512VLVBMI-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0]
297 ; AVX512VLVBMI-FAST-ALL-NEXT: vpermb %ymm0, %ymm1, %ymm0
298 ; AVX512VLVBMI-FAST-ALL-NEXT: retq
300 ; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
301 ; AVX512VLVBMI-FAST-PERLANE: # %bb.0:
302 ; AVX512VLVBMI-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0]
303 ; AVX512VLVBMI-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
304 ; AVX512VLVBMI-FAST-PERLANE-NEXT: retq
306 ; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
308 ; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
309 ; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
310 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0]
311 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
314 ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
316 ; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0]
317 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
319 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
320 ret <32 x i8> %shuffle
323 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
324 ; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
326 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
327 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
328 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0]
329 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
332 ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
334 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0]
335 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
338 ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
339 ; AVX512VLBW: # %bb.0:
340 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0]
341 ; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
342 ; AVX512VLBW-NEXT: retq
344 ; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
345 ; AVX512VLVBMI-SLOW: # %bb.0:
346 ; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0]
347 ; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
348 ; AVX512VLVBMI-SLOW-NEXT: retq
350 ; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
351 ; AVX512VLVBMI-FAST-ALL: # %bb.0:
352 ; AVX512VLVBMI-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0]
353 ; AVX512VLVBMI-FAST-ALL-NEXT: vpermb %ymm0, %ymm1, %ymm0
354 ; AVX512VLVBMI-FAST-ALL-NEXT: retq
356 ; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
357 ; AVX512VLVBMI-FAST-PERLANE: # %bb.0:
358 ; AVX512VLVBMI-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0]
359 ; AVX512VLVBMI-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
360 ; AVX512VLVBMI-FAST-PERLANE-NEXT: retq
362 ; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
364 ; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
365 ; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
366 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0]
367 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
370 ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
372 ; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0]
373 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
375 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
376 ret <32 x i8> %shuffle
379 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
380 ; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
382 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
383 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
384 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
385 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
388 ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
390 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
391 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
394 ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
395 ; AVX512VLBW: # %bb.0:
396 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
397 ; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
398 ; AVX512VLBW-NEXT: retq
400 ; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
401 ; AVX512VLVBMI-SLOW: # %bb.0:
402 ; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
403 ; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
404 ; AVX512VLVBMI-SLOW-NEXT: retq
406 ; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
407 ; AVX512VLVBMI-FAST-ALL: # %bb.0:
408 ; AVX512VLVBMI-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
409 ; AVX512VLVBMI-FAST-ALL-NEXT: vpermb %ymm0, %ymm1, %ymm0
410 ; AVX512VLVBMI-FAST-ALL-NEXT: retq
412 ; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
413 ; AVX512VLVBMI-FAST-PERLANE: # %bb.0:
414 ; AVX512VLVBMI-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
415 ; AVX512VLVBMI-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
416 ; AVX512VLVBMI-FAST-PERLANE-NEXT: retq
418 ; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
420 ; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
421 ; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
422 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
423 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
426 ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
428 ; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
429 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
431 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
432 ret <32 x i8> %shuffle
435 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
436 ; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
438 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
439 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
440 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
441 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
444 ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
446 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8]
447 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
450 ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
451 ; AVX512VLBW: # %bb.0:
452 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8]
453 ; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
454 ; AVX512VLBW-NEXT: retq
456 ; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
457 ; AVX512VLVBMI-SLOW: # %bb.0:
458 ; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8]
459 ; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
460 ; AVX512VLVBMI-SLOW-NEXT: retq
462 ; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
463 ; AVX512VLVBMI-FAST-ALL: # %bb.0:
464 ; AVX512VLVBMI-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
465 ; AVX512VLVBMI-FAST-ALL-NEXT: vpermb %ymm0, %ymm1, %ymm0
466 ; AVX512VLVBMI-FAST-ALL-NEXT: retq
468 ; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
469 ; AVX512VLVBMI-FAST-PERLANE: # %bb.0:
470 ; AVX512VLVBMI-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8]
471 ; AVX512VLVBMI-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
472 ; AVX512VLVBMI-FAST-PERLANE-NEXT: retq
474 ; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
476 ; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
477 ; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
478 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
479 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
482 ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
484 ; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8]
485 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
487 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
488 ret <32 x i8> %shuffle
491 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
492 ; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
494 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
495 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
496 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0]
497 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
500 ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
502 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0]
503 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
506 ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
507 ; AVX512VLBW: # %bb.0:
508 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0]
509 ; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
510 ; AVX512VLBW-NEXT: retq
512 ; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
513 ; AVX512VLVBMI-SLOW: # %bb.0:
514 ; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0]
515 ; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
516 ; AVX512VLVBMI-SLOW-NEXT: retq
518 ; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
519 ; AVX512VLVBMI-FAST-ALL: # %bb.0:
520 ; AVX512VLVBMI-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0]
521 ; AVX512VLVBMI-FAST-ALL-NEXT: vpermb %ymm0, %ymm1, %ymm0
522 ; AVX512VLVBMI-FAST-ALL-NEXT: retq
524 ; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
525 ; AVX512VLVBMI-FAST-PERLANE: # %bb.0:
526 ; AVX512VLVBMI-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0]
527 ; AVX512VLVBMI-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
528 ; AVX512VLVBMI-FAST-PERLANE-NEXT: retq
530 ; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
532 ; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
533 ; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
534 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0]
535 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
538 ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
540 ; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0]
541 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
543 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 9, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
544 ret <32 x i8> %shuffle
547 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
548 ; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
550 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
551 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
552 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0]
553 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
556 ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
558 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0]
559 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
562 ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
563 ; AVX512VLBW: # %bb.0:
564 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0]
565 ; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
566 ; AVX512VLBW-NEXT: retq
568 ; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
569 ; AVX512VLVBMI-SLOW: # %bb.0:
570 ; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0]
571 ; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
572 ; AVX512VLVBMI-SLOW-NEXT: retq
574 ; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
575 ; AVX512VLVBMI-FAST-ALL: # %bb.0:
576 ; AVX512VLVBMI-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0]
577 ; AVX512VLVBMI-FAST-ALL-NEXT: vpermb %ymm0, %ymm1, %ymm0
578 ; AVX512VLVBMI-FAST-ALL-NEXT: retq
580 ; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
581 ; AVX512VLVBMI-FAST-PERLANE: # %bb.0:
582 ; AVX512VLVBMI-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0]
583 ; AVX512VLVBMI-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
584 ; AVX512VLVBMI-FAST-PERLANE-NEXT: retq
586 ; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
588 ; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
589 ; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
590 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0]
591 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
594 ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
596 ; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0]
597 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
599 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
600 ret <32 x i8> %shuffle
603 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
604 ; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
606 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
607 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
608 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0]
609 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
612 ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
614 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0]
615 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
618 ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
619 ; AVX512VLBW: # %bb.0:
620 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0]
621 ; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
622 ; AVX512VLBW-NEXT: retq
624 ; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
625 ; AVX512VLVBMI-SLOW: # %bb.0:
626 ; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0]
627 ; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
628 ; AVX512VLVBMI-SLOW-NEXT: retq
630 ; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
631 ; AVX512VLVBMI-FAST-ALL: # %bb.0:
632 ; AVX512VLVBMI-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0]
633 ; AVX512VLVBMI-FAST-ALL-NEXT: vpermb %ymm0, %ymm1, %ymm0
634 ; AVX512VLVBMI-FAST-ALL-NEXT: retq
636 ; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
637 ; AVX512VLVBMI-FAST-PERLANE: # %bb.0:
638 ; AVX512VLVBMI-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0]
639 ; AVX512VLVBMI-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
640 ; AVX512VLVBMI-FAST-PERLANE-NEXT: retq
642 ; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
644 ; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
645 ; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
646 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0]
647 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
650 ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
652 ; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0]
653 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
655 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 11, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
656 ret <32 x i8> %shuffle
659 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
660 ; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
662 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
663 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
664 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0]
665 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
668 ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
670 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0]
671 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
674 ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
675 ; AVX512VLBW: # %bb.0:
676 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0]
677 ; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
678 ; AVX512VLBW-NEXT: retq
680 ; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
681 ; AVX512VLVBMI-SLOW: # %bb.0:
682 ; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0]
683 ; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
684 ; AVX512VLVBMI-SLOW-NEXT: retq
686 ; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
687 ; AVX512VLVBMI-FAST-ALL: # %bb.0:
688 ; AVX512VLVBMI-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0]
689 ; AVX512VLVBMI-FAST-ALL-NEXT: vpermb %ymm0, %ymm1, %ymm0
690 ; AVX512VLVBMI-FAST-ALL-NEXT: retq
692 ; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
693 ; AVX512VLVBMI-FAST-PERLANE: # %bb.0:
694 ; AVX512VLVBMI-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0]
695 ; AVX512VLVBMI-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
696 ; AVX512VLVBMI-FAST-PERLANE-NEXT: retq
698 ; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
700 ; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
701 ; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
702 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0]
703 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
706 ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
708 ; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0]
709 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
711 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 12, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
712 ret <32 x i8> %shuffle
715 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
716 ; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
718 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
719 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
720 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0]
721 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
724 ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
726 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0]
727 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
730 ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
731 ; AVX512VLBW: # %bb.0:
732 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0]
733 ; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
734 ; AVX512VLBW-NEXT: retq
736 ; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
737 ; AVX512VLVBMI-SLOW: # %bb.0:
738 ; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0]
739 ; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
740 ; AVX512VLVBMI-SLOW-NEXT: retq
742 ; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
743 ; AVX512VLVBMI-FAST-ALL: # %bb.0:
744 ; AVX512VLVBMI-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0]
745 ; AVX512VLVBMI-FAST-ALL-NEXT: vpermb %ymm0, %ymm1, %ymm0
746 ; AVX512VLVBMI-FAST-ALL-NEXT: retq
748 ; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
749 ; AVX512VLVBMI-FAST-PERLANE: # %bb.0:
750 ; AVX512VLVBMI-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0]
751 ; AVX512VLVBMI-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
752 ; AVX512VLVBMI-FAST-PERLANE-NEXT: retq
754 ; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
756 ; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
757 ; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
758 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0]
759 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
762 ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
764 ; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0]
765 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
767 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 13, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
768 ret <32 x i8> %shuffle
771 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
772 ; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
774 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
775 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
776 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
777 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
780 ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
782 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0]
783 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
786 ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
787 ; AVX512VLBW: # %bb.0:
788 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0]
789 ; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
790 ; AVX512VLBW-NEXT: retq
792 ; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
793 ; AVX512VLVBMI-SLOW: # %bb.0:
794 ; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0]
795 ; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
796 ; AVX512VLVBMI-SLOW-NEXT: retq
798 ; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
799 ; AVX512VLVBMI-FAST-ALL: # %bb.0:
800 ; AVX512VLVBMI-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
801 ; AVX512VLVBMI-FAST-ALL-NEXT: vpermb %ymm0, %ymm1, %ymm0
802 ; AVX512VLVBMI-FAST-ALL-NEXT: retq
804 ; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
805 ; AVX512VLVBMI-FAST-PERLANE: # %bb.0:
806 ; AVX512VLVBMI-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0]
807 ; AVX512VLVBMI-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
808 ; AVX512VLVBMI-FAST-PERLANE-NEXT: retq
810 ; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
812 ; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
813 ; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
814 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
815 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
818 ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
820 ; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0]
821 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
823 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
824 ret <32 x i8> %shuffle
827 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
828 ; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
830 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
831 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
832 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
833 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
836 ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
838 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0]
839 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
842 ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
843 ; AVX512VLBW: # %bb.0:
844 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0]
845 ; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
846 ; AVX512VLBW-NEXT: retq
848 ; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
849 ; AVX512VLVBMI-SLOW: # %bb.0:
850 ; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0]
851 ; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
852 ; AVX512VLVBMI-SLOW-NEXT: retq
854 ; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
855 ; AVX512VLVBMI-FAST-ALL: # %bb.0:
856 ; AVX512VLVBMI-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
857 ; AVX512VLVBMI-FAST-ALL-NEXT: vpermb %ymm0, %ymm1, %ymm0
858 ; AVX512VLVBMI-FAST-ALL-NEXT: retq
860 ; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
861 ; AVX512VLVBMI-FAST-PERLANE: # %bb.0:
862 ; AVX512VLVBMI-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0]
863 ; AVX512VLVBMI-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
864 ; AVX512VLVBMI-FAST-PERLANE-NEXT: retq
866 ; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
868 ; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
869 ; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
870 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
871 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
874 ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
876 ; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0]
877 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
879 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
880 ret <32 x i8> %shuffle
883 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
884 ; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
886 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
887 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
888 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
889 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
890 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1]
891 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
894 ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
896 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
897 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
900 ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
901 ; AVX512VLBW: # %bb.0:
902 ; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
903 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
904 ; AVX512VLBW-NEXT: retq
906 ; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
907 ; AVX512VLVBMI: # %bb.0:
908 ; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16]
909 ; AVX512VLVBMI-NEXT: vpermb %ymm0, %ymm1, %ymm0
910 ; AVX512VLVBMI-NEXT: retq
912 ; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
914 ; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
915 ; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
916 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
917 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],xmm2[0]
918 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
921 ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
923 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
924 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
926 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
927 ret <32 x i8> %shuffle
930 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
931 ; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
933 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
934 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
935 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
936 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
937 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0]
938 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
941 ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
943 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
944 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
947 ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
948 ; AVX512VLBW: # %bb.0:
949 ; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
950 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
951 ; AVX512VLBW-NEXT: retq
953 ; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
954 ; AVX512VLVBMI: # %bb.0:
955 ; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,17,0]
956 ; AVX512VLVBMI-NEXT: vpermb %ymm0, %ymm1, %ymm0
957 ; AVX512VLVBMI-NEXT: retq
959 ; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
961 ; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
962 ; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
963 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
964 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0],xmm2[1],xmm0[0]
965 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
968 ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
970 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
971 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
973 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 17, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
974 ret <32 x i8> %shuffle
977 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
978 ; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
980 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
981 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
982 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
983 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
984 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0]
985 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
988 ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
990 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
991 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
994 ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
995 ; AVX512VLBW: # %bb.0:
996 ; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
997 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
998 ; AVX512VLBW-NEXT: retq
1000 ; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1001 ; AVX512VLVBMI: # %bb.0:
1002 ; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,18,0,0]
1003 ; AVX512VLVBMI-NEXT: vpermb %ymm0, %ymm1, %ymm0
1004 ; AVX512VLVBMI-NEXT: retq
1006 ; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1008 ; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1009 ; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
1010 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1011 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0],xmm2[2],xmm0[0,0]
1012 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1013 ; XOPAVX1-NEXT: retq
1015 ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1017 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
1018 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1019 ; XOPAVX2-NEXT: retq
1020 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 18, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1021 ret <32 x i8> %shuffle
1024 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
1025 ; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1027 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1028 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
1029 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1030 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
1031 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0]
1032 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1035 ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1037 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
1038 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1041 ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1042 ; AVX512VLBW: # %bb.0:
1043 ; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
1044 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1045 ; AVX512VLBW-NEXT: retq
1047 ; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1048 ; AVX512VLVBMI: # %bb.0:
1049 ; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,0,0,0,0,0,0,0,0,0,0,19,0,0,0]
1050 ; AVX512VLVBMI-NEXT: vpermb %ymm0, %ymm1, %ymm0
1051 ; AVX512VLVBMI-NEXT: retq
1053 ; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1055 ; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1056 ; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
1057 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1058 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0],xmm2[3],xmm0[0,0,0]
1059 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1060 ; XOPAVX1-NEXT: retq
1062 ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1064 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
1065 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1066 ; XOPAVX2-NEXT: retq
1067 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 19, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1068 ret <32 x i8> %shuffle
1071 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
1072 ; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1074 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1075 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
1076 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1077 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
1078 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0]
1079 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1082 ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1084 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
1085 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1088 ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1089 ; AVX512VLBW: # %bb.0:
1090 ; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
1091 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1092 ; AVX512VLBW-NEXT: retq
1094 ; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1095 ; AVX512VLVBMI: # %bb.0:
1096 ; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,0,0,0,0,0,0,0,0,0,20,0,0,0,0]
1097 ; AVX512VLVBMI-NEXT: vpermb %ymm0, %ymm1, %ymm0
1098 ; AVX512VLVBMI-NEXT: retq
1100 ; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1102 ; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1103 ; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
1104 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1105 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0],xmm2[4],xmm0[0,0,0,0]
1106 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1107 ; XOPAVX1-NEXT: retq
1109 ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1111 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
1112 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1113 ; XOPAVX2-NEXT: retq
1114 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 20, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1115 ret <32 x i8> %shuffle
1118 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
1119 ; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1121 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1122 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
1123 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1124 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
1125 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,11,0,0,0,0,0]
1126 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1129 ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1131 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
1132 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1135 ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1136 ; AVX512VLBW: # %bb.0:
1137 ; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
1138 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1139 ; AVX512VLBW-NEXT: retq
1141 ; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1142 ; AVX512VLVBMI: # %bb.0:
1143 ; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,0,0,0,0,0,0,0,0,21,0,0,0,0,0]
1144 ; AVX512VLVBMI-NEXT: vpermb %ymm0, %ymm1, %ymm0
1145 ; AVX512VLVBMI-NEXT: retq
1147 ; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1149 ; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1150 ; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
1151 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1152 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0],xmm2[5],xmm0[0,0,0,0,0]
1153 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1154 ; XOPAVX1-NEXT: retq
1156 ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1158 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
1159 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1160 ; XOPAVX2-NEXT: retq
1161 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 21, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1162 ret <32 x i8> %shuffle
1165 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
1166 ; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1168 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1169 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
1170 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1171 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
1172 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,13,0,0,0,0,0,0]
1173 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1176 ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1178 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
1179 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1182 ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1183 ; AVX512VLBW: # %bb.0:
1184 ; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
1185 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1186 ; AVX512VLBW-NEXT: retq
1188 ; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1189 ; AVX512VLVBMI: # %bb.0:
1190 ; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,0,0,0,0,0,0,0,22,0,0,0,0,0,0]
1191 ; AVX512VLVBMI-NEXT: vpermb %ymm0, %ymm1, %ymm0
1192 ; AVX512VLVBMI-NEXT: retq
1194 ; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1196 ; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1197 ; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
1198 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1199 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0],xmm2[6],xmm0[0,0,0,0,0,0]
1200 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1201 ; XOPAVX1-NEXT: retq
1203 ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1205 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
1206 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1207 ; XOPAVX2-NEXT: retq
1208 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 22, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1209 ret <32 x i8> %shuffle
1212 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
1213 ; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1215 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1216 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
1217 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1218 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
1219 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0]
1220 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1223 ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1225 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
1226 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1229 ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1230 ; AVX512VLBW: # %bb.0:
1231 ; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
1232 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1233 ; AVX512VLBW-NEXT: retq
1235 ; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1236 ; AVX512VLVBMI: # %bb.0:
1237 ; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,0,0,0,0,0,0,23,0,0,0,0,0,0,0]
1238 ; AVX512VLVBMI-NEXT: vpermb %ymm0, %ymm1, %ymm0
1239 ; AVX512VLVBMI-NEXT: retq
1241 ; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1243 ; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1244 ; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
1245 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1246 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0],xmm2[7],xmm0[0,0,0,0,0,0,0]
1247 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1248 ; XOPAVX1-NEXT: retq
1250 ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1252 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
1253 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1254 ; XOPAVX2-NEXT: retq
1255 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 23, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1256 ret <32 x i8> %shuffle
1259 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
1260 ; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1262 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1263 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
1264 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1265 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
1266 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,8,8,8,8,8,8,0,8,8,8,8,8,8,8,8]
1267 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1270 ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1272 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
1273 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1276 ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1277 ; AVX512VLBW: # %bb.0:
1278 ; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
1279 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1280 ; AVX512VLBW-NEXT: retq
1282 ; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1283 ; AVX512VLVBMI: # %bb.0:
1284 ; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,0,0,0,0,0,24,0,0,0,0,0,0,0,0]
1285 ; AVX512VLVBMI-NEXT: vpermb %ymm0, %ymm1, %ymm0
1286 ; AVX512VLVBMI-NEXT: retq
1288 ; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1290 ; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1291 ; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
1292 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1293 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0],xmm2[8],xmm0[0,0,0,0,0,0,0,0]
1294 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1295 ; XOPAVX1-NEXT: retq
1297 ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1299 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
1300 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1301 ; XOPAVX2-NEXT: retq
1302 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 24, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1303 ret <32 x i8> %shuffle
1306 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
1307 ; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1309 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1310 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
1311 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1312 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm2[9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8]
1313 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[7,7,7,7,7,7,0,7,7,7,7,7,7,7,7,7]
1314 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1317 ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1319 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
1320 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1323 ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1324 ; AVX512VLBW: # %bb.0:
1325 ; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
1326 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1327 ; AVX512VLBW-NEXT: retq
1329 ; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1330 ; AVX512VLVBMI: # %bb.0:
1331 ; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,0,0,0,0,25,0,0,0,0,0,0,0,0,0]
1332 ; AVX512VLVBMI-NEXT: vpermb %ymm0, %ymm1, %ymm0
1333 ; AVX512VLVBMI-NEXT: retq
1335 ; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1337 ; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1338 ; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
1339 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1340 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0],xmm2[9],xmm0[0,0,0,0,0,0,0,0,0]
1341 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1342 ; XOPAVX1-NEXT: retq
1344 ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1346 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
1347 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1348 ; XOPAVX2-NEXT: retq
1349 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 25, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1350 ret <32 x i8> %shuffle
1353 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
1354 ; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1356 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1357 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
1358 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1359 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm2[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
1360 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,6,6,6,6,0,6,6,6,6,6,6,6,6,6,6]
1361 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1364 ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1366 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
1367 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1370 ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1371 ; AVX512VLBW: # %bb.0:
1372 ; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
1373 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1374 ; AVX512VLBW-NEXT: retq
1376 ; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1377 ; AVX512VLVBMI: # %bb.0:
1378 ; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,0,0,0,26,0,0,0,0,0,0,0,0,0,0]
1379 ; AVX512VLVBMI-NEXT: vpermb %ymm0, %ymm1, %ymm0
1380 ; AVX512VLVBMI-NEXT: retq
1382 ; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1384 ; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1385 ; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
1386 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1387 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,0,0,0,0],xmm2[10],xmm0[0,0,0,0,0,0,0,0,0,0]
1388 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1389 ; XOPAVX1-NEXT: retq
1391 ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1393 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
1394 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1395 ; XOPAVX2-NEXT: retq
1396 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 26, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1397 ret <32 x i8> %shuffle
1400 define <32 x i8> @shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
1401 ; AVX1-LABEL: shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1403 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1404 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
1405 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1406 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm2[11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10]
1407 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,0,5,5,5,5,5,5,5,5,5,5,5]
1408 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1411 ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1413 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
1414 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1417 ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1418 ; AVX512VLBW: # %bb.0:
1419 ; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
1420 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1421 ; AVX512VLBW-NEXT: retq
1423 ; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1424 ; AVX512VLVBMI: # %bb.0:
1425 ; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,0,0,27,0,0,0,0,0,0,0,0,0,0,0]
1426 ; AVX512VLVBMI-NEXT: vpermb %ymm0, %ymm1, %ymm0
1427 ; AVX512VLVBMI-NEXT: retq
1429 ; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1431 ; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1432 ; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
1433 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1434 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,0,0,0],xmm2[11],xmm0[0,0,0,0,0,0,0,0,0,0,0]
1435 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1436 ; XOPAVX1-NEXT: retq
1438 ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1440 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
1441 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1442 ; XOPAVX2-NEXT: retq
1443 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 27, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1444 ret <32 x i8> %shuffle
1447 define <32 x i8> @shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
1448 ; AVX1-LABEL: shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1450 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1451 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
1452 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1453 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm2[12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
1454 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,4,4,0,4,4,4,4,4,4,4,4,4,4,4,4]
1455 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1458 ; AVX2-LABEL: shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1460 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
1461 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1464 ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1465 ; AVX512VLBW: # %bb.0:
1466 ; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
1467 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1468 ; AVX512VLBW-NEXT: retq
1470 ; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1471 ; AVX512VLVBMI: # %bb.0:
1472 ; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,0,28,0,0,0,0,0,0,0,0,0,0,0,0]
1473 ; AVX512VLVBMI-NEXT: vpermb %ymm0, %ymm1, %ymm0
1474 ; AVX512VLVBMI-NEXT: retq
1476 ; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1478 ; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1479 ; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
1480 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1481 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,0,0],xmm2[12],xmm0[0,0,0,0,0,0,0,0,0,0,0,0]
1482 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1483 ; XOPAVX1-NEXT: retq
1485 ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1487 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
1488 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1489 ; XOPAVX2-NEXT: retq
1490 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 28, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1491 ret <32 x i8> %shuffle
1494 define <32 x i8> @shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
1495 ; AVX1-LABEL: shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1497 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1498 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
1499 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1500 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm2[13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12]
1501 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,3,0,3,3,3,3,3,3,3,3,3,3,3,3,3]
1502 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1505 ; AVX2-LABEL: shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1507 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
1508 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1511 ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1512 ; AVX512VLBW: # %bb.0:
1513 ; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
1514 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1515 ; AVX512VLBW-NEXT: retq
1517 ; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1518 ; AVX512VLVBMI: # %bb.0:
1519 ; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,29,0,0,0,0,0,0,0,0,0,0,0,0,0]
1520 ; AVX512VLVBMI-NEXT: vpermb %ymm0, %ymm1, %ymm0
1521 ; AVX512VLVBMI-NEXT: retq
1523 ; XOPAVX1-LABEL: shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1525 ; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1526 ; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
1527 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1528 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,0],xmm2[13],xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0]
1529 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1530 ; XOPAVX1-NEXT: retq
1532 ; XOPAVX2-LABEL: shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1534 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
1535 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1536 ; XOPAVX2-NEXT: retq
1537 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 29, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1538 ret <32 x i8> %shuffle
1541 define <32 x i8> @shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
1542 ; AVX1-LABEL: shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1544 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1545 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
1546 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1547 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm2[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
1548 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
1549 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1552 ; AVX2-LABEL: shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1554 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
1555 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1558 ; AVX512VLBW-LABEL: shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1559 ; AVX512VLBW: # %bb.0:
1560 ; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
1561 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1562 ; AVX512VLBW-NEXT: retq
1564 ; AVX512VLVBMI-LABEL: shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1565 ; AVX512VLVBMI: # %bb.0:
1566 ; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} xmm1 = [0,30,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
1567 ; AVX512VLVBMI-NEXT: vpermb %ymm0, %ymm1, %ymm0
1568 ; AVX512VLVBMI-NEXT: retq
1570 ; XOPAVX1-LABEL: shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1572 ; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1573 ; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
1574 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1575 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0],xmm2[14],xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0]
1576 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1577 ; XOPAVX1-NEXT: retq
1579 ; XOPAVX2-LABEL: shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1581 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
1582 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1583 ; XOPAVX2-NEXT: retq
1584 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 30, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1585 ret <32 x i8> %shuffle
1588 define <32 x i8> @shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
1589 ; AVX1-LABEL: shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1591 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1592 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
1593 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1594 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm2[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
1595 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
1596 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1599 ; AVX2-LABEL: shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1601 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,0,0,3]
1602 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1605 ; AVX512VLBW-LABEL: shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1606 ; AVX512VLBW: # %bb.0:
1607 ; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,0,0,3]
1608 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1609 ; AVX512VLBW-NEXT: retq
1611 ; AVX512VLVBMI-LABEL: shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1612 ; AVX512VLVBMI: # %bb.0:
1613 ; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} xmm1 = [31,0,0,0]
1614 ; AVX512VLVBMI-NEXT: vpermb %ymm0, %ymm1, %ymm0
1615 ; AVX512VLVBMI-NEXT: retq
1617 ; XOPAVX1-LABEL: shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1619 ; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1620 ; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
1621 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1622 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm2[15],xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
1623 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1624 ; XOPAVX1-NEXT: retq
1626 ; XOPAVX2-LABEL: shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
1628 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,0,0,3]
1629 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
1630 ; XOPAVX2-NEXT: retq
1631 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 31, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1632 ret <32 x i8> %shuffle
1635 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
1636 ; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
1638 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1639 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
1640 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1641 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1642 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1645 ; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
1646 ; AVX2OR512VL: # %bb.0:
1647 ; AVX2OR512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
1648 ; AVX2OR512VL-NEXT: vpshufb %ymm1, %ymm0, %ymm0
1649 ; AVX2OR512VL-NEXT: retq
1651 ; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
1653 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1654 ; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
1655 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1656 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1657 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1658 ; XOPAVX1-NEXT: retq
1660 ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
1662 ; XOPAVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
1663 ; XOPAVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm0
1664 ; XOPAVX2-NEXT: retq
1665 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
1666 ret <32 x i8> %shuffle
1669 define <32 x i8> @shuffle_v32i8_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_31_31_31_31_31_31_31_31_31_31_31_31_31_31_31_31(<32 x i8> %a, <32 x i8> %b) {
1670 ; AVX1-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_31_31_31_31_31_31_31_31_31_31_31_31_31_31_31_31:
1672 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1673 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1674 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1675 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1676 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1679 ; AVX2OR512VL-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_31_31_31_31_31_31_31_31_31_31_31_31_31_31_31_31:
1680 ; AVX2OR512VL: # %bb.0:
1681 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
1682 ; AVX2OR512VL-NEXT: retq
1684 ; XOPAVX1-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_31_31_31_31_31_31_31_31_31_31_31_31_31_31_31_31:
1686 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1687 ; XOPAVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
1688 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1689 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1690 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1691 ; XOPAVX1-NEXT: retq
1693 ; XOPAVX2-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_31_31_31_31_31_31_31_31_31_31_31_31_31_31_31_31:
1695 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
1696 ; XOPAVX2-NEXT: retq
1697 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
1698 ret <32 x i8> %shuffle
1701 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24(<32 x i8> %a, <32 x i8> %b) {
1702 ; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24:
1704 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1705 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
1706 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1707 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1708 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1711 ; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24:
1712 ; AVX2OR512VL: # %bb.0:
1713 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
1714 ; AVX2OR512VL-NEXT: retq
1716 ; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24:
1718 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1719 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
1720 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1721 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1722 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1723 ; XOPAVX1-NEXT: retq
1725 ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24:
1727 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
1728 ; XOPAVX2-NEXT: retq
1729 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24>
1730 ret <32 x i8> %shuffle
1733 define <32 x i8> @shuffle_v32i8_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15_23_23_23_23_23_23_23_23_31_31_31_31_31_31_31_31(<32 x i8> %a, <32 x i8> %b) {
1734 ; AVX1-LABEL: shuffle_v32i8_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15_23_23_23_23_23_23_23_23_31_31_31_31_31_31_31_31:
1736 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1737 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,15,15,15,15,15,15,15,15]
1738 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1739 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1740 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1743 ; AVX2OR512VL-LABEL: shuffle_v32i8_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15_23_23_23_23_23_23_23_23_31_31_31_31_31_31_31_31:
1744 ; AVX2OR512VL: # %bb.0:
1745 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,7,7,7,7,7,7,7,15,15,15,15,15,15,15,15,23,23,23,23,23,23,23,23,31,31,31,31,31,31,31,31]
1746 ; AVX2OR512VL-NEXT: retq
1748 ; XOPAVX1-LABEL: shuffle_v32i8_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15_23_23_23_23_23_23_23_23_31_31_31_31_31_31_31_31:
1750 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1751 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,15,15,15,15,15,15,15,15]
1752 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1753 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1754 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1755 ; XOPAVX1-NEXT: retq
1757 ; XOPAVX2-LABEL: shuffle_v32i8_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15_23_23_23_23_23_23_23_23_31_31_31_31_31_31_31_31:
1759 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,7,7,7,7,7,7,7,15,15,15,15,15,15,15,15,23,23,23,23,23,23,23,23,31,31,31,31,31,31,31,31]
1760 ; XOPAVX2-NEXT: retq
1761 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
1762 ret <32 x i8> %shuffle
1765 define <32 x i8> @shuffle_v32i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20_24_24_24_24_28_28_28_28(<32 x i8> %a, <32 x i8> %b) {
1766 ; AVX1-LABEL: shuffle_v32i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20_24_24_24_24_28_28_28_28:
1768 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1769 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
1770 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1771 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1772 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1775 ; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20_24_24_24_24_28_28_28_28:
1776 ; AVX2OR512VL: # %bb.0:
1777 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12,16,16,16,16,20,20,20,20,24,24,24,24,28,28,28,28]
1778 ; AVX2OR512VL-NEXT: retq
1780 ; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20_24_24_24_24_28_28_28_28:
1782 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1783 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
1784 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1785 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1786 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1787 ; XOPAVX1-NEXT: retq
1789 ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20_24_24_24_24_28_28_28_28:
1791 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12,16,16,16,16,20,20,20,20,24,24,24,24,28,28,28,28]
1792 ; XOPAVX2-NEXT: retq
1793 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 16, i32 16, i32 16, i32 16, i32 20, i32 20, i32 20, i32 20, i32 24, i32 24, i32 24, i32 24, i32 28, i32 28, i32 28, i32 28>
1794 ret <32 x i8> %shuffle
1797 define <32 x i8> @shuffle_v32i8_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15_19_19_19_19_23_23_23_23_27_27_27_27_31_31_31_31(<32 x i8> %a, <32 x i8> %b) {
1798 ; AVX1-LABEL: shuffle_v32i8_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15_19_19_19_19_23_23_23_23_27_27_27_27_31_31_31_31:
1800 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1801 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [3,3,3,3,7,7,7,7,11,11,11,11,15,15,15,15]
1802 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1803 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1804 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1807 ; AVX2OR512VL-LABEL: shuffle_v32i8_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15_19_19_19_19_23_23_23_23_27_27_27_27_31_31_31_31:
1808 ; AVX2OR512VL: # %bb.0:
1809 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[3,3,3,3,7,7,7,7,11,11,11,11,15,15,15,15,19,19,19,19,23,23,23,23,27,27,27,27,31,31,31,31]
1810 ; AVX2OR512VL-NEXT: retq
1812 ; XOPAVX1-LABEL: shuffle_v32i8_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15_19_19_19_19_23_23_23_23_27_27_27_27_31_31_31_31:
1814 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1815 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [3,3,3,3,7,7,7,7,11,11,11,11,15,15,15,15]
1816 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1817 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1818 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1819 ; XOPAVX1-NEXT: retq
1821 ; XOPAVX2-LABEL: shuffle_v32i8_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15_19_19_19_19_23_23_23_23_27_27_27_27_31_31_31_31:
1823 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[3,3,3,3,7,7,7,7,11,11,11,11,15,15,15,15,19,19,19,19,23,23,23,23,27,27,27,27,31,31,31,31]
1824 ; XOPAVX2-NEXT: retq
1825 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 7, i32 7, i32 7, i32 7, i32 11, i32 11, i32 11, i32 11, i32 15, i32 15, i32 15, i32 15, i32 19, i32 19, i32 19, i32 19, i32 23, i32 23, i32 23, i32 23, i32 27, i32 27, i32 27, i32 27, i32 31, i32 31, i32 31, i32 31>
1826 ret <32 x i8> %shuffle
1829 define <32 x i8> @shuffle_v32i8_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14_16_16_18_18_20_20_22_22_24_24_26_26_28_28_30_30(<32 x i8> %a, <32 x i8> %b) {
1830 ; AVX1-LABEL: shuffle_v32i8_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14_16_16_18_18_20_20_22_22_24_24_26_26_28_28_30_30:
1832 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1833 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
1834 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1835 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1836 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1839 ; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14_16_16_18_18_20_20_22_22_24_24_26_26_28_28_30_30:
1840 ; AVX2OR512VL: # %bb.0:
1841 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14,16,16,18,18,20,20,22,22,24,24,26,26,28,28,30,30]
1842 ; AVX2OR512VL-NEXT: retq
1844 ; XOPAVX1-LABEL: shuffle_v32i8_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14_16_16_18_18_20_20_22_22_24_24_26_26_28_28_30_30:
1846 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1847 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
1848 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1849 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1850 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1851 ; XOPAVX1-NEXT: retq
1853 ; XOPAVX2-LABEL: shuffle_v32i8_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14_16_16_18_18_20_20_22_22_24_24_26_26_28_28_30_30:
1855 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14,16,16,18,18,20,20,22,22,24,24,26,26,28,28,30,30]
1856 ; XOPAVX2-NEXT: retq
1857 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14, i32 16, i32 16, i32 18, i32 18, i32 20, i32 20, i32 22, i32 22, i32 24, i32 24, i32 26, i32 26, i32 28, i32 28, i32 30, i32 30>
1858 ret <32 x i8> %shuffle
1861 define <32 x i8> @shuffle_v32i8_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15_17_17_19_19_21_21_23_23_25_25_27_27_29_29_31_31(<32 x i8> %a, <32 x i8> %b) {
1862 ; AVX1-LABEL: shuffle_v32i8_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15_17_17_19_19_21_21_23_23_25_25_27_27_29_29_31_31:
1864 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1865 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
1866 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1867 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1868 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1871 ; AVX2OR512VL-LABEL: shuffle_v32i8_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15_17_17_19_19_21_21_23_23_25_25_27_27_29_29_31_31:
1872 ; AVX2OR512VL: # %bb.0:
1873 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15,17,17,19,19,21,21,23,23,25,25,27,27,29,29,31,31]
1874 ; AVX2OR512VL-NEXT: retq
1876 ; XOPAVX1-LABEL: shuffle_v32i8_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15_17_17_19_19_21_21_23_23_25_25_27_27_29_29_31_31:
1878 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1879 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
1880 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1881 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1882 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1883 ; XOPAVX1-NEXT: retq
1885 ; XOPAVX2-LABEL: shuffle_v32i8_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15_17_17_19_19_21_21_23_23_25_25_27_27_29_29_31_31:
1887 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15,17,17,19,19,21,21,23,23,25,25,27,27,29,29,31,31]
1888 ; XOPAVX2-NEXT: retq
1889 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15, i32 17, i32 17, i32 19, i32 19, i32 21, i32 21, i32 23, i32 23, i32 25, i32 25, i32 27, i32 27, i32 29, i32 29, i32 31, i32 31>
1890 ret <32 x i8> %shuffle
1893 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00(<32 x i8> %a, <32 x i8> %b) {
1894 ; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
1896 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
1897 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1900 ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
1902 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
1903 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1906 ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
1907 ; AVX512VLBW: # %bb.0:
1908 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
1909 ; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1910 ; AVX512VLBW-NEXT: retq
1912 ; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
1913 ; AVX512VLVBMI-SLOW: # %bb.0:
1914 ; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
1915 ; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1916 ; AVX512VLVBMI-SLOW-NEXT: retq
1918 ; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
1919 ; AVX512VLVBMI-FAST-ALL: # %bb.0:
1920 ; AVX512VLVBMI-FAST-ALL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
1921 ; AVX512VLVBMI-FAST-ALL-NEXT: # ymm1 = mem[0,1,0,1]
1922 ; AVX512VLVBMI-FAST-ALL-NEXT: vpermb %ymm0, %ymm1, %ymm0
1923 ; AVX512VLVBMI-FAST-ALL-NEXT: retq
1925 ; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
1926 ; AVX512VLVBMI-FAST-PERLANE: # %bb.0:
1927 ; AVX512VLVBMI-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
1928 ; AVX512VLVBMI-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1929 ; AVX512VLVBMI-FAST-PERLANE-NEXT: retq
1931 ; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
1933 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
1934 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1935 ; XOPAVX1-NEXT: retq
1937 ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
1939 ; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
1940 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1941 ; XOPAVX2-NEXT: retq
1942 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
1943 ret <32 x i8> %shuffle
1946 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00(<32 x i8> %a, <32 x i8> %b) {
1947 ; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
1949 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
1950 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1953 ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
1955 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
1956 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1959 ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
1960 ; AVX512VLBW: # %bb.0:
1961 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
1962 ; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1963 ; AVX512VLBW-NEXT: retq
1965 ; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
1966 ; AVX512VLVBMI-SLOW: # %bb.0:
1967 ; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
1968 ; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1969 ; AVX512VLVBMI-SLOW-NEXT: retq
1971 ; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
1972 ; AVX512VLVBMI-FAST-ALL: # %bb.0:
1973 ; AVX512VLVBMI-FAST-ALL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
1974 ; AVX512VLVBMI-FAST-ALL-NEXT: # ymm1 = mem[0,1,0,1]
1975 ; AVX512VLVBMI-FAST-ALL-NEXT: vpermb %ymm0, %ymm1, %ymm0
1976 ; AVX512VLVBMI-FAST-ALL-NEXT: retq
1978 ; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
1979 ; AVX512VLVBMI-FAST-PERLANE: # %bb.0:
1980 ; AVX512VLVBMI-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
1981 ; AVX512VLVBMI-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1982 ; AVX512VLVBMI-FAST-PERLANE-NEXT: retq
1984 ; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
1986 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
1987 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1988 ; XOPAVX1-NEXT: retq
1990 ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
1992 ; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
1993 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
1994 ; XOPAVX2-NEXT: retq
1995 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
1996 ret <32 x i8> %shuffle
1999 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
2000 ; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
2002 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
2003 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
2006 ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
2008 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
2009 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
2012 ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
2013 ; AVX512VLBW: # %bb.0:
2014 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
2015 ; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
2016 ; AVX512VLBW-NEXT: retq
2018 ; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
2019 ; AVX512VLVBMI-SLOW: # %bb.0:
2020 ; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
2021 ; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
2022 ; AVX512VLVBMI-SLOW-NEXT: retq
2024 ; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
2025 ; AVX512VLVBMI-FAST-ALL: # %bb.0:
2026 ; AVX512VLVBMI-FAST-ALL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
2027 ; AVX512VLVBMI-FAST-ALL-NEXT: # ymm1 = mem[0,1,0,1]
2028 ; AVX512VLVBMI-FAST-ALL-NEXT: vpermb %ymm0, %ymm1, %ymm0
2029 ; AVX512VLVBMI-FAST-ALL-NEXT: retq
2031 ; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
2032 ; AVX512VLVBMI-FAST-PERLANE: # %bb.0:
2033 ; AVX512VLVBMI-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
2034 ; AVX512VLVBMI-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
2035 ; AVX512VLVBMI-FAST-PERLANE-NEXT: retq
2037 ; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
2039 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
2040 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
2041 ; XOPAVX1-NEXT: retq
2043 ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
2045 ; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
2046 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
2047 ; XOPAVX2-NEXT: retq
2048 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2049 ret <32 x i8> %shuffle
2052 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
2053 ; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
2055 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
2056 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
2059 ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
2061 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
2062 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
2065 ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
2066 ; AVX512VLBW: # %bb.0:
2067 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
2068 ; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
2069 ; AVX512VLBW-NEXT: retq
2071 ; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
2072 ; AVX512VLVBMI-SLOW: # %bb.0:
2073 ; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
2074 ; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
2075 ; AVX512VLVBMI-SLOW-NEXT: retq
2077 ; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
2078 ; AVX512VLVBMI-FAST-ALL: # %bb.0:
2079 ; AVX512VLVBMI-FAST-ALL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
2080 ; AVX512VLVBMI-FAST-ALL-NEXT: # ymm1 = mem[0,1,0,1]
2081 ; AVX512VLVBMI-FAST-ALL-NEXT: vpermb %ymm0, %ymm1, %ymm0
2082 ; AVX512VLVBMI-FAST-ALL-NEXT: retq
2084 ; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
2085 ; AVX512VLVBMI-FAST-PERLANE: # %bb.0:
2086 ; AVX512VLVBMI-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
2087 ; AVX512VLVBMI-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
2088 ; AVX512VLVBMI-FAST-PERLANE-NEXT: retq
2090 ; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
2092 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
2093 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
2094 ; XOPAVX1-NEXT: retq
2096 ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
2098 ; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
2099 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
2100 ; XOPAVX2-NEXT: retq
2101 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2102 ret <32 x i8> %shuffle
2105 define <32 x i8> @shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
2106 ; AVX1-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
2108 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
2109 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
2112 ; AVX2-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
2114 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
2115 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
2118 ; AVX512VLBW-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
2119 ; AVX512VLBW: # %bb.0:
2120 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
2121 ; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
2122 ; AVX512VLBW-NEXT: retq
2124 ; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
2125 ; AVX512VLVBMI-SLOW: # %bb.0:
2126 ; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
2127 ; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
2128 ; AVX512VLVBMI-SLOW-NEXT: retq
2130 ; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
2131 ; AVX512VLVBMI-FAST-ALL: # %bb.0:
2132 ; AVX512VLVBMI-FAST-ALL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
2133 ; AVX512VLVBMI-FAST-ALL-NEXT: # ymm1 = mem[0,1,0,1]
2134 ; AVX512VLVBMI-FAST-ALL-NEXT: vpermb %ymm0, %ymm1, %ymm0
2135 ; AVX512VLVBMI-FAST-ALL-NEXT: retq
2137 ; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
2138 ; AVX512VLVBMI-FAST-PERLANE: # %bb.0:
2139 ; AVX512VLVBMI-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
2140 ; AVX512VLVBMI-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
2141 ; AVX512VLVBMI-FAST-PERLANE-NEXT: retq
2143 ; XOPAVX1-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
2145 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
2146 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
2147 ; XOPAVX1-NEXT: retq
2149 ; XOPAVX2-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
2151 ; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
2152 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
2153 ; XOPAVX2-NEXT: retq
2154 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2155 ret <32 x i8> %shuffle
2158 define <32 x i8> @shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
2159 ; AVX1-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
2161 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
2162 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
2165 ; AVX2-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
2167 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
2168 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
2171 ; AVX512VLBW-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
2172 ; AVX512VLBW: # %bb.0:
2173 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
2174 ; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
2175 ; AVX512VLBW-NEXT: retq
2177 ; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
2178 ; AVX512VLVBMI-SLOW: # %bb.0:
2179 ; AVX512VLVBMI-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
2180 ; AVX512VLVBMI-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
2181 ; AVX512VLVBMI-SLOW-NEXT: retq
2183 ; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
2184 ; AVX512VLVBMI-FAST-ALL: # %bb.0:
2185 ; AVX512VLVBMI-FAST-ALL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
2186 ; AVX512VLVBMI-FAST-ALL-NEXT: # ymm1 = mem[0,1,0,1]
2187 ; AVX512VLVBMI-FAST-ALL-NEXT: vpermb %ymm0, %ymm1, %ymm0
2188 ; AVX512VLVBMI-FAST-ALL-NEXT: retq
2190 ; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
2191 ; AVX512VLVBMI-FAST-PERLANE: # %bb.0:
2192 ; AVX512VLVBMI-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
2193 ; AVX512VLVBMI-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
2194 ; AVX512VLVBMI-FAST-PERLANE-NEXT: retq
2196 ; XOPAVX1-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
2198 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
2199 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
2200 ; XOPAVX1-NEXT: retq
2202 ; XOPAVX2-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
2204 ; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
2205 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
2206 ; XOPAVX2-NEXT: retq
2207 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
2208 ret <32 x i8> %shuffle
2211 define <32 x i8> @shuffle_v32i8_00_33_02_35_04_37_06_39_08_41_10_43_12_45_14_47_16_49_18_51_20_53_22_55_24_57_26_59_28_61_30_63(<32 x i8> %a, <32 x i8> %b) {
2212 ; AVX1-LABEL: shuffle_v32i8_00_33_02_35_04_37_06_39_08_41_10_43_12_45_14_47_16_49_18_51_20_53_22_55_24_57_26_59_28_61_30_63:
2214 ; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
2215 ; AVX1-NEXT: vandnps %ymm1, %ymm2, %ymm1
2216 ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
2217 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
2220 ; AVX2-LABEL: shuffle_v32i8_00_33_02_35_04_37_06_39_08_41_10_43_12_45_14_47_16_49_18_51_20_53_22_55_24_57_26_59_28_61_30_63:
2222 ; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
2223 ; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
2226 ; AVX512VL-LABEL: shuffle_v32i8_00_33_02_35_04_37_06_39_08_41_10_43_12_45_14_47_16_49_18_51_20_53_22_55_24_57_26_59_28_61_30_63:
2227 ; AVX512VL: # %bb.0:
2228 ; AVX512VL-NEXT: movl $-1431655766, %eax # imm = 0xAAAAAAAA
2229 ; AVX512VL-NEXT: kmovd %eax, %k1
2230 ; AVX512VL-NEXT: vmovdqu8 %ymm1, %ymm0 {%k1}
2231 ; AVX512VL-NEXT: retq
2233 ; XOPAVX1-LABEL: shuffle_v32i8_00_33_02_35_04_37_06_39_08_41_10_43_12_45_14_47_16_49_18_51_20_53_22_55_24_57_26_59_28_61_30_63:
2235 ; XOPAVX1-NEXT: vpcmov {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0, %ymm0
2236 ; XOPAVX1-NEXT: retq
2238 ; XOPAVX2-LABEL: shuffle_v32i8_00_33_02_35_04_37_06_39_08_41_10_43_12_45_14_47_16_49_18_51_20_53_22_55_24_57_26_59_28_61_30_63:
2240 ; XOPAVX2-NEXT: vpbroadcastw {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
2241 ; XOPAVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
2242 ; XOPAVX2-NEXT: retq
2243 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 33, i32 2, i32 35, i32 4, i32 37, i32 6, i32 39, i32 8, i32 41, i32 10, i32 43, i32 12, i32 45, i32 14, i32 47, i32 16, i32 49, i32 18, i32 51, i32 20, i32 53, i32 22, i32 55, i32 24, i32 57, i32 26, i32 59, i32 28, i32 61, i32 30, i32 63>
2244 ret <32 x i8> %shuffle
2247 define <32 x i8> @shuffle_v32i8_32_01_34_03_36_05_38_07_40_09_42_11_44_13_46_15_48_17_50_19_52_21_54_23_56_25_58_27_60_29_62_31(<32 x i8> %a, <32 x i8> %b) {
2248 ; AVX1-LABEL: shuffle_v32i8_32_01_34_03_36_05_38_07_40_09_42_11_44_13_46_15_48_17_50_19_52_21_54_23_56_25_58_27_60_29_62_31:
2250 ; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
2251 ; AVX1-NEXT: vandnps %ymm0, %ymm2, %ymm0
2252 ; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
2253 ; AVX1-NEXT: vorps %ymm0, %ymm1, %ymm0
2256 ; AVX2-LABEL: shuffle_v32i8_32_01_34_03_36_05_38_07_40_09_42_11_44_13_46_15_48_17_50_19_52_21_54_23_56_25_58_27_60_29_62_31:
2258 ; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
2259 ; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
2262 ; AVX512VL-LABEL: shuffle_v32i8_32_01_34_03_36_05_38_07_40_09_42_11_44_13_46_15_48_17_50_19_52_21_54_23_56_25_58_27_60_29_62_31:
2263 ; AVX512VL: # %bb.0:
2264 ; AVX512VL-NEXT: movl $-1431655766, %eax # imm = 0xAAAAAAAA
2265 ; AVX512VL-NEXT: kmovd %eax, %k1
2266 ; AVX512VL-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1}
2267 ; AVX512VL-NEXT: retq
2269 ; XOPAVX1-LABEL: shuffle_v32i8_32_01_34_03_36_05_38_07_40_09_42_11_44_13_46_15_48_17_50_19_52_21_54_23_56_25_58_27_60_29_62_31:
2271 ; XOPAVX1-NEXT: vpcmov {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1, %ymm0
2272 ; XOPAVX1-NEXT: retq
2274 ; XOPAVX2-LABEL: shuffle_v32i8_32_01_34_03_36_05_38_07_40_09_42_11_44_13_46_15_48_17_50_19_52_21_54_23_56_25_58_27_60_29_62_31:
2276 ; XOPAVX2-NEXT: vpbroadcastw {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
2277 ; XOPAVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
2278 ; XOPAVX2-NEXT: retq
2279 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 32, i32 1, i32 34, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 29, i32 62, i32 31>
2280 ret <32 x i8> %shuffle
2283 ; PR27780 - https://bugs.llvm.org/show_bug.cgi?id=27780
2285 define <32 x i8> @load_fold_pblendvb(ptr %px, <32 x i8> %y) {
2286 ; AVX1-LABEL: load_fold_pblendvb:
2288 ; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm1 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
2289 ; AVX1-NEXT: vandnps (%rdi), %ymm1, %ymm2
2290 ; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
2291 ; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0
2294 ; AVX2-LABEL: load_fold_pblendvb:
2296 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0]
2297 ; AVX2-NEXT: vpblendvb %ymm1, (%rdi), %ymm0, %ymm0
2300 ; AVX512VL-LABEL: load_fold_pblendvb:
2301 ; AVX512VL: # %bb.0:
2302 ; AVX512VL-NEXT: movl $1953789044, %eax # imm = 0x74747474
2303 ; AVX512VL-NEXT: kmovd %eax, %k1
2304 ; AVX512VL-NEXT: vmovdqu8 (%rdi), %ymm0 {%k1}
2305 ; AVX512VL-NEXT: retq
2307 ; XOPAVX1-LABEL: load_fold_pblendvb:
2309 ; XOPAVX1-NEXT: vbroadcastsd {{.*#+}} ymm1 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
2310 ; XOPAVX1-NEXT: vpcmov %ymm1, (%rdi), %ymm0, %ymm0
2311 ; XOPAVX1-NEXT: retq
2313 ; XOPAVX2-LABEL: load_fold_pblendvb:
2315 ; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0]
2316 ; XOPAVX2-NEXT: vpblendvb %ymm1, (%rdi), %ymm0, %ymm0
2317 ; XOPAVX2-NEXT: retq
2318 %x = load <32 x i8>, ptr %px, align 32
2319 %select = shufflevector <32 x i8> %x, <32 x i8> %y, <32 x i32> <i32 32, i32 33, i32 2, i32 35, i32 4, i32 5, i32 6, i32 39, i32 40, i32 41, i32 10, i32 43, i32 12, i32 13, i32 14, i32 47, i32 48, i32 49, i32 18, i32 51, i32 20, i32 21, i32 22, i32 55, i32 56, i32 57, i32 26, i32 59, i32 28, i32 29, i32 30, i32 63>
2320 ret <32 x i8> %select
2323 define <32 x i8> @load_fold_pblendvb_commute(ptr %px, <32 x i8> %y) {
2324 ; AVX1-LABEL: load_fold_pblendvb_commute:
2326 ; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm1 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
2327 ; AVX1-NEXT: vandnps %ymm0, %ymm1, %ymm0
2328 ; AVX1-NEXT: vandps (%rdi), %ymm1, %ymm1
2329 ; AVX1-NEXT: vorps %ymm0, %ymm1, %ymm0
2332 ; AVX2-LABEL: load_fold_pblendvb_commute:
2334 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
2335 ; AVX2-NEXT: vpblendvb %ymm1, (%rdi), %ymm0, %ymm0
2338 ; AVX512VL-LABEL: load_fold_pblendvb_commute:
2339 ; AVX512VL: # %bb.0:
2340 ; AVX512VL-NEXT: vmovdqa (%rdi), %ymm1
2341 ; AVX512VL-NEXT: movl $1953789044, %eax # imm = 0x74747474
2342 ; AVX512VL-NEXT: kmovd %eax, %k1
2343 ; AVX512VL-NEXT: vmovdqu8 %ymm0, %ymm1 {%k1}
2344 ; AVX512VL-NEXT: vmovdqa %ymm1, %ymm0
2345 ; AVX512VL-NEXT: retq
2347 ; XOPAVX1-LABEL: load_fold_pblendvb_commute:
2349 ; XOPAVX1-NEXT: vmovdqa (%rdi), %ymm1
2350 ; XOPAVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
2351 ; XOPAVX1-NEXT: vpcmov %ymm2, %ymm0, %ymm1, %ymm0
2352 ; XOPAVX1-NEXT: retq
2354 ; XOPAVX2-LABEL: load_fold_pblendvb_commute:
2356 ; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
2357 ; XOPAVX2-NEXT: vpblendvb %ymm1, (%rdi), %ymm0, %ymm0
2358 ; XOPAVX2-NEXT: retq
2359 %x = load <32 x i8>, ptr %px, align 32
2360 %select = shufflevector <32 x i8> %y, <32 x i8> %x, <32 x i32> <i32 32, i32 33, i32 2, i32 35, i32 4, i32 5, i32 6, i32 39, i32 40, i32 41, i32 10, i32 43, i32 12, i32 13, i32 14, i32 47, i32 48, i32 49, i32 18, i32 51, i32 20, i32 21, i32 22, i32 55, i32 56, i32 57, i32 26, i32 59, i32 28, i32 29, i32 30, i32 63>
2361 ret <32 x i8> %select
2364 define <32 x i8> @shuffle_v32i8_zz_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31(<32 x i8> %a) {
2365 ; ALL-LABEL: shuffle_v32i8_zz_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31:
2367 ; ALL-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
2369 %shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 1, i32 34, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 29, i32 62, i32 31>
2370 ret <32 x i8> %shuffle
2373 define <32 x i8> @shuffle_v32i8_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_15_u6_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31(<32 x i8> %a) {
2374 ; AVX1-LABEL: shuffle_v32i8_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_15_u6_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31:
2376 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[1],zero,xmm0[2],zero,xmm0[4,u,6,7,8,9,10,11,12,13,14,15]
2377 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
2380 ; AVX2OR512VL-LABEL: shuffle_v32i8_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_15_u6_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31:
2381 ; AVX2OR512VL: # %bb.0:
2382 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1],zero,ymm0[2],zero,ymm0[4,u,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
2383 ; AVX2OR512VL-NEXT: retq
2385 ; XOPAVX1-LABEL: shuffle_v32i8_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_15_u6_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31:
2387 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[1],zero,xmm0[2],zero,xmm0[4,u,6,7,8,9,10,11,12,13,14,15]
2388 ; XOPAVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
2389 ; XOPAVX1-NEXT: retq
2391 ; XOPAVX2-LABEL: shuffle_v32i8_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_15_u6_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31:
2393 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1],zero,ymm0[2],zero,ymm0[4,u,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
2394 ; XOPAVX2-NEXT: retq
2395 %shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 1, i32 32, i32 2, i32 32, i32 4, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
2396 ret <32 x i8> %shuffle
2399 define <32 x i8> @shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32(<32 x i8> %a, <32 x i8> %b) {
2400 ; AVX1-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32:
2402 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2403 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2404 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
2405 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
2408 ; AVX2OR512VL-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32:
2409 ; AVX2OR512VL: # %bb.0:
2410 ; AVX2OR512VL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2411 ; AVX2OR512VL-NEXT: vpbroadcastw %xmm0, %ymm0
2412 ; AVX2OR512VL-NEXT: retq
2414 ; XOPAVX1-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32:
2416 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[0],xmm1[0],xmm0[0],xmm1[0],xmm0[0],xmm1[0],xmm0[0],xmm1[0],xmm0[0],xmm1[0],xmm0[0],xmm1[0],xmm0[0],xmm1[0]
2417 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
2418 ; XOPAVX1-NEXT: retq
2420 ; XOPAVX2-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32:
2422 ; XOPAVX2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2423 ; XOPAVX2-NEXT: vpbroadcastw %xmm0, %ymm0
2424 ; XOPAVX2-NEXT: retq
2425 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32>
2426 ret <32 x i8> %shuffle
2429 define <32 x i8> @shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_16_48_16_48_16_48_16_48_16_48_16_48_16_48_16_48(<32 x i8> %a, <32 x i8> %b) {
2430 ; AVX1-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_16_48_16_48_16_48_16_48_16_48_16_48_16_48_16_48:
2432 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2433 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,0,0,0,4,5,6,7]
2434 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
2435 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
2436 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2437 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
2438 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
2439 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
2442 ; AVX2-SLOW-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_16_48_16_48_16_48_16_48_16_48_16_48_16_48_16_48:
2443 ; AVX2-SLOW: # %bb.0:
2444 ; AVX2-SLOW-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
2445 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
2446 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
2447 ; AVX2-SLOW-NEXT: retq
2449 ; AVX2-FAST-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_16_48_16_48_16_48_16_48_16_48_16_48_16_48_16_48:
2450 ; AVX2-FAST: # %bb.0:
2451 ; AVX2-FAST-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
2452 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
2453 ; AVX2-FAST-NEXT: retq
2455 ; AVX512VLBW-SLOW-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_16_48_16_48_16_48_16_48_16_48_16_48_16_48_16_48:
2456 ; AVX512VLBW-SLOW: # %bb.0:
2457 ; AVX512VLBW-SLOW-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
2458 ; AVX512VLBW-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
2459 ; AVX512VLBW-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
2460 ; AVX512VLBW-SLOW-NEXT: retq
2462 ; AVX512VLBW-FAST-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_16_48_16_48_16_48_16_48_16_48_16_48_16_48_16_48:
2463 ; AVX512VLBW-FAST: # %bb.0:
2464 ; AVX512VLBW-FAST-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
2465 ; AVX512VLBW-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
2466 ; AVX512VLBW-FAST-NEXT: retq
2468 ; AVX512VLVBMI-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_16_48_16_48_16_48_16_48_16_48_16_48_16_48_16_48:
2469 ; AVX512VLVBMI: # %bb.0:
2470 ; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} ymm2 = [0,32,0,32,0,32,0,32,0,32,0,32,0,32,0,32,16,48,16,48,16,48,16,48,16,48,16,48,16,48,16,48]
2471 ; AVX512VLVBMI-NEXT: vpermt2b %ymm1, %ymm2, %ymm0
2472 ; AVX512VLVBMI-NEXT: retq
2474 ; XOPAVX1-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_16_48_16_48_16_48_16_48_16_48_16_48_16_48_16_48:
2476 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2477 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2478 ; XOPAVX1-NEXT: vbroadcastss {{.*#+}} xmm4 = [0,16,0,16,0,16,0,16,0,16,0,16,0,16,0,16]
2479 ; XOPAVX1-NEXT: vpperm %xmm4, %xmm2, %xmm3, %xmm2
2480 ; XOPAVX1-NEXT: vpperm %xmm4, %xmm1, %xmm0, %xmm0
2481 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2482 ; XOPAVX1-NEXT: retq
2484 ; XOPAVX2-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_16_48_16_48_16_48_16_48_16_48_16_48_16_48_16_48:
2486 ; XOPAVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
2487 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
2488 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
2489 ; XOPAVX2-NEXT: retq
2490 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 16, i32 48, i32 16, i32 48, i32 16, i32 48, i32 16, i32 48, i32 16, i32 48, i32 16, i32 48, i32 16, i32 48, i32 16, i32 48>
2491 ret <32 x i8> %shuffle
2494 define <32 x i8> @shuffle_v32i8_32_32_32_32_32_32_32_32_08_09_10_11_12_13_14_15_48_48_48_48_48_48_48_48_24_25_26_27_28_29_30_31(<32 x i8> %a, <32 x i8> %b) {
2495 ; AVX1-LABEL: shuffle_v32i8_32_32_32_32_32_32_32_32_08_09_10_11_12_13_14_15_48_48_48_48_48_48_48_48_24_25_26_27_28_29_30_31:
2497 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2498 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
2499 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2500 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[0,0,0,0,4,5,6,7]
2501 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
2502 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2503 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
2504 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
2505 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2508 ; AVX2-LABEL: shuffle_v32i8_32_32_32_32_32_32_32_32_08_09_10_11_12_13_14_15_48_48_48_48_48_48_48_48_24_25_26_27_28_29_30_31:
2510 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
2511 ; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
2512 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
2515 ; AVX512VLBW-LABEL: shuffle_v32i8_32_32_32_32_32_32_32_32_08_09_10_11_12_13_14_15_48_48_48_48_48_48_48_48_24_25_26_27_28_29_30_31:
2516 ; AVX512VLBW: # %bb.0:
2517 ; AVX512VLBW-NEXT: vpxor %xmm2, %xmm2, %xmm2
2518 ; AVX512VLBW-NEXT: vpshufb %ymm2, %ymm1, %ymm1
2519 ; AVX512VLBW-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
2520 ; AVX512VLBW-NEXT: retq
2522 ; AVX512VLVBMI-LABEL: shuffle_v32i8_32_32_32_32_32_32_32_32_08_09_10_11_12_13_14_15_48_48_48_48_48_48_48_48_24_25_26_27_28_29_30_31:
2523 ; AVX512VLVBMI: # %bb.0:
2524 ; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,40,41,42,43,44,45,46,47,16,16,16,16,16,16,16,16,56,57,58,59,60,61,62,63]
2525 ; AVX512VLVBMI-NEXT: vpermi2b %ymm0, %ymm1, %ymm2
2526 ; AVX512VLVBMI-NEXT: vmovdqa %ymm2, %ymm0
2527 ; AVX512VLVBMI-NEXT: retq
2529 ; XOPAVX1-LABEL: shuffle_v32i8_32_32_32_32_32_32_32_32_08_09_10_11_12_13_14_15_48_48_48_48_48_48_48_48_24_25_26_27_28_29_30_31:
2531 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2532 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
2533 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,0,0,0,0,0,0,0,24,25,26,27,28,29,30,31]
2534 ; XOPAVX1-NEXT: vpperm %xmm4, %xmm2, %xmm3, %xmm2
2535 ; XOPAVX1-NEXT: vpperm %xmm4, %xmm0, %xmm1, %xmm0
2536 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2537 ; XOPAVX1-NEXT: retq
2539 ; XOPAVX2-LABEL: shuffle_v32i8_32_32_32_32_32_32_32_32_08_09_10_11_12_13_14_15_48_48_48_48_48_48_48_48_24_25_26_27_28_29_30_31:
2541 ; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
2542 ; XOPAVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
2543 ; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
2544 ; XOPAVX2-NEXT: retq
2545 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
2546 ret <32 x i8> %shuffle
2549 define <32 x i8> @shuffle_v32i8_39_38_37_36_35_34_33_32_15_14_13_12_11_10_09_08_55_54_53_52_51_50_49_48_31_30_29_28_27_26_25_24(<32 x i8> %a, <32 x i8> %b) {
2550 ; AVX1-LABEL: shuffle_v32i8_39_38_37_36_35_34_33_32_15_14_13_12_11_10_09_08_55_54_53_52_51_50_49_48_31_30_29_28_27_26_25_24:
2552 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2553 ; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [15,14,13,12,11,10,9,8,15,14,13,12,11,10,9,8]
2554 ; AVX1-NEXT: # xmm3 = mem[0,0]
2555 ; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
2556 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
2557 ; AVX1-NEXT: vmovddup {{.*#+}} xmm5 = [7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0]
2558 ; AVX1-NEXT: # xmm5 = mem[0,0]
2559 ; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm4
2560 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm4[0],xmm2[0]
2561 ; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0
2562 ; AVX1-NEXT: vpshufb %xmm5, %xmm1, %xmm1
2563 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
2564 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2567 ; AVX2-LABEL: shuffle_v32i8_39_38_37_36_35_34_33_32_15_14_13_12_11_10_09_08_55_54_53_52_51_50_49_48_31_30_29_28_27_26_25_24:
2569 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
2570 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,23,22,21,20,19,18,17,16,31,30,29,28,27,26,25,24]
2573 ; AVX512VLBW-LABEL: shuffle_v32i8_39_38_37_36_35_34_33_32_15_14_13_12_11_10_09_08_55_54_53_52_51_50_49_48_31_30_29_28_27_26_25_24:
2574 ; AVX512VLBW: # %bb.0:
2575 ; AVX512VLBW-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
2576 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,23,22,21,20,19,18,17,16,31,30,29,28,27,26,25,24]
2577 ; AVX512VLBW-NEXT: retq
2579 ; AVX512VLVBMI-LABEL: shuffle_v32i8_39_38_37_36_35_34_33_32_15_14_13_12_11_10_09_08_55_54_53_52_51_50_49_48_31_30_29_28_27_26_25_24:
2580 ; AVX512VLVBMI: # %bb.0:
2581 ; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} ymm2 = [7,6,5,4,3,2,1,0,47,46,45,44,43,42,41,40,23,22,21,20,19,18,17,16,63,62,61,60,59,58,57,56]
2582 ; AVX512VLVBMI-NEXT: vpermi2b %ymm0, %ymm1, %ymm2
2583 ; AVX512VLVBMI-NEXT: vmovdqa %ymm2, %ymm0
2584 ; AVX512VLVBMI-NEXT: retq
2586 ; XOPAVX1-LABEL: shuffle_v32i8_39_38_37_36_35_34_33_32_15_14_13_12_11_10_09_08_55_54_53_52_51_50_49_48_31_30_29_28_27_26_25_24:
2588 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2589 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
2590 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24]
2591 ; XOPAVX1-NEXT: vpperm %xmm4, %xmm2, %xmm3, %xmm2
2592 ; XOPAVX1-NEXT: vpperm %xmm4, %xmm0, %xmm1, %xmm0
2593 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2594 ; XOPAVX1-NEXT: retq
2596 ; XOPAVX2-LABEL: shuffle_v32i8_39_38_37_36_35_34_33_32_15_14_13_12_11_10_09_08_55_54_53_52_51_50_49_48_31_30_29_28_27_26_25_24:
2598 ; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
2599 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,23,22,21,20,19,18,17,16,31,30,29,28,27,26,25,24]
2600 ; XOPAVX2-NEXT: retq
2601 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24>
2602 ret <32 x i8> %shuffle
2605 define <32 x i8> @shuffle_v32i8_39_38_37_36_35_34_33_32_07_06_05_04_03_02_01_00_55_54_53_52_51_50_49_48_23_22_21_20_19_18_17_16(<32 x i8> %a, <32 x i8> %b) {
2606 ; AVX1-LABEL: shuffle_v32i8_39_38_37_36_35_34_33_32_07_06_05_04_03_02_01_00_55_54_53_52_51_50_49_48_23_22_21_20_19_18_17_16:
2608 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2609 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
2610 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2611 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [14,12,10,8,6,4,2,0,15,13,11,9,7,5,3,1]
2612 ; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
2613 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2614 ; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0
2615 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2618 ; AVX2-LABEL: shuffle_v32i8_39_38_37_36_35_34_33_32_07_06_05_04_03_02_01_00_55_54_53_52_51_50_49_48_23_22_21_20_19_18_17_16:
2620 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,7,6,5,4,3,2,1,0,u,u,u,u,u,u,u,u,23,22,21,20,19,18,17,16]
2621 ; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[7,6,5,4,3,2,1,0,u,u,u,u,u,u,u,u,23,22,21,20,19,18,17,16,u,u,u,u,u,u,u,u]
2622 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
2625 ; AVX512VLBW-LABEL: shuffle_v32i8_39_38_37_36_35_34_33_32_07_06_05_04_03_02_01_00_55_54_53_52_51_50_49_48_23_22_21_20_19_18_17_16:
2626 ; AVX512VLBW: # %bb.0:
2627 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,7,6,5,4,3,2,1,0,u,u,u,u,u,u,u,u,23,22,21,20,19,18,17,16]
2628 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[7,6,5,4,3,2,1,0,u,u,u,u,u,u,u,u,23,22,21,20,19,18,17,16,u,u,u,u,u,u,u,u]
2629 ; AVX512VLBW-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
2630 ; AVX512VLBW-NEXT: retq
2632 ; AVX512VLVBMI-LABEL: shuffle_v32i8_39_38_37_36_35_34_33_32_07_06_05_04_03_02_01_00_55_54_53_52_51_50_49_48_23_22_21_20_19_18_17_16:
2633 ; AVX512VLVBMI: # %bb.0:
2634 ; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} ymm2 = [7,6,5,4,3,2,1,0,39,38,37,36,35,34,33,32,23,22,21,20,19,18,17,16,55,54,53,52,51,50,49,48]
2635 ; AVX512VLVBMI-NEXT: vpermi2b %ymm0, %ymm1, %ymm2
2636 ; AVX512VLVBMI-NEXT: vmovdqa %ymm2, %ymm0
2637 ; AVX512VLVBMI-NEXT: retq
2639 ; XOPAVX1-LABEL: shuffle_v32i8_39_38_37_36_35_34_33_32_07_06_05_04_03_02_01_00_55_54_53_52_51_50_49_48_23_22_21_20_19_18_17_16:
2641 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2642 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
2643 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [7,6,5,4,3,2,1,0,23,22,21,20,19,18,17,16]
2644 ; XOPAVX1-NEXT: vpperm %xmm4, %xmm2, %xmm3, %xmm2
2645 ; XOPAVX1-NEXT: vpperm %xmm4, %xmm0, %xmm1, %xmm0
2646 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2647 ; XOPAVX1-NEXT: retq
2649 ; XOPAVX2-LABEL: shuffle_v32i8_39_38_37_36_35_34_33_32_07_06_05_04_03_02_01_00_55_54_53_52_51_50_49_48_23_22_21_20_19_18_17_16:
2651 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,7,6,5,4,3,2,1,0,u,u,u,u,u,u,u,u,23,22,21,20,19,18,17,16]
2652 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[7,6,5,4,3,2,1,0,u,u,u,u,u,u,u,u,23,22,21,20,19,18,17,16,u,u,u,u,u,u,u,u]
2653 ; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
2654 ; XOPAVX2-NEXT: retq
2655 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16>
2656 ret <32 x i8> %shuffle
2659 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_17_16(<32 x i8> %a, <32 x i8> %b) {
2660 ; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_17_16:
2662 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2663 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
2664 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
2665 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2666 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2669 ; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_17_16:
2670 ; AVX2OR512VL: # %bb.0:
2671 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,17,16]
2672 ; AVX2OR512VL-NEXT: retq
2674 ; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_17_16:
2676 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2677 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
2678 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
2679 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2680 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2681 ; XOPAVX1-NEXT: retq
2683 ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_17_16:
2685 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,17,16]
2686 ; XOPAVX2-NEXT: retq
2687 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 16>
2688 ret <32 x i8> %shuffle
2691 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_18_16_16(<32 x i8> %a, <32 x i8> %b) {
2692 ; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_18_16_16:
2694 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2695 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
2696 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
2697 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2698 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2701 ; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_18_16_16:
2702 ; AVX2OR512VL: # %bb.0:
2703 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,18,16,16]
2704 ; AVX2OR512VL-NEXT: retq
2706 ; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_18_16_16:
2708 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2709 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
2710 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
2711 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2712 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2713 ; XOPAVX1-NEXT: retq
2715 ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_18_16_16:
2717 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,18,16,16]
2718 ; XOPAVX2-NEXT: retq
2719 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 18, i32 16, i32 16>
2720 ret <32 x i8> %shuffle
2723 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_23_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
2724 ; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_23_16_16_16_16_16_16_16:
2726 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2727 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
2728 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
2729 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2730 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2733 ; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_23_16_16_16_16_16_16_16:
2734 ; AVX2OR512VL: # %bb.0:
2735 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,23,16,16,16,16,16,16,16]
2736 ; AVX2OR512VL-NEXT: retq
2738 ; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_23_16_16_16_16_16_16_16:
2740 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2741 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
2742 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
2743 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2744 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2745 ; XOPAVX1-NEXT: retq
2747 ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_23_16_16_16_16_16_16_16:
2749 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,23,16,16,16,16,16,16,16]
2750 ; XOPAVX2-NEXT: retq
2751 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 23, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
2752 ret <32 x i8> %shuffle
2755 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_24_16_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
2756 ; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_24_16_16_16_16_16_16_16_16:
2758 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2759 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
2760 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
2761 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2762 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2765 ; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_24_16_16_16_16_16_16_16_16:
2766 ; AVX2OR512VL: # %bb.0:
2767 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,24,16,16,16,16,16,16,16,16]
2768 ; AVX2OR512VL-NEXT: retq
2770 ; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_24_16_16_16_16_16_16_16_16:
2772 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2773 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
2774 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
2775 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2776 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2777 ; XOPAVX1-NEXT: retq
2779 ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_24_16_16_16_16_16_16_16_16:
2781 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,24,16,16,16,16,16,16,16,16]
2782 ; XOPAVX2-NEXT: retq
2783 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 24, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
2784 ret <32 x i8> %shuffle
2787 define <32 x i8> @shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_30_16_16_16_16_16_16_16_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
2788 ; AVX1-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_30_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
2790 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2791 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
2792 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
2793 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2794 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2797 ; AVX2OR512VL-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_30_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
2798 ; AVX2OR512VL: # %bb.0:
2799 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,30,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
2800 ; AVX2OR512VL-NEXT: retq
2802 ; XOPAVX1-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_30_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
2804 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2805 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
2806 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
2807 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2808 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2809 ; XOPAVX1-NEXT: retq
2811 ; XOPAVX2-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_30_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
2813 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,30,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
2814 ; XOPAVX2-NEXT: retq
2815 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 30, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
2816 ret <32 x i8> %shuffle
2819 define <32 x i8> @shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_31_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
2820 ; AVX1-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_31_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
2822 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2823 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
2824 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
2825 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2826 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2829 ; AVX2OR512VL-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_31_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
2830 ; AVX2OR512VL: # %bb.0:
2831 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,31,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
2832 ; AVX2OR512VL-NEXT: retq
2834 ; XOPAVX1-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_31_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
2836 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
2837 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [15,0,0,0]
2838 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
2839 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
2840 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2841 ; XOPAVX1-NEXT: retq
2843 ; XOPAVX2-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_31_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
2845 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,31,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
2846 ; XOPAVX2-NEXT: retq
2847 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 31, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
2848 ret <32 x i8> %shuffle
2851 define <32 x i8> @shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55(<32 x i8> %a, <32 x i8> %b) {
2852 ; AVX1-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55:
2854 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2855 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2856 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2857 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2858 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2861 ; AVX2OR512VL-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55:
2862 ; AVX2OR512VL: # %bb.0:
2863 ; AVX2OR512VL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
2864 ; AVX2OR512VL-NEXT: retq
2866 ; XOPAVX1-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55:
2868 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2869 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2870 ; XOPAVX1-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2871 ; XOPAVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2872 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2873 ; XOPAVX1-NEXT: retq
2875 ; XOPAVX2-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55:
2877 ; XOPAVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
2878 ; XOPAVX2-NEXT: retq
2879 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
2880 ret <32 x i8> %shuffle
2883 define <32 x i8> @shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63(<32 x i8> %a, <32 x i8> %b) {
2884 ; AVX1-LABEL: shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63:
2886 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2887 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2888 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15]
2889 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
2890 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2893 ; AVX2OR512VL-LABEL: shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63:
2894 ; AVX2OR512VL: # %bb.0:
2895 ; AVX2OR512VL-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
2896 ; AVX2OR512VL-NEXT: retq
2898 ; XOPAVX1-LABEL: shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63:
2900 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2901 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2902 ; XOPAVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15]
2903 ; XOPAVX1-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
2904 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2905 ; XOPAVX1-NEXT: retq
2907 ; XOPAVX2-LABEL: shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63:
2909 ; XOPAVX2-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
2910 ; XOPAVX2-NEXT: retq
2911 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
2912 ret <32 x i8> %shuffle
2915 define <32 x i8> @shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63(<32 x i8> %a, <32 x i8> %b) {
2916 ; AVX1-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63:
2918 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2919 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2920 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15]
2921 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2922 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2925 ; AVX2-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63:
2927 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,1,3,3]
2928 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,3,3]
2929 ; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
2932 ; AVX512VLBW-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63:
2933 ; AVX512VLBW: # %bb.0:
2934 ; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,1,3,3]
2935 ; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,3,3]
2936 ; AVX512VLBW-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
2937 ; AVX512VLBW-NEXT: retq
2939 ; AVX512VLVBMI-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63:
2940 ; AVX512VLVBMI: # %bb.0:
2941 ; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} ymm2 = [0,32,1,33,2,34,3,35,4,36,5,37,6,38,7,39,24,56,25,57,26,58,27,59,28,60,29,61,30,62,31,63]
2942 ; AVX512VLVBMI-NEXT: vpermt2b %ymm1, %ymm2, %ymm0
2943 ; AVX512VLVBMI-NEXT: retq
2945 ; XOPAVX1-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63:
2947 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2948 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2949 ; XOPAVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15]
2950 ; XOPAVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2951 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2952 ; XOPAVX1-NEXT: retq
2954 ; XOPAVX2-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63:
2956 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,1,3,3]
2957 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,3,3]
2958 ; XOPAVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
2959 ; XOPAVX2-NEXT: retq
2960 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
2961 ret <32 x i8> %shuffle
2964 define <32 x i8> @shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55(<32 x i8> %a, <32 x i8> %b) {
2965 ; AVX1-LABEL: shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55:
2967 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2968 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2969 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2970 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
2971 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2974 ; AVX2-LABEL: shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55:
2976 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[1,1,2,3]
2977 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,1,2,3]
2978 ; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
2981 ; AVX512VLBW-LABEL: shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55:
2982 ; AVX512VLBW: # %bb.0:
2983 ; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm1 = ymm1[1,1,2,3]
2984 ; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,1,2,3]
2985 ; AVX512VLBW-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
2986 ; AVX512VLBW-NEXT: retq
2988 ; AVX512VLVBMI-LABEL: shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55:
2989 ; AVX512VLVBMI: # %bb.0:
2990 ; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} ymm2 = [8,40,9,41,10,42,11,43,12,44,13,45,14,46,15,47,16,48,17,49,18,50,19,51,20,52,21,53,22,54,23,55]
2991 ; AVX512VLVBMI-NEXT: vpermt2b %ymm1, %ymm2, %ymm0
2992 ; AVX512VLVBMI-NEXT: retq
2994 ; XOPAVX1-LABEL: shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55:
2996 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
2997 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
2998 ; XOPAVX1-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2999 ; XOPAVX1-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
3000 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3001 ; XOPAVX1-NEXT: retq
3003 ; XOPAVX2-LABEL: shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55:
3005 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[1,1,2,3]
3006 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,1,2,3]
3007 ; XOPAVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
3008 ; XOPAVX2-NEXT: retq
3009 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
3010 ret <32 x i8> %shuffle
3013 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_16_17_16_16_16_16_16_16_16_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
3014 ; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_16_17_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
3016 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
3017 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3018 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
3019 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3022 ; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_16_17_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
3023 ; AVX2OR512VL: # %bb.0:
3024 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,16,17,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
3025 ; AVX2OR512VL-NEXT: retq
3027 ; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_16_17_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
3029 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
3030 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3031 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
3032 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3033 ; XOPAVX1-NEXT: retq
3035 ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_16_17_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
3037 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,16,17,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
3038 ; XOPAVX2-NEXT: retq
3039 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 16, i32 17, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
3040 ret <32 x i8> %shuffle
3043 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_16_16_18_16_16_16_16_16_16_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
3044 ; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_16_16_18_16_16_16_16_16_16_16_16_16_16_16_16_16:
3046 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
3047 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3048 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0]
3049 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3052 ; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_16_16_18_16_16_16_16_16_16_16_16_16_16_16_16_16:
3053 ; AVX2OR512VL: # %bb.0:
3054 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,16,16,18,16,16,16,16,16,16,16,16,16,16,16,16,16]
3055 ; AVX2OR512VL-NEXT: retq
3057 ; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_16_16_18_16_16_16_16_16_16_16_16_16_16_16_16_16:
3059 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
3060 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3061 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0]
3062 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3063 ; XOPAVX1-NEXT: retq
3065 ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_16_16_18_16_16_16_16_16_16_16_16_16_16_16_16_16:
3067 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,16,16,18,16,16,16,16,16,16,16,16,16,16,16,16,16]
3068 ; XOPAVX2-NEXT: retq
3069 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 16, i32 16, i32 18, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
3070 ret <32 x i8> %shuffle
3073 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_16_16_16_16_16_16_16_23_16_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
3074 ; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_16_16_16_16_16_16_16_23_16_16_16_16_16_16_16_16:
3076 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
3077 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3078 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0]
3079 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3082 ; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_16_16_16_16_16_16_16_23_16_16_16_16_16_16_16_16:
3083 ; AVX2OR512VL: # %bb.0:
3084 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,16,16,16,16,16,16,16,23,16,16,16,16,16,16,16,16]
3085 ; AVX2OR512VL-NEXT: retq
3087 ; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_16_16_16_16_16_16_16_23_16_16_16_16_16_16_16_16:
3089 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
3090 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3091 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0]
3092 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3093 ; XOPAVX1-NEXT: retq
3095 ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_16_16_16_16_16_16_16_23_16_16_16_16_16_16_16_16:
3097 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,16,16,16,16,16,16,16,23,16,16,16,16,16,16,16,16]
3098 ; XOPAVX2-NEXT: retq
3099 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 23, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
3100 ret <32 x i8> %shuffle
3103 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_24_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
3104 ; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_24_16_16_16_16_16_16_16:
3106 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
3107 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3108 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0]
3109 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3112 ; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_24_16_16_16_16_16_16_16:
3113 ; AVX2OR512VL: # %bb.0:
3114 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,24,16,16,16,16,16,16,16]
3115 ; AVX2OR512VL-NEXT: retq
3117 ; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_24_16_16_16_16_16_16_16:
3119 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
3120 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3121 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0]
3122 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3123 ; XOPAVX1-NEXT: retq
3125 ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_24_16_16_16_16_16_16_16:
3127 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,24,16,16,16,16,16,16,16]
3128 ; XOPAVX2-NEXT: retq
3129 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 24, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
3130 ret <32 x i8> %shuffle
3133 define <32 x i8> @shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_30_16(<32 x i8> %a, <32 x i8> %b) {
3134 ; AVX1-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_30_16:
3136 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
3137 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3138 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,14,0]
3139 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3142 ; AVX2OR512VL-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_30_16:
3143 ; AVX2OR512VL: # %bb.0:
3144 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,30,16]
3145 ; AVX2OR512VL-NEXT: retq
3147 ; XOPAVX1-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_30_16:
3149 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
3150 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3151 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,14,0]
3152 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3153 ; XOPAVX1-NEXT: retq
3155 ; XOPAVX2-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_30_16:
3157 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,30,16]
3158 ; XOPAVX2-NEXT: retq
3159 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 30, i32 16>
3160 ret <32 x i8> %shuffle
3163 define <32 x i8> @shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_31(<32 x i8> %a, <32 x i8> %b) {
3164 ; AVX1-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_31:
3166 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
3167 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3168 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15]
3169 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3172 ; AVX2OR512VL-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_31:
3173 ; AVX2OR512VL: # %bb.0:
3174 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,31]
3175 ; AVX2OR512VL-NEXT: retq
3177 ; XOPAVX1-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_31:
3179 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
3180 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3181 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15]
3182 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3183 ; XOPAVX1-NEXT: retq
3185 ; XOPAVX2-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_31:
3187 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,31]
3188 ; XOPAVX2-NEXT: retq
3189 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 31>
3190 ret <32 x i8> %shuffle
3193 define <32 x i8> @shuffle_v32i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12_28_28_28_28_24_24_24_24_20_20_20_20_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
3194 ; AVX1-LABEL: shuffle_v32i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12_28_28_28_28_24_24_24_24_20_20_20_20_16_16_16_16:
3196 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
3197 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3198 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,12,12,12,8,8,8,8,4,4,4,4,0,0,0,0]
3199 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3202 ; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12_28_28_28_28_24_24_24_24_20_20_20_20_16_16_16_16:
3203 ; AVX2OR512VL: # %bb.0:
3204 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12,28,28,28,28,24,24,24,24,20,20,20,20,16,16,16,16]
3205 ; AVX2OR512VL-NEXT: retq
3207 ; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12_28_28_28_28_24_24_24_24_20_20_20_20_16_16_16_16:
3209 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
3210 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3211 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,12,12,12,8,8,8,8,4,4,4,4,0,0,0,0]
3212 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3213 ; XOPAVX1-NEXT: retq
3215 ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12_28_28_28_28_24_24_24_24_20_20_20_20_16_16_16_16:
3217 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12,28,28,28,28,24,24,24,24,20,20,20,20,16,16,16,16]
3218 ; XOPAVX2-NEXT: retq
3219 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 28, i32 28, i32 28, i32 28, i32 24, i32 24, i32 24, i32 24, i32 20, i32 20, i32 20, i32 20, i32 16, i32 16, i32 16, i32 16>
3220 ret <32 x i8> %shuffle
3223 define <32 x i8> @shuffle_v32i8_08_08_08_08_08_08_08_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24(<32 x i8> %a, <32 x i8> %b) {
3224 ; AVX1-LABEL: shuffle_v32i8_08_08_08_08_08_08_08_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24:
3226 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[8,8,8,8,8,8,8,8,0,0,0,0,0,0,0,0]
3227 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3228 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
3229 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3232 ; AVX2OR512VL-LABEL: shuffle_v32i8_08_08_08_08_08_08_08_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24:
3233 ; AVX2OR512VL: # %bb.0:
3234 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,8,8,8,8,8,8,8,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
3235 ; AVX2OR512VL-NEXT: retq
3237 ; XOPAVX1-LABEL: shuffle_v32i8_08_08_08_08_08_08_08_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24:
3239 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[8,8,8,8,8,8,8,8,0,0,0,0,0,0,0,0]
3240 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3241 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
3242 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3243 ; XOPAVX1-NEXT: retq
3245 ; XOPAVX2-LABEL: shuffle_v32i8_08_08_08_08_08_08_08_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24:
3247 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,8,8,8,8,8,8,8,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
3248 ; XOPAVX2-NEXT: retq
3249 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24>
3250 ret <32 x i8> %shuffle
3253 define <32 x i8> @shuffle_v32i8_00_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_16_16_16_16_uu_uu_uu_uu_uu_16_16_16_16_16_30_16(<32 x i8> %a, <32 x i8> %b) {
3254 ; AVX1-LABEL: shuffle_v32i8_00_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_16_16_16_16_uu_uu_uu_uu_uu_16_16_16_16_16_30_16:
3256 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3257 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,0,0,0,u,u,u,u,u,0,0,0,0,0,14,0]
3258 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3261 ; AVX2OR512VL-LABEL: shuffle_v32i8_00_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_16_16_16_16_uu_uu_uu_uu_uu_16_16_16_16_16_30_16:
3262 ; AVX2OR512VL: # %bb.0:
3263 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,16,16,16,16,u,u,u,u,u,16,16,16,16,16,30,16]
3264 ; AVX2OR512VL-NEXT: retq
3266 ; XOPAVX1-LABEL: shuffle_v32i8_00_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_16_16_16_16_uu_uu_uu_uu_uu_16_16_16_16_16_30_16:
3268 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
3269 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,0,0,0,u,u,u,u,u,0,0,0,0,0,14,0]
3270 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3271 ; XOPAVX1-NEXT: retq
3273 ; XOPAVX2-LABEL: shuffle_v32i8_00_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_16_16_16_16_uu_uu_uu_uu_uu_16_16_16_16_16_30_16:
3275 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,16,16,16,16,u,u,u,u,u,16,16,16,16,16,30,16]
3276 ; XOPAVX2-NEXT: retq
3277 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 16, i32 16, i32 16, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 16, i32 16, i32 16, i32 16, i32 16, i32 30, i32 16>
3278 ret <32 x i8> %shuffle
3281 define <32 x i8> @shuffle_v32i8_uu_14_uu_uu_00_00_00_00_00_00_00_00_00_00_00_00_16_16_uu_16_uu_uu_uu_uu_16_16_16_16_16_16_30_16(<32 x i8> %a, <32 x i8> %b) {
3282 ; AVX1-LABEL: shuffle_v32i8_uu_14_uu_uu_00_00_00_00_00_00_00_00_00_00_00_00_16_16_uu_16_uu_uu_uu_uu_16_16_16_16_16_16_30_16:
3284 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[14,14,1,1,0,0,0,0,0,0,0,0,0,0,0,0]
3285 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3286 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,u,0,u,u,u,u,0,0,0,0,0,0,14,0]
3287 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3290 ; AVX2OR512VL-LABEL: shuffle_v32i8_uu_14_uu_uu_00_00_00_00_00_00_00_00_00_00_00_00_16_16_uu_16_uu_uu_uu_uu_16_16_16_16_16_16_30_16:
3291 ; AVX2OR512VL: # %bb.0:
3292 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,14,u,u,0,0,0,0,0,0,0,0,0,0,0,0,16,16,u,16,u,u,u,u,16,16,16,16,16,16,30,16]
3293 ; AVX2OR512VL-NEXT: retq
3295 ; XOPAVX1-LABEL: shuffle_v32i8_uu_14_uu_uu_00_00_00_00_00_00_00_00_00_00_00_00_16_16_uu_16_uu_uu_uu_uu_16_16_16_16_16_16_30_16:
3297 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[14,14,1,1,0,0,0,0,0,0,0,0,0,0,0,0]
3298 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3299 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,u,0,u,u,u,u,0,0,0,0,0,0,14,0]
3300 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3301 ; XOPAVX1-NEXT: retq
3303 ; XOPAVX2-LABEL: shuffle_v32i8_uu_14_uu_uu_00_00_00_00_00_00_00_00_00_00_00_00_16_16_uu_16_uu_uu_uu_uu_16_16_16_16_16_16_30_16:
3305 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,14,u,u,0,0,0,0,0,0,0,0,0,0,0,0,16,16,u,16,u,u,u,u,16,16,16,16,16,16,30,16]
3306 ; XOPAVX2-NEXT: retq
3307 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 undef, i32 14, i32 undef, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 undef, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 30, i32 16>
3308 ret <32 x i8> %shuffle
3311 define <32 x i8> @shuffle_v32i8_00_00_00_uu_uu_uu_04_uu_08_08_08_08_uu_uu_12_uu_28_28_28_28_uu_uu_uu_24_20_20_20_20_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
3312 ; AVX1-LABEL: shuffle_v32i8_00_00_00_uu_uu_uu_04_uu_08_08_08_08_uu_uu_12_uu_28_28_28_28_uu_uu_uu_24_20_20_20_20_16_16_16_16:
3314 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
3315 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3316 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,12,12,12,8,8,8,8,4,4,4,4,0,0,0,0]
3317 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3320 ; AVX2OR512VL-LABEL: shuffle_v32i8_00_00_00_uu_uu_uu_04_uu_08_08_08_08_uu_uu_12_uu_28_28_28_28_uu_uu_uu_24_20_20_20_20_16_16_16_16:
3321 ; AVX2OR512VL: # %bb.0:
3322 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,u,u,u,4,u,8,8,8,8,u,u,12,u,28,28,28,28,u,u,u,24,20,20,20,20,16,16,16,16]
3323 ; AVX2OR512VL-NEXT: retq
3325 ; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_uu_uu_uu_04_uu_08_08_08_08_uu_uu_12_uu_28_28_28_28_uu_uu_uu_24_20_20_20_20_16_16_16_16:
3327 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
3328 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3329 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,12,12,12,8,8,8,8,4,4,4,4,0,0,0,0]
3330 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3331 ; XOPAVX1-NEXT: retq
3333 ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_uu_uu_uu_04_uu_08_08_08_08_uu_uu_12_uu_28_28_28_28_uu_uu_uu_24_20_20_20_20_16_16_16_16:
3335 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,u,u,u,4,u,8,8,8,8,u,u,12,u,28,28,28,28,u,u,u,24,20,20,20,20,16,16,16,16]
3336 ; XOPAVX2-NEXT: retq
3337 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 undef, i32 undef, i32 undef, i32 4, i32 undef, i32 8, i32 8, i32 8, i32 8, i32 undef, i32 undef, i32 12, i32 undef, i32 28, i32 28, i32 28, i32 28, i32 undef, i32 undef, i32 undef, i32 24, i32 20, i32 20, i32 20, i32 20, i32 16, i32 16, i32 16, i32 16>
3338 ret <32 x i8> %shuffle
3341 define <32 x i8> @shuffle_v32i8_08_08_08_08_08_08_08_08_uu_uu_uu_uu_uu_uu_uu_uu_16_16_16_uu_uu_uu_uu_uu_uu_uu_24_24_24_24_24_24(<32 x i8> %a, <32 x i8> %b) {
3342 ; AVX1-LABEL: shuffle_v32i8_08_08_08_08_08_08_08_08_uu_uu_uu_uu_uu_uu_uu_uu_16_16_16_uu_uu_uu_uu_uu_uu_uu_24_24_24_24_24_24:
3344 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
3345 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
3346 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3347 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,8,8,8,8,8,8,8,8,8,8,8,8]
3348 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3351 ; AVX2OR512VL-LABEL: shuffle_v32i8_08_08_08_08_08_08_08_08_uu_uu_uu_uu_uu_uu_uu_uu_16_16_16_uu_uu_uu_uu_uu_uu_uu_24_24_24_24_24_24:
3352 ; AVX2OR512VL: # %bb.0:
3353 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,8,8,8,8,8,8,8,u,u,u,u,u,u,u,u,16,16,16,u,u,u,u,u,u,u,24,24,24,24,24,24]
3354 ; AVX2OR512VL-NEXT: retq
3356 ; XOPAVX1-LABEL: shuffle_v32i8_08_08_08_08_08_08_08_08_uu_uu_uu_uu_uu_uu_uu_uu_16_16_16_uu_uu_uu_uu_uu_uu_uu_24_24_24_24_24_24:
3358 ; XOPAVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
3359 ; XOPAVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
3360 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3361 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,8,8,8,8,8,8,8,8,8,8,8,8]
3362 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3363 ; XOPAVX1-NEXT: retq
3365 ; XOPAVX2-LABEL: shuffle_v32i8_08_08_08_08_08_08_08_08_uu_uu_uu_uu_uu_uu_uu_uu_16_16_16_uu_uu_uu_uu_uu_uu_uu_24_24_24_24_24_24:
3367 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,8,8,8,8,8,8,8,u,u,u,u,u,u,u,u,16,16,16,u,u,u,u,u,u,u,24,24,24,24,24,24]
3368 ; XOPAVX2-NEXT: retq
3369 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 16, i32 16, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24>
3370 ret <32 x i8> %shuffle
3373 define <32 x i8> @shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39(<32 x i8> %a, <32 x i8> %b) {
3374 ; AVX1-LABEL: shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39:
3376 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
3377 ; AVX1-NEXT: vpshufb {{.*#+}} xmm3 = xmm2[u,u,4,u,1,6],zero,zero,xmm2[0],zero,xmm2[11,u],zero,zero,zero,zero
3378 ; AVX1-NEXT: vpshufb {{.*#+}} xmm4 = xmm1[u,u],zero,xmm1[u],zero,zero,xmm1[5,0],zero,xmm1[10],zero,xmm1[u,4,2,4,7]
3379 ; AVX1-NEXT: vpor %xmm3, %xmm4, %xmm3
3380 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
3381 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm5 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3],xmm4[4],xmm0[4],xmm4[5],xmm0[5],xmm4[6],xmm0[6],xmm4[7],xmm0[7]
3382 ; AVX1-NEXT: vpshufb {{.*#+}} xmm5 = xmm5[8,6,u,6,u,u,u,u,u,u,u,15,u,u,u,u]
3383 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [0,0,255,0,255,255,255,255,255,255,255,0,255,255,255,255]
3384 ; AVX1-NEXT: vpblendvb %xmm6, %xmm3, %xmm5, %xmm3
3385 ; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = zero,zero,xmm2[u,u],zero,zero,xmm2[12],zero,xmm2[u,u,u],zero,zero,xmm2[u,0,3]
3386 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[10,13,u,u,3,3],zero,xmm1[8,u,u,u,12,1,u],zero,zero
3387 ; AVX1-NEXT: vpor %xmm2, %xmm1, %xmm1
3388 ; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = xmm4[u,u],zero,zero,xmm4[u,u,u,u,1,6,13,u,u],zero,xmm4[u,u]
3389 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,u,12,13,u,u,u,u],zero,zero,zero,xmm0[u,u,12,u,u]
3390 ; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
3391 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,0,0,255,255,255,255,0,0,0,255,255,0,255,255]
3392 ; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
3393 ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
3396 ; AVX2-LABEL: shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39:
3398 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,12,12,13,1,6,13,7,u,u,u,u,u,u,u,u,u,u,u,u,17,22,29,23,20,19,u,19,u,u,u,u]
3399 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,u,5,0,6,u,1,u>
3400 ; AVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0
3401 ; AVX2-NEXT: vpshufb {{.*#+}} ymm2 = ymm1[10,13],zero,zero,ymm1[3,3],zero,ymm1[8],zero,zero,zero,ymm1[12,1],zero,zero,zero,zero,zero,ymm1[20],zero,ymm1[17,22],zero,zero,ymm1[16],zero,ymm1[27],zero,zero,zero,zero,zero
3402 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1]
3403 ; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,ymm1[12],zero,zero,zero,zero,zero,zero,zero,ymm1[0,3],zero,zero,zero,zero,zero,zero,ymm1[21,16],zero,ymm1[26],zero,zero,ymm1[20,18,20,23]
3404 ; AVX2-NEXT: vpor %ymm1, %ymm2, %ymm1
3405 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,0,255,255,255,255,0,0,0,255,255,0,255,255,0,0,255,0,255,255,255,255,255,255,255,0,255,255,255,255]
3406 ; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
3409 ; AVX512VLBW-LABEL: shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39:
3410 ; AVX512VLBW: # %bb.0:
3411 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,12,12,13,1,6,13,7,u,u,u,u,u,u,u,u,u,u,u,u,17,22,29,23,20,19,u,19,u,u,u,u]
3412 ; AVX512VLBW-NEXT: vmovdqa {{.*#+}} ymm2 = <0,u,5,0,6,u,1,u>
3413 ; AVX512VLBW-NEXT: vpermd %ymm0, %ymm2, %ymm2
3414 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm1[10,13],zero,zero,ymm1[3,3],zero,ymm1[8],zero,zero,zero,ymm1[12,1],zero,zero,zero,zero,zero,ymm1[20],zero,ymm1[17,22],zero,zero,ymm1[16],zero,ymm1[27],zero,zero,zero,zero,zero
3415 ; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1]
3416 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,ymm1[12],zero,zero,zero,zero,zero,zero,zero,ymm1[0,3],zero,zero,zero,zero,zero,zero,ymm1[21,16],zero,ymm1[26],zero,zero,ymm1[20,18,20,23]
3417 ; AVX512VLBW-NEXT: vpor %ymm0, %ymm1, %ymm0
3418 ; AVX512VLBW-NEXT: movl $134948620, %eax # imm = 0x80B270C
3419 ; AVX512VLBW-NEXT: kmovd %eax, %k1
3420 ; AVX512VLBW-NEXT: vmovdqu8 %ymm2, %ymm0 {%k1}
3421 ; AVX512VLBW-NEXT: retq
3423 ; AVX512VLVBMI-LABEL: shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39:
3424 ; AVX512VLVBMI: # %bb.0:
3425 ; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} ymm2 = [10,13,44,45,3,3,28,8,49,54,61,12,1,44,16,19,52,51,20,51,17,22,5,0,16,10,27,39,4,2,4,7]
3426 ; AVX512VLVBMI-NEXT: vpermi2b %ymm0, %ymm1, %ymm2
3427 ; AVX512VLVBMI-NEXT: vmovdqa %ymm2, %ymm0
3428 ; AVX512VLVBMI-NEXT: retq
3430 ; XOPAVX1-LABEL: shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39:
3432 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
3433 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm3 = xmm1[u,u],xmm2[4],xmm1[u],xmm2[1,6],xmm1[5,0],xmm2[0],xmm1[10],xmm2[11],xmm1[u,4,2,4,7]
3434 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
3435 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm5 = xmm4[4,3,u,3,u,u,u,u,u,u,u],xmm0[7],xmm4[u,u,u,u]
3436 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [0,0,255,0,255,255,255,255,255,255,255,0,255,255,255,255]
3437 ; XOPAVX1-NEXT: vpblendvb %xmm6, %xmm3, %xmm5, %xmm3
3438 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm1 = xmm1[10,13,u,u,3,3],xmm2[12],xmm1[8,u,u,u,12,1,u],xmm2[0,3]
3439 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[u,u,12,13,u,u,u,u],xmm4[1,6,13],xmm0[u,u,12,u,u]
3440 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,0,0,255,255,255,255,0,0,0,255,255,0,255,255]
3441 ; XOPAVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
3442 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
3443 ; XOPAVX1-NEXT: retq
3445 ; XOPAVX2-LABEL: shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39:
3447 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,12,12,13,1,6,13,7,u,u,u,u,u,u,u,u,u,u,u,u,17,22,29,23,20,19,u,19,u,u,u,u]
3448 ; XOPAVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,u,5,0,6,u,1,u>
3449 ; XOPAVX2-NEXT: vpermd %ymm0, %ymm2, %ymm0
3450 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm2 = ymm1[10,13],zero,zero,ymm1[3,3],zero,ymm1[8],zero,zero,zero,ymm1[12,1],zero,zero,zero,zero,zero,ymm1[20],zero,ymm1[17,22],zero,zero,ymm1[16],zero,ymm1[27],zero,zero,zero,zero,zero
3451 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1]
3452 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,ymm1[12],zero,zero,zero,zero,zero,zero,zero,ymm1[0,3],zero,zero,zero,zero,zero,zero,ymm1[21,16],zero,ymm1[26],zero,zero,ymm1[20,18,20,23]
3453 ; XOPAVX2-NEXT: vpor %ymm1, %ymm2, %ymm1
3454 ; XOPAVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,0,255,255,255,255,0,0,0,255,255,0,255,255,0,0,255,0,255,255,255,255,255,255,255,0,255,255,255,255]
3455 ; XOPAVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
3456 ; XOPAVX2-NEXT: retq
3457 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 42, i32 45, i32 12, i32 13, i32 35, i32 35, i32 60, i32 40, i32 17, i32 22, i32 29, i32 44, i32 33, i32 12, i32 48, i32 51, i32 20, i32 19, i32 52, i32 19, i32 49, i32 54, i32 37, i32 32, i32 48, i32 42, i32 59, i32 7, i32 36, i32 34, i32 36, i32 39>
3458 ret <32 x i8> %shuffle
3461 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40(<32 x i8> %a, <32 x i8> %b) {
3462 ; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40:
3464 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
3465 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
3466 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
3467 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3470 ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40:
3472 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3473 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
3476 ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40:
3477 ; AVX512VLBW: # %bb.0:
3478 ; AVX512VLBW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3479 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
3480 ; AVX512VLBW-NEXT: retq
3482 ; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40:
3483 ; AVX512VLVBMI: # %bb.0:
3484 ; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,32,32,32,32,32,32,32,32,40,40,40,40,40,40,40,40]
3485 ; AVX512VLVBMI-NEXT: vpermt2b %ymm1, %ymm2, %ymm0
3486 ; AVX512VLVBMI-NEXT: retq
3488 ; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40:
3490 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
3491 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
3492 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
3493 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3494 ; XOPAVX1-NEXT: retq
3496 ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40:
3498 ; XOPAVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3499 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
3500 ; XOPAVX2-NEXT: retq
3501 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40>
3502 ret <32 x i8> %shuffle
3505 define <32 x i8> @shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40(<32 x i8> %a, <32 x i8> %b) {
3506 ; AVX1-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40:
3508 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
3509 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
3510 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3511 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
3512 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3515 ; AVX2-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40:
3517 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
3518 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
3521 ; AVX512VLBW-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40:
3522 ; AVX512VLBW: # %bb.0:
3523 ; AVX512VLBW-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
3524 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
3525 ; AVX512VLBW-NEXT: retq
3527 ; AVX512VLVBMI-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40:
3528 ; AVX512VLVBMI: # %bb.0:
3529 ; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} ymm2 = [16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24,32,32,32,32,32,32,32,32,40,40,40,40,40,40,40,40]
3530 ; AVX512VLVBMI-NEXT: vpermt2b %ymm1, %ymm2, %ymm0
3531 ; AVX512VLVBMI-NEXT: retq
3533 ; XOPAVX1-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40:
3535 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
3536 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
3537 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3538 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
3539 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3540 ; XOPAVX1-NEXT: retq
3542 ; XOPAVX2-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40:
3544 ; XOPAVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
3545 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
3546 ; XOPAVX2-NEXT: retq
3547 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40>
3548 ret <32 x i8> %shuffle
3551 define <32 x i8> @shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56(<32 x i8> %a, <32 x i8> %b) {
3552 ; AVX1-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56:
3554 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
3555 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
3556 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
3557 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3558 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
3559 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3562 ; AVX2-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56:
3564 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
3565 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
3568 ; AVX512VLBW-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56:
3569 ; AVX512VLBW: # %bb.0:
3570 ; AVX512VLBW-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
3571 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
3572 ; AVX512VLBW-NEXT: retq
3574 ; AVX512VLVBMI-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56:
3575 ; AVX512VLVBMI: # %bb.0:
3576 ; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} ymm2 = [16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24,48,48,48,48,48,48,48,48,56,56,56,56,56,56,56,56]
3577 ; AVX512VLVBMI-NEXT: vpermt2b %ymm1, %ymm2, %ymm0
3578 ; AVX512VLVBMI-NEXT: retq
3580 ; XOPAVX1-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56:
3582 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
3583 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
3584 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
3585 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3586 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
3587 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3588 ; XOPAVX1-NEXT: retq
3590 ; XOPAVX2-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56:
3592 ; XOPAVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
3593 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
3594 ; XOPAVX2-NEXT: retq
3595 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56>
3596 ret <32 x i8> %shuffle
3599 define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56(<32 x i8> %a, <32 x i8> %b) {
3600 ; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56:
3602 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
3603 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
3604 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
3605 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
3606 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3609 ; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56:
3611 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
3612 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
3615 ; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56:
3616 ; AVX512VLBW: # %bb.0:
3617 ; AVX512VLBW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
3618 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
3619 ; AVX512VLBW-NEXT: retq
3621 ; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56:
3622 ; AVX512VLVBMI: # %bb.0:
3623 ; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,48,48,48,48,48,48,48,48,56,56,56,56,56,56,56,56]
3624 ; AVX512VLVBMI-NEXT: vpermt2b %ymm1, %ymm2, %ymm0
3625 ; AVX512VLVBMI-NEXT: retq
3627 ; XOPAVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56:
3629 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
3630 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
3631 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
3632 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
3633 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3634 ; XOPAVX1-NEXT: retq
3636 ; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56:
3638 ; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
3639 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
3640 ; XOPAVX2-NEXT: retq
3641 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56>
3642 ret <32 x i8> %shuffle
3645 define <32 x i8> @shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47(<32 x i8> %a, <32 x i8> %b) {
3646 ; AVX1-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47:
3648 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
3649 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
3650 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3653 ; AVX2-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47:
3655 ; AVX2-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
3656 ; AVX2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
3657 ; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
3660 ; AVX512VLBW-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47:
3661 ; AVX512VLBW: # %bb.0:
3662 ; AVX512VLBW-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
3663 ; AVX512VLBW-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
3664 ; AVX512VLBW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
3665 ; AVX512VLBW-NEXT: retq
3667 ; AVX512VLVBMI-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47:
3668 ; AVX512VLVBMI: # %bb.0:
3669 ; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} ymm2 = [0,32,1,33,2,34,3,35,4,36,5,37,6,38,7,39,8,40,9,41,10,42,11,43,12,44,13,45,14,46,15,47]
3670 ; AVX512VLVBMI-NEXT: vpermt2b %ymm1, %ymm2, %ymm0
3671 ; AVX512VLVBMI-NEXT: retq
3673 ; XOPAVX1-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47:
3675 ; XOPAVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
3676 ; XOPAVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
3677 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
3678 ; XOPAVX1-NEXT: retq
3680 ; XOPAVX2-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47:
3682 ; XOPAVX2-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
3683 ; XOPAVX2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
3684 ; XOPAVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
3685 ; XOPAVX2-NEXT: retq
3686 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47>
3687 ret <32 x i8> %shuffle
3690 define <32 x i8> @shuffle_v32i8_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00_32_34_36_38_40_42_44_46_33_35_37_39_41_43_45_47(<32 x i8> %a, <32 x i8> %b) {
3691 ; AVX1-LABEL: shuffle_v32i8_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00_32_34_36_38_40_42_44_46_33_35_37_39_41_43_45_47:
3693 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
3694 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,2,4,6,8,10,12,14,1,3,5,7,9,11,13,15]
3695 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3698 ; AVX2-LABEL: shuffle_v32i8_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00_32_34_36_38_40_42_44_46_33_35_37_39_41_43_45_47:
3700 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3701 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,16,18,20,22,24,26,28,30,17,19,21,23,25,27,29,31]
3704 ; AVX512VLBW-LABEL: shuffle_v32i8_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00_32_34_36_38_40_42_44_46_33_35_37_39_41_43_45_47:
3705 ; AVX512VLBW: # %bb.0:
3706 ; AVX512VLBW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3707 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,16,18,20,22,24,26,28,30,17,19,21,23,25,27,29,31]
3708 ; AVX512VLBW-NEXT: retq
3710 ; AVX512VLVBMI-LABEL: shuffle_v32i8_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00_32_34_36_38_40_42_44_46_33_35_37_39_41_43_45_47:
3711 ; AVX512VLVBMI: # %bb.0:
3712 ; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} ymm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,32,34,36,38,40,42,44,46,33,35,37,39,41,43,45,47]
3713 ; AVX512VLVBMI-NEXT: vpermt2b %ymm1, %ymm2, %ymm0
3714 ; AVX512VLVBMI-NEXT: retq
3716 ; XOPAVX1-LABEL: shuffle_v32i8_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00_32_34_36_38_40_42_44_46_33_35_37_39_41_43_45_47:
3718 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
3719 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[0,2,4,6,8,10,12,14,1,3,5,7,9,11,13,15]
3720 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3721 ; XOPAVX1-NEXT: retq
3723 ; XOPAVX2-LABEL: shuffle_v32i8_15_14_13_12_11_10_09_08_07_06_05_04_03_02_01_00_32_34_36_38_40_42_44_46_33_35_37_39_41_43_45_47:
3725 ; XOPAVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
3726 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,16,18,20,22,24,26,28,30,17,19,21,23,25,27,29,31]
3727 ; XOPAVX2-NEXT: retq
3728 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 32, i32 34, i32 36, i32 38, i32 40, i32 42, i32 44, i32 46, i32 33, i32 35, i32 37, i32 39, i32 41, i32 43, i32 45, i32 47>
3729 ret <32 x i8> %shuffle
3732 define <32 x i8> @shuffle_v32i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_32_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_48(<32 x i8> %a) {
3733 ; AVX1-LABEL: shuffle_v32i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_32_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_48:
3735 ; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
3736 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3737 ; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
3738 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3741 ; AVX2OR512VL-LABEL: shuffle_v32i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_32_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_48:
3742 ; AVX2OR512VL: # %bb.0:
3743 ; AVX2OR512VL-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16]
3744 ; AVX2OR512VL-NEXT: retq
3746 ; XOPAVX1-LABEL: shuffle_v32i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_32_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_48:
3748 ; XOPAVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
3749 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3750 ; XOPAVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
3751 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3752 ; XOPAVX1-NEXT: retq
3754 ; XOPAVX2-LABEL: shuffle_v32i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_32_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_48:
3756 ; XOPAVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16]
3757 ; XOPAVX2-NEXT: retq
3758 %shuffle = shufflevector <32 x i8> zeroinitializer, <32 x i8> %a, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 32, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 48>
3759 ret <32 x i8> %shuffle
3762 define <32 x i8> @shuffle_v32i8_47_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_63_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(<32 x i8> %a) {
3763 ; AVX1-LABEL: shuffle_v32i8_47_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_63_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
3765 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
3766 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3767 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
3768 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3771 ; AVX2OR512VL-LABEL: shuffle_v32i8_47_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_63_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
3772 ; AVX2OR512VL: # %bb.0:
3773 ; AVX2OR512VL-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
3774 ; AVX2OR512VL-NEXT: retq
3776 ; XOPAVX1-LABEL: shuffle_v32i8_47_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_63_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
3778 ; XOPAVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
3779 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3780 ; XOPAVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
3781 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3782 ; XOPAVX1-NEXT: retq
3784 ; XOPAVX2-LABEL: shuffle_v32i8_47_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_63_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
3786 ; XOPAVX2-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
3787 ; XOPAVX2-NEXT: retq
3788 %shuffle = shufflevector <32 x i8> zeroinitializer, <32 x i8> %a, <32 x i32> <i32 47, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 63, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
3789 ret <32 x i8> %shuffle
3793 ; Shuffle to logical bit shifts
3796 define <32 x i8> @shuffle_v32i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14_zz_16_zz_18_zz_20_zz_22_zz_24_zz_26_zz_28_zz_30(<32 x i8> %a) {
3797 ; AVX1-LABEL: shuffle_v32i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14_zz_16_zz_18_zz_20_zz_22_zz_24_zz_26_zz_28_zz_30:
3799 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm1
3800 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3801 ; AVX1-NEXT: vpsllw $8, %xmm0, %xmm0
3802 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3805 ; AVX2OR512VL-LABEL: shuffle_v32i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14_zz_16_zz_18_zz_20_zz_22_zz_24_zz_26_zz_28_zz_30:
3806 ; AVX2OR512VL: # %bb.0:
3807 ; AVX2OR512VL-NEXT: vpsllw $8, %ymm0, %ymm0
3808 ; AVX2OR512VL-NEXT: retq
3810 ; XOPAVX1-LABEL: shuffle_v32i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14_zz_16_zz_18_zz_20_zz_22_zz_24_zz_26_zz_28_zz_30:
3812 ; XOPAVX1-NEXT: vpsllw $8, %xmm0, %xmm1
3813 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3814 ; XOPAVX1-NEXT: vpsllw $8, %xmm0, %xmm0
3815 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3816 ; XOPAVX1-NEXT: retq
3818 ; XOPAVX2-LABEL: shuffle_v32i8_zz_00_zz_02_zz_04_zz_06_zz_08_zz_10_zz_12_zz_14_zz_16_zz_18_zz_20_zz_22_zz_24_zz_26_zz_28_zz_30:
3820 ; XOPAVX2-NEXT: vpsllw $8, %ymm0, %ymm0
3821 ; XOPAVX2-NEXT: retq
3822 %shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 0, i32 32, i32 2, i32 32, i32 4, i32 32, i32 6, i32 32, i32 8, i32 32, i32 10, i32 32, i32 12, i32 32, i32 14, i32 32, i32 16, i32 32, i32 18, i32 32, i32 20, i32 32, i32 22, i32 32, i32 24, i32 32, i32 26, i32 32, i32 28, i32 32, i32 30>
3823 ret <32 x i8> %shuffle
3826 define <32 x i8> @shuffle_v32i8_zz_zz_00_01_zz_zz_04_05_zz_zz_08_09_zz_zz_12_13_zz_zz_16_17_zz_zz_20_21_zz_zz_24_25_zz_zz_28_29(<32 x i8> %a) {
3827 ; AVX1-LABEL: shuffle_v32i8_zz_zz_00_01_zz_zz_04_05_zz_zz_08_09_zz_zz_12_13_zz_zz_16_17_zz_zz_20_21_zz_zz_24_25_zz_zz_28_29:
3829 ; AVX1-NEXT: vpslld $16, %xmm0, %xmm1
3830 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3831 ; AVX1-NEXT: vpslld $16, %xmm0, %xmm0
3832 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3835 ; AVX2OR512VL-LABEL: shuffle_v32i8_zz_zz_00_01_zz_zz_04_05_zz_zz_08_09_zz_zz_12_13_zz_zz_16_17_zz_zz_20_21_zz_zz_24_25_zz_zz_28_29:
3836 ; AVX2OR512VL: # %bb.0:
3837 ; AVX2OR512VL-NEXT: vpslld $16, %ymm0, %ymm0
3838 ; AVX2OR512VL-NEXT: retq
3840 ; XOPAVX1-LABEL: shuffle_v32i8_zz_zz_00_01_zz_zz_04_05_zz_zz_08_09_zz_zz_12_13_zz_zz_16_17_zz_zz_20_21_zz_zz_24_25_zz_zz_28_29:
3842 ; XOPAVX1-NEXT: vpslld $16, %xmm0, %xmm1
3843 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3844 ; XOPAVX1-NEXT: vpslld $16, %xmm0, %xmm0
3845 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3846 ; XOPAVX1-NEXT: retq
3848 ; XOPAVX2-LABEL: shuffle_v32i8_zz_zz_00_01_zz_zz_04_05_zz_zz_08_09_zz_zz_12_13_zz_zz_16_17_zz_zz_20_21_zz_zz_24_25_zz_zz_28_29:
3850 ; XOPAVX2-NEXT: vpslld $16, %ymm0, %ymm0
3851 ; XOPAVX2-NEXT: retq
3852 %shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 32, i32 0, i32 1, i32 32, i32 32, i32 4, i32 5, i32 32, i32 32, i32 8, i32 9, i32 32, i32 32, i32 12, i32 13, i32 32, i32 32, i32 16, i32 17, i32 32, i32 32, i32 20, i32 21, i32 32, i32 32, i32 24, i32 25, i32 32, i32 32, i32 28, i32 29>
3853 ret <32 x i8> %shuffle
3856 define <32 x i8> @shuffle_v32i8_zz_zz_zz_zz_zz_zz_00_01_zz_zz_zz_zz_zz_zz_08_09_zz_zz_zz_zz_zz_zz_16_17_zz_zz_zz_zz_zz_zz_24_25(<32 x i8> %a) {
3857 ; AVX1-LABEL: shuffle_v32i8_zz_zz_zz_zz_zz_zz_00_01_zz_zz_zz_zz_zz_zz_08_09_zz_zz_zz_zz_zz_zz_16_17_zz_zz_zz_zz_zz_zz_24_25:
3859 ; AVX1-NEXT: vpsllq $48, %xmm0, %xmm1
3860 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3861 ; AVX1-NEXT: vpsllq $48, %xmm0, %xmm0
3862 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3865 ; AVX2OR512VL-LABEL: shuffle_v32i8_zz_zz_zz_zz_zz_zz_00_01_zz_zz_zz_zz_zz_zz_08_09_zz_zz_zz_zz_zz_zz_16_17_zz_zz_zz_zz_zz_zz_24_25:
3866 ; AVX2OR512VL: # %bb.0:
3867 ; AVX2OR512VL-NEXT: vpsllq $48, %ymm0, %ymm0
3868 ; AVX2OR512VL-NEXT: retq
3870 ; XOPAVX1-LABEL: shuffle_v32i8_zz_zz_zz_zz_zz_zz_00_01_zz_zz_zz_zz_zz_zz_08_09_zz_zz_zz_zz_zz_zz_16_17_zz_zz_zz_zz_zz_zz_24_25:
3872 ; XOPAVX1-NEXT: vpsllq $48, %xmm0, %xmm1
3873 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3874 ; XOPAVX1-NEXT: vpsllq $48, %xmm0, %xmm0
3875 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3876 ; XOPAVX1-NEXT: retq
3878 ; XOPAVX2-LABEL: shuffle_v32i8_zz_zz_zz_zz_zz_zz_00_01_zz_zz_zz_zz_zz_zz_08_09_zz_zz_zz_zz_zz_zz_16_17_zz_zz_zz_zz_zz_zz_24_25:
3880 ; XOPAVX2-NEXT: vpsllq $48, %ymm0, %ymm0
3881 ; XOPAVX2-NEXT: retq
3882 %shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 0, i32 1, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 8, i32 9, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 16, i32 17, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 24, i32 25>
3883 ret <32 x i8> %shuffle
3886 define <32 x i8> @shuffle_v32i8_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31_zz(<32 x i8> %a) {
3887 ; AVX1-LABEL: shuffle_v32i8_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31_zz:
3889 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
3890 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3891 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
3892 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3895 ; AVX2OR512VL-LABEL: shuffle_v32i8_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31_zz:
3896 ; AVX2OR512VL: # %bb.0:
3897 ; AVX2OR512VL-NEXT: vpsrlw $8, %ymm0, %ymm0
3898 ; AVX2OR512VL-NEXT: retq
3900 ; XOPAVX1-LABEL: shuffle_v32i8_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31_zz:
3902 ; XOPAVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
3903 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3904 ; XOPAVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
3905 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3906 ; XOPAVX1-NEXT: retq
3908 ; XOPAVX2-LABEL: shuffle_v32i8_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31_zz:
3910 ; XOPAVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
3911 ; XOPAVX2-NEXT: retq
3912 %shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 1, i32 32, i32 3, i32 32, i32 5, i32 32, i32 7, i32 32, i32 9, i32 32, i32 11, i32 32, i32 13, i32 32, i32 15, i32 32, i32 17, i32 32, i32 19, i32 32, i32 21, i32 32, i32 23, i32 32, i32 25, i32 32, i32 27, i32 32, i32 29, i32 32, i32 31, i32 32>
3913 ret <32 x i8> %shuffle
3916 define <32 x i8> @shuffle_v32i8_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz_18_19_zz_zz_22_23_zz_zz_26_27_zz_zz_30_31_zz_zz(<32 x i8> %a) {
3917 ; AVX1-LABEL: shuffle_v32i8_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz_18_19_zz_zz_22_23_zz_zz_26_27_zz_zz_30_31_zz_zz:
3919 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
3920 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3921 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
3922 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3925 ; AVX2OR512VL-LABEL: shuffle_v32i8_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz_18_19_zz_zz_22_23_zz_zz_26_27_zz_zz_30_31_zz_zz:
3926 ; AVX2OR512VL: # %bb.0:
3927 ; AVX2OR512VL-NEXT: vpsrld $16, %ymm0, %ymm0
3928 ; AVX2OR512VL-NEXT: retq
3930 ; XOPAVX1-LABEL: shuffle_v32i8_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz_18_19_zz_zz_22_23_zz_zz_26_27_zz_zz_30_31_zz_zz:
3932 ; XOPAVX1-NEXT: vpsrld $16, %xmm0, %xmm1
3933 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3934 ; XOPAVX1-NEXT: vpsrld $16, %xmm0, %xmm0
3935 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3936 ; XOPAVX1-NEXT: retq
3938 ; XOPAVX2-LABEL: shuffle_v32i8_02_03_zz_zz_06_07_zz_zz_10_11_zz_zz_14_15_zz_zz_18_19_zz_zz_22_23_zz_zz_26_27_zz_zz_30_31_zz_zz:
3940 ; XOPAVX2-NEXT: vpsrld $16, %ymm0, %ymm0
3941 ; XOPAVX2-NEXT: retq
3942 %shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 2, i32 3, i32 32, i32 32, i32 6, i32 7, i32 32, i32 32, i32 10, i32 11, i32 32, i32 32, i32 14, i32 15, i32 32, i32 32, i32 18, i32 19, i32 32, i32 32, i32 22, i32 23, i32 32, i32 32, i32 26, i32 27, i32 32, i32 32, i32 30, i32 31, i32 32, i32 32>
3943 ret <32 x i8> %shuffle
3946 define <32 x i8> @shuffle_v32i8_07_zz_zz_zz_zz_zz_zz_zz_15_zz_zz_zz_zz_z_zz_zz_23_zz_zz_zz_zz_zz_zz_zz_31_zz_zz_zz_zz_zz_zz_zz(<32 x i8> %a) {
3947 ; AVX1-LABEL: shuffle_v32i8_07_zz_zz_zz_zz_zz_zz_zz_15_zz_zz_zz_zz_z_zz_zz_23_zz_zz_zz_zz_zz_zz_zz_31_zz_zz_zz_zz_zz_zz_zz:
3949 ; AVX1-NEXT: vpsrlq $56, %xmm0, %xmm1
3950 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3951 ; AVX1-NEXT: vpsrlq $56, %xmm0, %xmm0
3952 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3955 ; AVX2OR512VL-LABEL: shuffle_v32i8_07_zz_zz_zz_zz_zz_zz_zz_15_zz_zz_zz_zz_z_zz_zz_23_zz_zz_zz_zz_zz_zz_zz_31_zz_zz_zz_zz_zz_zz_zz:
3956 ; AVX2OR512VL: # %bb.0:
3957 ; AVX2OR512VL-NEXT: vpsrlq $56, %ymm0, %ymm0
3958 ; AVX2OR512VL-NEXT: retq
3960 ; XOPAVX1-LABEL: shuffle_v32i8_07_zz_zz_zz_zz_zz_zz_zz_15_zz_zz_zz_zz_z_zz_zz_23_zz_zz_zz_zz_zz_zz_zz_31_zz_zz_zz_zz_zz_zz_zz:
3962 ; XOPAVX1-NEXT: vpsrlq $56, %xmm0, %xmm1
3963 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3964 ; XOPAVX1-NEXT: vpsrlq $56, %xmm0, %xmm0
3965 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3966 ; XOPAVX1-NEXT: retq
3968 ; XOPAVX2-LABEL: shuffle_v32i8_07_zz_zz_zz_zz_zz_zz_zz_15_zz_zz_zz_zz_z_zz_zz_23_zz_zz_zz_zz_zz_zz_zz_31_zz_zz_zz_zz_zz_zz_zz:
3970 ; XOPAVX2-NEXT: vpsrlq $56, %ymm0, %ymm0
3971 ; XOPAVX2-NEXT: retq
3972 %shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 7, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 15, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 23, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
3973 ret <32 x i8> %shuffle
3976 define <32 x i8> @shuffle_v32i8_32_zz_zz_zz_zz_zz_zz_zz_33_zz_zz_zz_zz_zz_zz_zz_34_zz_zz_zz_zz_zz_zz_zz_35_zz_zz_zz_zz_zz_zz_zz(<32 x i8> %a) {
3977 ; AVX1-LABEL: shuffle_v32i8_32_zz_zz_zz_zz_zz_zz_zz_33_zz_zz_zz_zz_zz_zz_zz_34_zz_zz_zz_zz_zz_zz_zz_35_zz_zz_zz_zz_zz_zz_zz:
3979 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
3980 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
3981 ; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
3982 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3985 ; AVX2OR512VL-LABEL: shuffle_v32i8_32_zz_zz_zz_zz_zz_zz_zz_33_zz_zz_zz_zz_zz_zz_zz_34_zz_zz_zz_zz_zz_zz_zz_35_zz_zz_zz_zz_zz_zz_zz:
3986 ; AVX2OR512VL: # %bb.0:
3987 ; AVX2OR512VL-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
3988 ; AVX2OR512VL-NEXT: retq
3990 ; XOPAVX1-LABEL: shuffle_v32i8_32_zz_zz_zz_zz_zz_zz_zz_33_zz_zz_zz_zz_zz_zz_zz_34_zz_zz_zz_zz_zz_zz_zz_35_zz_zz_zz_zz_zz_zz_zz:
3992 ; XOPAVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
3993 ; XOPAVX1-NEXT: vpsrld $16, %xmm0, %xmm0
3994 ; XOPAVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
3995 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
3996 ; XOPAVX1-NEXT: retq
3998 ; XOPAVX2-LABEL: shuffle_v32i8_32_zz_zz_zz_zz_zz_zz_zz_33_zz_zz_zz_zz_zz_zz_zz_34_zz_zz_zz_zz_zz_zz_zz_35_zz_zz_zz_zz_zz_zz_zz:
4000 ; XOPAVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
4001 ; XOPAVX2-NEXT: retq
4002 %shuffle = shufflevector <32 x i8> zeroinitializer, <32 x i8> %a, <32 x i32> <i32 32, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 33, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 34, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 35, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
4003 ret <32 x i8> %shuffle
4006 define <32 x i8> @shuffle_v32i8_32_zz_zz_zz_33_zz_zz_zz_34_zz_zz_zz_35_zz_zz_zz_36_zz_zz_zz_37_zz_zz_zz_38_zz_zz_zz_39_zz_zz_zz(<32 x i8> %a) {
4007 ; AVX1-LABEL: shuffle_v32i8_32_zz_zz_zz_33_zz_zz_zz_34_zz_zz_zz_35_zz_zz_zz_36_zz_zz_zz_37_zz_zz_zz_38_zz_zz_zz_39_zz_zz_zz:
4009 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
4010 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
4011 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
4012 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
4015 ; AVX2OR512VL-LABEL: shuffle_v32i8_32_zz_zz_zz_33_zz_zz_zz_34_zz_zz_zz_35_zz_zz_zz_36_zz_zz_zz_37_zz_zz_zz_38_zz_zz_zz_39_zz_zz_zz:
4016 ; AVX2OR512VL: # %bb.0:
4017 ; AVX2OR512VL-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
4018 ; AVX2OR512VL-NEXT: retq
4020 ; XOPAVX1-LABEL: shuffle_v32i8_32_zz_zz_zz_33_zz_zz_zz_34_zz_zz_zz_35_zz_zz_zz_36_zz_zz_zz_37_zz_zz_zz_38_zz_zz_zz_39_zz_zz_zz:
4022 ; XOPAVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
4023 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
4024 ; XOPAVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
4025 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
4026 ; XOPAVX1-NEXT: retq
4028 ; XOPAVX2-LABEL: shuffle_v32i8_32_zz_zz_zz_33_zz_zz_zz_34_zz_zz_zz_35_zz_zz_zz_36_zz_zz_zz_37_zz_zz_zz_38_zz_zz_zz_39_zz_zz_zz:
4030 ; XOPAVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
4031 ; XOPAVX2-NEXT: retq
4032 %shuffle = shufflevector <32 x i8> zeroinitializer, <32 x i8> %a, <32 x i32> <i32 32, i32 0, i32 0, i32 0, i32 33, i32 0, i32 0, i32 0, i32 34, i32 0, i32 0, i32 0, i32 35, i32 0, i32 0, i32 0, i32 36, i32 0, i32 0, i32 0, i32 37, i32 0, i32 0, i32 0, i32 38, i32 0, i32 0, i32 0, i32 39, i32 0, i32 0, i32 0>
4033 ret <32 x i8> %shuffle
4036 define <32 x i8> @shuffle_v32i8_32_zz_33_zz_34_zz_35_zz_36_zz_37_zz_38_zz_39_zz_40_zz_41_zz_42_zz_43_zz_44_zz_45_zz_46_zz_47_zz(<32 x i8> %a) {
4037 ; AVX1-LABEL: shuffle_v32i8_32_zz_33_zz_34_zz_35_zz_36_zz_37_zz_38_zz_39_zz_40_zz_41_zz_42_zz_43_zz_44_zz_45_zz_46_zz_47_zz:
4039 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
4040 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
4041 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
4042 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4045 ; AVX2OR512VL-LABEL: shuffle_v32i8_32_zz_33_zz_34_zz_35_zz_36_zz_37_zz_38_zz_39_zz_40_zz_41_zz_42_zz_43_zz_44_zz_45_zz_46_zz_47_zz:
4046 ; AVX2OR512VL: # %bb.0:
4047 ; AVX2OR512VL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
4048 ; AVX2OR512VL-NEXT: retq
4050 ; XOPAVX1-LABEL: shuffle_v32i8_32_zz_33_zz_34_zz_35_zz_36_zz_37_zz_38_zz_39_zz_40_zz_41_zz_42_zz_43_zz_44_zz_45_zz_46_zz_47_zz:
4052 ; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
4053 ; XOPAVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
4054 ; XOPAVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
4055 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4056 ; XOPAVX1-NEXT: retq
4058 ; XOPAVX2-LABEL: shuffle_v32i8_32_zz_33_zz_34_zz_35_zz_36_zz_37_zz_38_zz_39_zz_40_zz_41_zz_42_zz_43_zz_44_zz_45_zz_46_zz_47_zz:
4060 ; XOPAVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
4061 ; XOPAVX2-NEXT: retq
4062 %shuffle = shufflevector <32 x i8> zeroinitializer, <32 x i8> %a, <32 x i32> <i32 32, i32 0, i32 33, i32 0, i32 34, i32 0, i32 35, i32 0, i32 36, i32 0, i32 37, i32 0, i32 38, i32 0, i32 39, i32 0, i32 40, i32 0, i32 41, i32 0, i32 42, i32 0, i32 43, i32 0, i32 44, i32 0, i32 45, i32 0, i32 46, i32 0, i32 47, i32 0>
4063 ret <32 x i8> %shuffle
4066 define <32 x i8> @shuffle_v32i8_56_zz_zz_zz_57_zz_zz_zz_58_zz_zz_zz__zz_59_zz_zz_zz_60_zz_zz_zz_61_zz_zz_zz_62_zz_zz_zz_63_zz_zz_zz(<32 x i8> %a) {
4067 ; AVX1-LABEL: shuffle_v32i8_56_zz_zz_zz_57_zz_zz_zz_58_zz_zz_zz__zz_59_zz_zz_zz_60_zz_zz_zz_61_zz_zz_zz_62_zz_zz_zz_63_zz_zz_zz:
4069 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
4070 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
4071 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
4072 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
4073 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
4074 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
4077 ; AVX2-LABEL: shuffle_v32i8_56_zz_zz_zz_57_zz_zz_zz_58_zz_zz_zz__zz_59_zz_zz_zz_60_zz_zz_zz_61_zz_zz_zz_62_zz_zz_zz_63_zz_zz_zz:
4079 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4080 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8],zero,zero,zero,ymm0[9],zero,zero,zero,ymm0[10],zero,zero,zero,ymm0[11],zero,zero,zero,ymm0[28],zero,zero,zero,ymm0[29],zero,zero,zero,ymm0[30],zero,zero,zero,ymm0[31],zero,zero,zero
4083 ; AVX512VLBW-LABEL: shuffle_v32i8_56_zz_zz_zz_57_zz_zz_zz_58_zz_zz_zz__zz_59_zz_zz_zz_60_zz_zz_zz_61_zz_zz_zz_62_zz_zz_zz_63_zz_zz_zz:
4084 ; AVX512VLBW: # %bb.0:
4085 ; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4086 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8],zero,zero,zero,ymm0[9],zero,zero,zero,ymm0[10],zero,zero,zero,ymm0[11],zero,zero,zero,ymm0[28],zero,zero,zero,ymm0[29],zero,zero,zero,ymm0[30],zero,zero,zero,ymm0[31],zero,zero,zero
4087 ; AVX512VLBW-NEXT: retq
4089 ; AVX512VLVBMI-LABEL: shuffle_v32i8_56_zz_zz_zz_57_zz_zz_zz_58_zz_zz_zz__zz_59_zz_zz_zz_60_zz_zz_zz_61_zz_zz_zz_62_zz_zz_zz_63_zz_zz_zz:
4090 ; AVX512VLVBMI: # %bb.0:
4091 ; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} ymm2 = [56,1,2,3,57,5,6,7,58,9,10,11,59,13,14,15,60,17,18,19,61,21,22,23,62,25,26,27,63,29,30,31]
4092 ; AVX512VLVBMI-NEXT: vpxor %xmm1, %xmm1, %xmm1
4093 ; AVX512VLVBMI-NEXT: vpermt2b %ymm0, %ymm2, %ymm1
4094 ; AVX512VLVBMI-NEXT: vmovdqa %ymm1, %ymm0
4095 ; AVX512VLVBMI-NEXT: retq
4097 ; XOPAVX1-LABEL: shuffle_v32i8_56_zz_zz_zz_57_zz_zz_zz_58_zz_zz_zz__zz_59_zz_zz_zz_60_zz_zz_zz_61_zz_zz_zz_62_zz_zz_zz_63_zz_zz_zz:
4099 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
4100 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
4101 ; XOPAVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
4102 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
4103 ; XOPAVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
4104 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
4105 ; XOPAVX1-NEXT: retq
4107 ; XOPAVX2-LABEL: shuffle_v32i8_56_zz_zz_zz_57_zz_zz_zz_58_zz_zz_zz__zz_59_zz_zz_zz_60_zz_zz_zz_61_zz_zz_zz_62_zz_zz_zz_63_zz_zz_zz:
4109 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
4110 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8],zero,zero,zero,ymm0[9],zero,zero,zero,ymm0[10],zero,zero,zero,ymm0[11],zero,zero,zero,ymm0[28],zero,zero,zero,ymm0[29],zero,zero,zero,ymm0[30],zero,zero,zero,ymm0[31],zero,zero,zero
4111 ; XOPAVX2-NEXT: retq
4112 %shuffle = shufflevector <32 x i8> zeroinitializer, <32 x i8> %a, <32 x i32> <i32 56, i32 1, i32 2, i32 3, i32 57, i32 5, i32 6, i32 7, i32 58, i32 9, i32 10, i32 11, i32 59, i32 13, i32 14, i32 15, i32 60, i32 17, i32 18, i32 19, i32 61, i32 21, i32 22, i32 23, i32 62, i32 25, i32 26, i32 27, i32 63, i32 29, i32 30, i32 31>
4113 ret <32 x i8> %shuffle
4116 define <32 x i8> @shuffle_v32i8_47_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30(<32 x i8> %a, <32 x i8> %b) {
4117 ; AVX1-LABEL: shuffle_v32i8_47_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
4119 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
4120 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
4121 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[15],xmm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4122 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4123 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
4126 ; AVX2OR512VL-LABEL: shuffle_v32i8_47_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
4127 ; AVX2OR512VL: # %bb.0:
4128 ; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm1[31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
4129 ; AVX2OR512VL-NEXT: retq
4131 ; XOPAVX1-LABEL: shuffle_v32i8_47_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
4133 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
4134 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
4135 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[15],xmm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4136 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4137 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
4138 ; XOPAVX1-NEXT: retq
4140 ; XOPAVX2-LABEL: shuffle_v32i8_47_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
4142 ; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm1[31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
4143 ; XOPAVX2-NEXT: retq
4144 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 47, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 63, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
4145 ret <32 x i8> %shuffle
4148 define <32 x i8> @shuffle_v32i8_uu_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30(<32 x i8> %a, <32 x i8> %b) {
4149 ; AVX1-LABEL: shuffle_v32i8_uu_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
4151 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
4152 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
4153 ; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[15],xmm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4154 ; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4155 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4158 ; AVX2OR512VL-LABEL: shuffle_v32i8_uu_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
4159 ; AVX2OR512VL: # %bb.0:
4160 ; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm1[31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
4161 ; AVX2OR512VL-NEXT: retq
4163 ; XOPAVX1-LABEL: shuffle_v32i8_uu_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
4165 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
4166 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
4167 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[15],xmm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4168 ; XOPAVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4169 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4170 ; XOPAVX1-NEXT: retq
4172 ; XOPAVX2-LABEL: shuffle_v32i8_uu_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
4174 ; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm1[31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
4175 ; XOPAVX2-NEXT: retq
4176 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 63, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
4177 ret <32 x i8> %shuffle
4180 define <32 x i8> @shuffle_v32i8_47_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_uu_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30(<32 x i8> %a, <32 x i8> %b) {
4181 ; AVX1-LABEL: shuffle_v32i8_47_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_uu_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
4183 ; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4184 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
4185 ; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4186 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
4189 ; AVX2OR512VL-LABEL: shuffle_v32i8_47_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_uu_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
4190 ; AVX2OR512VL: # %bb.0:
4191 ; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm1[31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
4192 ; AVX2OR512VL-NEXT: retq
4194 ; XOPAVX1-LABEL: shuffle_v32i8_47_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_uu_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
4196 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4197 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
4198 ; XOPAVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4199 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
4200 ; XOPAVX1-NEXT: retq
4202 ; XOPAVX2-LABEL: shuffle_v32i8_47_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_uu_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
4204 ; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm1[31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
4205 ; XOPAVX2-NEXT: retq
4206 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 47, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 undef, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
4207 ret <32 x i8> %shuffle
4210 define <32 x i8> @shuffle_v32i8_uu_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<32 x i8> %a, <32 x i8> %b) {
4211 ; AVX1-LABEL: shuffle_v32i8_uu_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
4213 ; AVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4214 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
4215 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
4216 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4219 ; AVX2OR512VL-LABEL: shuffle_v32i8_uu_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
4220 ; AVX2OR512VL: # %bb.0:
4221 ; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm1[31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
4222 ; AVX2OR512VL-NEXT: retq
4224 ; XOPAVX1-LABEL: shuffle_v32i8_uu_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
4226 ; XOPAVX1-NEXT: vpslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4227 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
4228 ; XOPAVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
4229 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4230 ; XOPAVX1-NEXT: retq
4232 ; XOPAVX2-LABEL: shuffle_v32i8_uu_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_63_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
4234 ; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm1[31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
4235 ; XOPAVX2-NEXT: retq
4236 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
4237 ret <32 x i8> %shuffle
4240 define <32 x i8> @shuffle_v32i8_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30(<32 x i8> %a, <32 x i8> %b) {
4241 ; AVX1-LABEL: shuffle_v32i8_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
4243 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
4244 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
4245 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4246 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
4249 ; AVX2OR512VL-LABEL: shuffle_v32i8_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
4250 ; AVX2OR512VL: # %bb.0:
4251 ; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm1[31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
4252 ; AVX2OR512VL-NEXT: retq
4254 ; XOPAVX1-LABEL: shuffle_v32i8_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
4256 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
4257 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
4258 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4259 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
4260 ; XOPAVX1-NEXT: retq
4262 ; XOPAVX2-LABEL: shuffle_v32i8_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_63_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
4264 ; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm1[31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
4265 ; XOPAVX2-NEXT: retq
4266 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 63, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
4267 ret <32 x i8> %shuffle
4270 define <32 x i8> @shuffle_v32i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_32_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_48(<32 x i8> %a, <32 x i8> %b) {
4271 ; AVX1-LABEL: shuffle_v32i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_32_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_48:
4273 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
4274 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
4275 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm3[0]
4276 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
4277 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
4280 ; AVX2OR512VL-LABEL: shuffle_v32i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_32_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_48:
4281 ; AVX2OR512VL: # %bb.0:
4282 ; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0],ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16]
4283 ; AVX2OR512VL-NEXT: retq
4285 ; XOPAVX1-LABEL: shuffle_v32i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_32_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_48:
4287 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
4288 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
4289 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm3[0]
4290 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
4291 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
4292 ; XOPAVX1-NEXT: retq
4294 ; XOPAVX2-LABEL: shuffle_v32i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_32_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_48:
4296 ; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0],ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16]
4297 ; XOPAVX2-NEXT: retq
4298 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48>
4299 ret <32 x i8> %shuffle
4302 define <32 x i8> @shuffle_v32i8_33_34_35_36_37_38_39_40_41_42_43_44_45_46_47_00_49_50_51_52_53_54_55_56_57_58_59_60_61_62_63_16(<32 x i8> %a, <32 x i8> %b) {
4303 ; AVX1-LABEL: shuffle_v32i8_33_34_35_36_37_38_39_40_41_42_43_44_45_46_47_00_49_50_51_52_53_54_55_56_57_58_59_60_61_62_63_16:
4305 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
4306 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
4307 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm3[0]
4308 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
4309 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
4312 ; AVX2OR512VL-LABEL: shuffle_v32i8_33_34_35_36_37_38_39_40_41_42_43_44_45_46_47_00_49_50_51_52_53_54_55_56_57_58_59_60_61_62_63_16:
4313 ; AVX2OR512VL: # %bb.0:
4314 ; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16]
4315 ; AVX2OR512VL-NEXT: retq
4317 ; XOPAVX1-LABEL: shuffle_v32i8_33_34_35_36_37_38_39_40_41_42_43_44_45_46_47_00_49_50_51_52_53_54_55_56_57_58_59_60_61_62_63_16:
4319 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
4320 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
4321 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm3[0]
4322 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
4323 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
4324 ; XOPAVX1-NEXT: retq
4326 ; XOPAVX2-LABEL: shuffle_v32i8_33_34_35_36_37_38_39_40_41_42_43_44_45_46_47_00_49_50_51_52_53_54_55_56_57_58_59_60_61_62_63_16:
4328 ; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16]
4329 ; XOPAVX2-NEXT: retq
4330 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 00, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 16>
4331 ret <32 x i8> %shuffle
4334 define <32 x i8> @shuffle_v32i8_15_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_31_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62(<32 x i8> %a, <32 x i8> %b) {
4335 ; AVX1-LABEL: shuffle_v32i8_15_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_31_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62:
4337 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
4338 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
4339 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[15],xmm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4340 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4341 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
4344 ; AVX2OR512VL-LABEL: shuffle_v32i8_15_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_31_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62:
4345 ; AVX2OR512VL: # %bb.0:
4346 ; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm0[31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
4347 ; AVX2OR512VL-NEXT: retq
4349 ; XOPAVX1-LABEL: shuffle_v32i8_15_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_31_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62:
4351 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
4352 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
4353 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[15],xmm3[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4354 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4355 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
4356 ; XOPAVX1-NEXT: retq
4358 ; XOPAVX2-LABEL: shuffle_v32i8_15_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_31_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62:
4360 ; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm0[31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
4361 ; XOPAVX2-NEXT: retq
4362 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 31, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62>
4363 ret <32 x i8> %shuffle
4366 define <32 x i8> @shuffle_v32i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_16(<32 x i8> %a, <32 x i8> %b) {
4367 ; AVX1-LABEL: shuffle_v32i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_16:
4369 ; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0]
4370 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
4371 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0]
4372 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
4375 ; AVX2OR512VL-LABEL: shuffle_v32i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_16:
4376 ; AVX2OR512VL: # %bb.0:
4377 ; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,16]
4378 ; AVX2OR512VL-NEXT: retq
4380 ; XOPAVX1-LABEL: shuffle_v32i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_16:
4382 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0]
4383 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
4384 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0]
4385 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
4386 ; XOPAVX1-NEXT: retq
4388 ; XOPAVX2-LABEL: shuffle_v32i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_16:
4390 ; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,16]
4391 ; XOPAVX2-NEXT: retq
4392 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16>
4393 ret <32 x i8> %shuffle
4396 define <32 x i8> @shuffle_v32i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30(<32 x i8> %a, <32 x i8> %b) {
4397 ; AVX1-LABEL: shuffle_v32i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
4399 ; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4400 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
4401 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4402 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
4405 ; AVX2OR512VL-LABEL: shuffle_v32i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
4406 ; AVX2OR512VL: # %bb.0:
4407 ; AVX2OR512VL-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,31,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
4408 ; AVX2OR512VL-NEXT: retq
4410 ; XOPAVX1-LABEL: shuffle_v32i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
4412 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4413 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
4414 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4415 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
4416 ; XOPAVX1-NEXT: retq
4418 ; XOPAVX2-LABEL: shuffle_v32i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
4420 ; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,31,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
4421 ; XOPAVX2-NEXT: retq
4422 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
4423 ret <32 x i8> %shuffle
4427 define <32 x i8> @shuffle_v32i8_00_01_16_17_02_03_18_19_04_05_20_21_06_07_22_23_08_09_24_25_10_11_26_27_12_13_28_29_14_15_30_31(<32 x i8> %a, <32 x i8> %b) {
4428 ; AVX1-LABEL: shuffle_v32i8_00_01_16_17_02_03_18_19_04_05_20_21_06_07_22_23_08_09_24_25_10_11_26_27_12_13_28_29_14_15_30_31:
4430 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4431 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
4432 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
4433 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
4436 ; AVX2-LABEL: shuffle_v32i8_00_01_16_17_02_03_18_19_04_05_20_21_06_07_22_23_08_09_24_25_10_11_26_27_12_13_28_29_14_15_30_31:
4438 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
4439 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15,16,17,24,25,18,19,26,27,20,21,28,29,22,23,30,31]
4442 ; AVX512VL-LABEL: shuffle_v32i8_00_01_16_17_02_03_18_19_04_05_20_21_06_07_22_23_08_09_24_25_10_11_26_27_12_13_28_29_14_15_30_31:
4443 ; AVX512VL: # %bb.0:
4444 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [0,8,1,9,2,10,3,11,4,12,5,13,6,14,7,15]
4445 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
4446 ; AVX512VL-NEXT: retq
4448 ; XOPAVX1-LABEL: shuffle_v32i8_00_01_16_17_02_03_18_19_04_05_20_21_06_07_22_23_08_09_24_25_10_11_26_27_12_13_28_29_14_15_30_31:
4450 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4451 ; XOPAVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
4452 ; XOPAVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
4453 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
4454 ; XOPAVX1-NEXT: retq
4456 ; XOPAVX2-LABEL: shuffle_v32i8_00_01_16_17_02_03_18_19_04_05_20_21_06_07_22_23_08_09_24_25_10_11_26_27_12_13_28_29_14_15_30_31:
4458 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
4459 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15,16,17,24,25,18,19,26,27,20,21,28,29,22,23,30,31]
4460 ; XOPAVX2-NEXT: retq
4461 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 1, i32 16, i32 17, i32 2, i32 3, i32 18, i32 19, i32 4, i32 5, i32 20, i32 21, i32 6, i32 7, i32 22, i32 23, i32 8, i32 9, i32 24, i32 25, i32 10, i32 11, i32 26, i32 27, i32 12, i32 13, i32 28, i32 29, i32 14, i32 15, i32 30, i32 31>
4462 ret <32 x i8> %shuffle
4465 define <32 x i8> @shuffle_v32i8_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_10_10_10_10_10_10_10_10_10_10_10_10_10_10_10_10(<32 x i8> %a, <32 x i8> %b) {
4466 ; AVX1-LABEL: shuffle_v32i8_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_10_10_10_10_10_10_10_10_10_10_10_10_10_10_10_10:
4468 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10]
4469 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
4472 ; AVX2OR512VL-LABEL: shuffle_v32i8_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_10_10_10_10_10_10_10_10_10_10_10_10_10_10_10_10:
4473 ; AVX2OR512VL: # %bb.0:
4474 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10]
4475 ; AVX2OR512VL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
4476 ; AVX2OR512VL-NEXT: retq
4478 ; XOPAVX1-LABEL: shuffle_v32i8_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_10_10_10_10_10_10_10_10_10_10_10_10_10_10_10_10:
4480 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10]
4481 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
4482 ; XOPAVX1-NEXT: retq
4484 ; XOPAVX2-LABEL: shuffle_v32i8_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_10_10_10_10_10_10_10_10_10_10_10_10_10_10_10_10:
4486 ; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10]
4487 ; XOPAVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
4488 ; XOPAVX2-NEXT: retq
4489 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10>
4490 ret <32 x i8> %shuffle
4493 define <32 x i8> @shuffle_v32i8_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
4494 ; AVX1-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
4496 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
4497 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
4498 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
4499 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
4502 ; AVX2-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
4504 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
4505 ; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
4508 ; AVX512VLBW-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
4509 ; AVX512VLBW: # %bb.0:
4510 ; AVX512VLBW-NEXT: vextracti128 $1, %ymm0, %xmm0
4511 ; AVX512VLBW-NEXT: vpbroadcastb %xmm0, %ymm0
4512 ; AVX512VLBW-NEXT: retq
4514 ; AVX512VLVBMI-SLOW-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
4515 ; AVX512VLVBMI-SLOW: # %bb.0:
4516 ; AVX512VLVBMI-SLOW-NEXT: vextracti128 $1, %ymm0, %xmm0
4517 ; AVX512VLVBMI-SLOW-NEXT: vpbroadcastb %xmm0, %ymm0
4518 ; AVX512VLVBMI-SLOW-NEXT: retq
4520 ; AVX512VLVBMI-FAST-ALL-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
4521 ; AVX512VLVBMI-FAST-ALL: # %bb.0:
4522 ; AVX512VLVBMI-FAST-ALL-NEXT: vpbroadcastb {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
4523 ; AVX512VLVBMI-FAST-ALL-NEXT: vpermb %ymm0, %ymm1, %ymm0
4524 ; AVX512VLVBMI-FAST-ALL-NEXT: retq
4526 ; AVX512VLVBMI-FAST-PERLANE-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
4527 ; AVX512VLVBMI-FAST-PERLANE: # %bb.0:
4528 ; AVX512VLVBMI-FAST-PERLANE-NEXT: vextracti128 $1, %ymm0, %xmm0
4529 ; AVX512VLVBMI-FAST-PERLANE-NEXT: vpbroadcastb %xmm0, %ymm0
4530 ; AVX512VLVBMI-FAST-PERLANE-NEXT: retq
4532 ; XOPAVX1-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
4534 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
4535 ; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
4536 ; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
4537 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
4538 ; XOPAVX1-NEXT: retq
4540 ; XOPAVX2-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
4542 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
4543 ; XOPAVX2-NEXT: vpbroadcastb %xmm0, %ymm0
4544 ; XOPAVX2-NEXT: retq
4545 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
4546 ret <32 x i8> %shuffle
4549 define <32 x i8> @shuffle_v32i8_15_15_15_15_15_15_15_15_32_32_32_32_32_32_32_32_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<32 x i8> %a, <32 x i8> %b) {
4550 ; AVX1-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_32_32_32_32_32_32_32_32_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
4552 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,15,15,15,15,15,15,15,u,u,u,u,u,u,u,u]
4553 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
4554 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
4555 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
4558 ; AVX2-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_32_32_32_32_32_32_32_32_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
4560 ; AVX2-NEXT: vpbroadcastb %xmm1, %xmm1
4561 ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,15,15,15,15,15,15,15,u,u,u,u,u,u,u,u]
4562 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
4565 ; AVX512VLBW-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_32_32_32_32_32_32_32_32_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
4566 ; AVX512VLBW: # %bb.0:
4567 ; AVX512VLBW-NEXT: vpbroadcastb %xmm1, %xmm1
4568 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,15,15,15,15,15,15,15,u,u,u,u,u,u,u,u]
4569 ; AVX512VLBW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
4570 ; AVX512VLBW-NEXT: retq
4572 ; AVX512VLVBMI-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_32_32_32_32_32_32_32_32_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
4573 ; AVX512VLVBMI: # %bb.0:
4574 ; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,16,16,16,16,16,16,16,16]
4575 ; AVX512VLVBMI-NEXT: vpermt2b %xmm1, %xmm2, %xmm0
4576 ; AVX512VLVBMI-NEXT: retq
4578 ; XOP-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_32_32_32_32_32_32_32_32_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
4580 ; XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm0[15,15,15,15,15,15,15,15],xmm1[0,0,0,0,0,0,0,0]
4582 ; AVX-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_32_32_32_32_32_32_32_32_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
4584 ; AVX-NEXT: vpperm {{.*#+}} xmm0 = xmm0[15,15,15,15,15,15,15,15],xmm1[0,0,0,0,0,0,0,0]
4586 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
4587 ret <32 x i8> %shuffle
4590 define <32 x i8> @shuffle_v32i8_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<32 x i8> %a, <32 x i8> %b) {
4591 ; ALL-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
4593 ; ALL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4595 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
4596 ret <32 x i8> %shuffle
4599 define <32 x i8> @shuffle_v32i8_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<32 x i8> %a, <32 x i8> %b) {
4600 ; AVX1-LABEL: shuffle_v32i8_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
4602 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
4603 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
4606 ; AVX2OR512VL-LABEL: shuffle_v32i8_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
4607 ; AVX2OR512VL: # %bb.0:
4608 ; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm0
4609 ; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
4610 ; AVX2OR512VL-NEXT: retq
4612 ; XOPAVX1-LABEL: shuffle_v32i8_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
4614 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
4615 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
4616 ; XOPAVX1-NEXT: retq
4618 ; XOPAVX2-LABEL: shuffle_v32i8_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
4620 ; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
4621 ; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
4622 ; XOPAVX2-NEXT: retq
4623 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
4624 ret <32 x i8> %shuffle
4628 define <32 x i8> @shuffle_v32i8_31_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_47_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62(<32 x i8> %a0, <32 x i8> %a1) {
4629 ; AVX1-LABEL: shuffle_v32i8_31_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_47_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62:
4631 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
4632 ; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm1[15],xmm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4633 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
4634 ; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4635 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
4638 ; AVX2-LABEL: shuffle_v32i8_31_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_47_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62:
4640 ; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
4641 ; AVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm0[31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
4644 ; AVX512VLBW-LABEL: shuffle_v32i8_31_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_47_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62:
4645 ; AVX512VLBW: # %bb.0:
4646 ; AVX512VLBW-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
4647 ; AVX512VLBW-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm0[31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
4648 ; AVX512VLBW-NEXT: retq
4650 ; AVX512VLVBMI-LABEL: shuffle_v32i8_31_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_47_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62:
4651 ; AVX512VLVBMI: # %bb.0:
4652 ; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} ymm2 = [63,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
4653 ; AVX512VLVBMI-NEXT: vpermi2b %ymm0, %ymm1, %ymm2
4654 ; AVX512VLVBMI-NEXT: vmovdqa %ymm2, %ymm0
4655 ; AVX512VLVBMI-NEXT: retq
4657 ; XOPAVX1-LABEL: shuffle_v32i8_31_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_47_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62:
4659 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
4660 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm1[15],xmm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4661 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
4662 ; XOPAVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
4663 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
4664 ; XOPAVX1-NEXT: retq
4666 ; XOPAVX2-LABEL: shuffle_v32i8_31_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_47_48_49_50_51_52_53_54_55_56_57_58_59_60_61_62:
4668 ; XOPAVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
4669 ; XOPAVX2-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],ymm0[31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
4670 ; XOPAVX2-NEXT: retq
4671 %shuffle = shufflevector <32 x i8> %a0, <32 x i8> %a1, <32 x i32> <i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62>
4672 ret <32 x i8> %shuffle
4675 define <32 x i8> @shuffle_v32i8_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_19_16_17_18_23_20_21_22_27_24_25_26_31_28_29_30(<32 x i8> %a) {
4676 ; AVX1-LABEL: shuffle_v32i8_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_19_16_17_18_23_20_21_22_27_24_25_26_31_28_29_30:
4678 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
4679 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14]
4680 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
4681 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
4682 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4685 ; AVX2-LABEL: shuffle_v32i8_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_19_16_17_18_23_20_21_22_27_24_25_26_31_28_29_30:
4687 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14,19,16,17,18,23,20,21,22,27,24,25,26,31,28,29,30]
4690 ; AVX512VL-LABEL: shuffle_v32i8_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_19_16_17_18_23_20_21_22_27_24_25_26_31_28_29_30:
4691 ; AVX512VL: # %bb.0:
4692 ; AVX512VL-NEXT: vprold $8, %ymm0, %ymm0
4693 ; AVX512VL-NEXT: retq
4695 ; XOPAVX1-LABEL: shuffle_v32i8_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_19_16_17_18_23_20_21_22_27_24_25_26_31_28_29_30:
4697 ; XOPAVX1-NEXT: vprotd $8, %xmm0, %xmm1
4698 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
4699 ; XOPAVX1-NEXT: vprotd $8, %xmm0, %xmm0
4700 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
4701 ; XOPAVX1-NEXT: retq
4703 ; XOPAVX2-LABEL: shuffle_v32i8_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_19_16_17_18_23_20_21_22_27_24_25_26_31_28_29_30:
4705 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14,19,16,17,18,23,20,21,22,27,24,25,26,31,28,29,30]
4706 ; XOPAVX2-NEXT: retq
4707 %shuffle = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 3, i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6, i32 11, i32 8, i32 9, i32 10, i32 15, i32 12, i32 13, i32 14, i32 19, i32 16, i32 17, i32 18, i32 23, i32 20, i32 21, i32 22, i32 27, i32 24, i32 25, i32 26, i32 31, i32 28, i32 29, i32 30>
4708 ret <32 x i8> %shuffle
4712 define <32 x i8> @shuffle_v32i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_18_19_20_21_22_23_16_17_26_27_28_29_30_31_24_25(<32 x i8> %a) {
4713 ; AVX1-LABEL: shuffle_v32i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_18_19_20_21_22_23_16_17_26_27_28_29_30_31_24_25:
4715 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[1,2,3,0,4,5,6,7]
4716 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,6,7,4]
4717 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
4718 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
4719 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,7,4]
4720 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
4723 ; AVX2-SLOW-LABEL: shuffle_v32i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_18_19_20_21_22_23_16_17_26_27_28_29_30_31_24_25:
4724 ; AVX2-SLOW: # %bb.0:
4725 ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[1,2,3,0,4,5,6,7,9,10,11,8,12,13,14,15]
4726 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,6,7,4,8,9,10,11,13,14,15,12]
4727 ; AVX2-SLOW-NEXT: retq
4729 ; AVX2-FAST-LABEL: shuffle_v32i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_18_19_20_21_22_23_16_17_26_27_28_29_30_31_24_25:
4730 ; AVX2-FAST: # %bb.0:
4731 ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,0,1,10,11,12,13,14,15,8,9,18,19,20,21,22,23,16,17,26,27,28,29,30,31,24,25]
4732 ; AVX2-FAST-NEXT: retq
4734 ; AVX512VL-LABEL: shuffle_v32i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_18_19_20_21_22_23_16_17_26_27_28_29_30_31_24_25:
4735 ; AVX512VL: # %bb.0:
4736 ; AVX512VL-NEXT: vprolq $48, %ymm0, %ymm0
4737 ; AVX512VL-NEXT: retq
4739 ; XOPAVX1-LABEL: shuffle_v32i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_18_19_20_21_22_23_16_17_26_27_28_29_30_31_24_25:
4741 ; XOPAVX1-NEXT: vprotq $48, %xmm0, %xmm1
4742 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
4743 ; XOPAVX1-NEXT: vprotq $48, %xmm0, %xmm0
4744 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
4745 ; XOPAVX1-NEXT: retq
4747 ; XOPAVX2-LABEL: shuffle_v32i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_18_19_20_21_22_23_16_17_26_27_28_29_30_31_24_25:
4749 ; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[1,2,3,0,4,5,6,7,9,10,11,8,12,13,14,15]
4750 ; XOPAVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,6,7,4,8,9,10,11,13,14,15,12]
4751 ; XOPAVX2-NEXT: retq
4752 %shuffle = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 16, i32 17, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 24, i32 25>
4753 ret <32 x i8> %shuffle
4757 define <32 x i8> @shuffle_v32i8_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31(<32 x i8> %a) {
4758 ; AVX1-LABEL: shuffle_v32i8_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31:
4760 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
4761 ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
4762 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
4763 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
4766 ; AVX2-SLOW-LABEL: shuffle_v32i8_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31:
4767 ; AVX2-SLOW: # %bb.0:
4768 ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15]
4769 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,2,2,2,6,6,6,6]
4770 ; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
4771 ; AVX2-SLOW-NEXT: retq
4773 ; AVX2-FAST-ALL-LABEL: shuffle_v32i8_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31:
4774 ; AVX2-FAST-ALL: # %bb.0:
4775 ; AVX2-FAST-ALL-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15]
4776 ; AVX2-FAST-ALL-NEXT: vpbroadcastd {{.*#+}} ymm1 = [6,6,6,6,6,6,6,6]
4777 ; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
4778 ; AVX2-FAST-ALL-NEXT: retq
4780 ; AVX2-FAST-PERLANE-LABEL: shuffle_v32i8_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31:
4781 ; AVX2-FAST-PERLANE: # %bb.0:
4782 ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,30,31,30,31,30,31,30,31,u,u,u,u,u,u,u,u]
4783 ; AVX2-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
4784 ; AVX2-FAST-PERLANE-NEXT: retq
4786 ; AVX512VL-LABEL: shuffle_v32i8_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31:
4787 ; AVX512VL: # %bb.0:
4788 ; AVX512VL-NEXT: vpbroadcastw {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
4789 ; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
4790 ; AVX512VL-NEXT: retq
4792 ; XOPAVX1-LABEL: shuffle_v32i8_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31:
4794 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
4795 ; XOPAVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
4796 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
4797 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
4798 ; XOPAVX1-NEXT: retq
4800 ; XOPAVX2-LABEL: shuffle_v32i8_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31:
4802 ; XOPAVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15]
4803 ; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,2,2,2,6,6,6,6]
4804 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
4805 ; XOPAVX2-NEXT: retq
4806 %shuffle = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31>
4807 ret <32 x i8> %shuffle
4810 define <32 x i8> @shuffle_v32i8_00_02_04_06_08_10_12_14_32_34_36_38_40_42_44_46_16_18_20_22_24_26_28_30_48_50_52_54_56_58_60_62(<32 x i8> %a0, <32 x i8> %a1) {
4811 ; AVX1-LABEL: shuffle_v32i8_00_02_04_06_08_10_12_14_32_34_36_38_40_42_44_46_16_18_20_22_24_26_28_30_48_50_52_54_56_58_60_62:
4813 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
4814 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
4815 ; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
4816 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
4817 ; AVX1-NEXT: vpand %xmm3, %xmm4, %xmm4
4818 ; AVX1-NEXT: vpackuswb %xmm2, %xmm4, %xmm2
4819 ; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
4820 ; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
4821 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
4822 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
4825 ; AVX2-LABEL: shuffle_v32i8_00_02_04_06_08_10_12_14_32_34_36_38_40_42_44_46_16_18_20_22_24_26_28_30_48_50_52_54_56_58_60_62:
4827 ; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u,16,18,20,22,24,26,28,30]
4828 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u,16,18,20,22,24,26,28,30,u,u,u,u,u,u,u,u]
4829 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
4832 ; AVX512VLBW-LABEL: shuffle_v32i8_00_02_04_06_08_10_12_14_32_34_36_38_40_42_44_46_16_18_20_22_24_26_28_30_48_50_52_54_56_58_60_62:
4833 ; AVX512VLBW: # %bb.0:
4834 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u,16,18,20,22,24,26,28,30]
4835 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u,16,18,20,22,24,26,28,30,u,u,u,u,u,u,u,u]
4836 ; AVX512VLBW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
4837 ; AVX512VLBW-NEXT: retq
4839 ; AVX512VLVBMI-LABEL: shuffle_v32i8_00_02_04_06_08_10_12_14_32_34_36_38_40_42_44_46_16_18_20_22_24_26_28_30_48_50_52_54_56_58_60_62:
4840 ; AVX512VLVBMI: # %bb.0:
4841 ; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2,4,6,8,10,12,14,32,34,36,38,40,42,44,46,16,18,20,22,24,26,28,30,48,50,52,54,56,58,60,62]
4842 ; AVX512VLVBMI-NEXT: vpermt2b %ymm1, %ymm2, %ymm0
4843 ; AVX512VLVBMI-NEXT: retq
4845 ; XOPAVX1-LABEL: shuffle_v32i8_00_02_04_06_08_10_12_14_32_34_36_38_40_42_44_46_16_18_20_22_24_26_28_30_48_50_52_54_56_58_60_62:
4847 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
4848 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
4849 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30]
4850 ; XOPAVX1-NEXT: vpperm %xmm4, %xmm2, %xmm3, %xmm2
4851 ; XOPAVX1-NEXT: vpperm %xmm4, %xmm1, %xmm0, %xmm0
4852 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
4853 ; XOPAVX1-NEXT: retq
4855 ; XOPAVX2-LABEL: shuffle_v32i8_00_02_04_06_08_10_12_14_32_34_36_38_40_42_44_46_16_18_20_22_24_26_28_30_48_50_52_54_56_58_60_62:
4857 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u,16,18,20,22,24,26,28,30]
4858 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u,16,18,20,22,24,26,28,30,u,u,u,u,u,u,u,u]
4859 ; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
4860 ; XOPAVX2-NEXT: retq
4861 %1 = shufflevector <32 x i8> %a0, <32 x i8> %a1, <32 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 32, i32 34, i32 36, i32 38, i32 40, i32 42, i32 44, i32 46, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32 62>
4865 define <32 x i8> @shuffle_v32i8_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62(<32 x i8> %a0, <32 x i8> %a1) {
4866 ; AVX1-LABEL: shuffle_v32i8_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62:
4868 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
4869 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
4870 ; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
4871 ; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
4872 ; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
4873 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
4874 ; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
4875 ; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
4876 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
4877 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4880 ; AVX2-LABEL: shuffle_v32i8_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62:
4882 ; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u,16,18,20,22,24,26,28,30]
4883 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u,16,18,20,22,24,26,28,30,u,u,u,u,u,u,u,u]
4884 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
4885 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
4888 ; AVX512VL-LABEL: shuffle_v32i8_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62:
4889 ; AVX512VL: # %bb.0:
4890 ; AVX512VL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
4891 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
4892 ; AVX512VL-NEXT: vpmovwb %zmm0, %ymm0
4893 ; AVX512VL-NEXT: retq
4895 ; XOPAVX1-LABEL: shuffle_v32i8_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62:
4897 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
4898 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30]
4899 ; XOPAVX1-NEXT: vpperm %xmm3, %xmm2, %xmm1, %xmm1
4900 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
4901 ; XOPAVX1-NEXT: vpperm %xmm3, %xmm2, %xmm0, %xmm0
4902 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4903 ; XOPAVX1-NEXT: retq
4905 ; XOPAVX2-LABEL: shuffle_v32i8_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62:
4907 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u,16,18,20,22,24,26,28,30]
4908 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u,16,18,20,22,24,26,28,30,u,u,u,u,u,u,u,u]
4909 ; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
4910 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
4911 ; XOPAVX2-NEXT: retq
4912 %1 = shufflevector <32 x i8> %a0, <32 x i8> %a1, <32 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 32, i32 34, i32 36, i32 38, i32 40, i32 42, i32 44, i32 46, i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32 62>
4916 define <32 x i8> @shuffle_v32i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62(<16 x i16> %a0, <16 x i16> %a1) {
4917 ; AVX1-LABEL: shuffle_v32i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62:
4919 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
4920 ; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
4921 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
4922 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
4923 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
4924 ; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2
4925 ; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1
4926 ; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
4927 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4930 ; AVX2-LABEL: shuffle_v32i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62:
4932 ; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
4933 ; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1
4934 ; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
4935 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
4938 ; AVX512VL-LABEL: shuffle_v32i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62:
4939 ; AVX512VL: # %bb.0:
4940 ; AVX512VL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
4941 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
4942 ; AVX512VL-NEXT: vpsrlw $8, %zmm0, %zmm0
4943 ; AVX512VL-NEXT: vpmovwb %zmm0, %ymm0
4944 ; AVX512VL-NEXT: retq
4946 ; XOPAVX1-LABEL: shuffle_v32i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62:
4948 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
4949 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
4950 ; XOPAVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31]
4951 ; XOPAVX1-NEXT: vpperm %xmm4, %xmm3, %xmm1, %xmm1
4952 ; XOPAVX1-NEXT: vpperm %xmm4, %xmm2, %xmm0, %xmm0
4953 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
4954 ; XOPAVX1-NEXT: retq
4956 ; XOPAVX2-LABEL: shuffle_v32i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62:
4958 ; XOPAVX2-NEXT: vpsrlw $8, %ymm0, %ymm0
4959 ; XOPAVX2-NEXT: vpsrlw $8, %ymm1, %ymm1
4960 ; XOPAVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0
4961 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
4962 ; XOPAVX2-NEXT: retq
4963 %1 = lshr <16 x i16> %a0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
4964 %2 = lshr <16 x i16> %a1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
4965 %3 = bitcast <16 x i16> %1 to <32 x i8>
4966 %4 = bitcast <16 x i16> %2 to <32 x i8>
4967 %5 = shufflevector <32 x i8> %3, <32 x i8> %4, <32 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 32, i32 34, i32 36, i32 38, i32 40, i32 42, i32 44, i32 46, i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32 62>
4971 define <4 x i64> @PR28136(<32 x i8> %a0, <32 x i8> %a1) {
4972 ; AVX1-LABEL: PR28136:
4974 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
4975 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
4976 ; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = zero,xmm1[8],zero,xmm1[10],zero,xmm1[12],zero,xmm1[14],zero,xmm1[9],zero,xmm1[11],zero,xmm1[13],zero,xmm1[15]
4977 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
4978 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
4979 ; AVX1-NEXT: vpshufb {{.*#+}} xmm3 = xmm0[8],zero,xmm0[10],zero,xmm0[12],zero,xmm0[14],zero,xmm0[9],zero,xmm0[11],zero,xmm0[13],zero,xmm0[15],zero
4980 ; AVX1-NEXT: vpor %xmm2, %xmm3, %xmm2
4981 ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = zero,xmm1[0],zero,xmm1[2],zero,xmm1[4],zero,xmm1[6],zero,xmm1[1],zero,xmm1[3],zero,xmm1[5],zero,xmm1[7]
4982 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0],zero,xmm0[2],zero,xmm0[4],zero,xmm0[6],zero,xmm0[1],zero,xmm0[3],zero,xmm0[5],zero,xmm0[7],zero
4983 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
4984 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
4987 ; AVX2OR512VL-LABEL: PR28136:
4988 ; AVX2OR512VL: # %bb.0:
4989 ; AVX2OR512VL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
4990 ; AVX2OR512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
4991 ; AVX2OR512VL-NEXT: retq
4993 ; XOPAVX1-LABEL: PR28136:
4995 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
4996 ; XOPAVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
4997 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
4998 ; XOPAVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
4999 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[10],xmm1[10],xmm0[12],xmm1[12],xmm0[14],xmm1[14],xmm0[9],xmm1[9],xmm0[11],xmm1[11],xmm0[13],xmm1[13],xmm0[15],xmm1[15]
5000 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2],xmm1[2],xmm0[4],xmm1[4],xmm0[6],xmm1[6],xmm0[1],xmm1[1],xmm0[3],xmm1[3],xmm0[5],xmm1[5],xmm0[7],xmm1[7]
5001 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
5002 ; XOPAVX1-NEXT: retq
5004 ; XOPAVX2-LABEL: PR28136:
5006 ; XOPAVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
5007 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
5008 ; XOPAVX2-NEXT: retq
5009 %1 = shufflevector <32 x i8> %a0, <32 x i8> %a1, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50,i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
5010 %2 = bitcast <32 x i8> %1 to <4 x i64>
5011 %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
5015 define <32 x i8> @PR47262(<4 x i64> %a0) {
5016 ; AVX1-LABEL: PR47262:
5018 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
5019 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
5020 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,8,1,9,2,10,3,11,4,12,5,13,6,14,7,15]
5021 ; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
5022 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
5023 ; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0
5024 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
5027 ; AVX2-LABEL: PR47262:
5029 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
5030 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12,1,5,9,13,2,6,10,14,3,7,11,15,16,20,24,28,17,21,25,29,18,22,26,30,19,23,27,31]
5033 ; AVX512VLBW-LABEL: PR47262:
5034 ; AVX512VLBW: # %bb.0:
5035 ; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
5036 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12,1,5,9,13,2,6,10,14,3,7,11,15,16,20,24,28,17,21,25,29,18,22,26,30,19,23,27,31]
5037 ; AVX512VLBW-NEXT: retq
5039 ; AVX512VLVBMI-LABEL: PR47262:
5040 ; AVX512VLVBMI: # %bb.0:
5041 ; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} ymm1 = [0,4,16,20,1,5,17,21,2,6,18,22,3,7,19,23,8,12,24,28,9,13,25,29,10,14,26,30,11,15,27,31]
5042 ; AVX512VLVBMI-NEXT: vpermb %ymm0, %ymm1, %ymm0
5043 ; AVX512VLVBMI-NEXT: retq
5045 ; XOPAVX1-LABEL: PR47262:
5047 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
5048 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm2 = xmm0[8,12],xmm1[8,12],xmm0[9,13],xmm1[9,13],xmm0[10,14],xmm1[10,14],xmm0[11,15],xmm1[11,15]
5049 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,4],xmm1[0,4],xmm0[1,5],xmm1[1,5],xmm0[2,6],xmm1[2,6],xmm0[3,7],xmm1[3,7]
5050 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
5051 ; XOPAVX1-NEXT: retq
5053 ; XOPAVX2-LABEL: PR47262:
5055 ; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
5056 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12,1,5,9,13,2,6,10,14,3,7,11,15,16,20,24,28,17,21,25,29,18,22,26,30,19,23,27,31]
5057 ; XOPAVX2-NEXT: retq
5058 %t1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
5059 %t2 = bitcast <4 x i64> %t1 to <32 x i8>
5060 %t3 = shufflevector <32 x i8> %t2, <32 x i8> undef, <32 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15, i32 16, i32 20, i32 24, i32 28, i32 17, i32 21, i32 25, i32 29, i32 18, i32 22, i32 26, i32 30, i32 19, i32 23, i32 27, i32 31>
5064 define <32 x i8> @PR55066(<32 x i8> %a0) {
5065 ; AVX1-LABEL: PR55066:
5067 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
5068 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [0,4,8,12,0,4,8,12,0,4,8,12,0,4,8,12]
5069 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
5070 ; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
5071 ; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
5072 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
5075 ; AVX2-LABEL: PR55066:
5077 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u,16,20,24,28,u,u,u,u,u,u,u,u,u,u,u,u]
5078 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [0,4,0,4,0,4,0,4]
5079 ; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
5082 ; AVX512VLBW-LABEL: PR55066:
5083 ; AVX512VLBW: # %bb.0:
5084 ; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u,16,20,24,28,u,u,u,u,u,u,u,u,u,u,u,u]
5085 ; AVX512VLBW-NEXT: vpbroadcastq {{.*#+}} ymm1 = [0,4,0,4,0,4,0,4]
5086 ; AVX512VLBW-NEXT: vpermd %ymm0, %ymm1, %ymm0
5087 ; AVX512VLBW-NEXT: retq
5089 ; AVX512VLVBMI-LABEL: PR55066:
5090 ; AVX512VLVBMI: # %bb.0:
5091 ; AVX512VLVBMI-NEXT: vpbroadcastq {{.*#+}} ymm1 = [0,4,8,12,16,20,24,28,0,4,8,12,16,20,24,28,0,4,8,12,16,20,24,28,0,4,8,12,16,20,24,28]
5092 ; AVX512VLVBMI-NEXT: vpermb %ymm0, %ymm1, %ymm0
5093 ; AVX512VLVBMI-NEXT: retq
5095 ; XOPAVX1-LABEL: PR55066:
5097 ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
5098 ; XOPAVX1-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,4,8,12],xmm1[0,4,8,12],xmm0[0,4,8,12],xmm1[0,4,8,12]
5099 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
5100 ; XOPAVX1-NEXT: retq
5102 ; XOPAVX2-LABEL: PR55066:
5104 ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u,16,20,24,28,u,u,u,u,u,u,u,u,u,u,u,u]
5105 ; XOPAVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [0,4,0,4,0,4,0,4]
5106 ; XOPAVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
5107 ; XOPAVX2-NEXT: retq
5108 %shuffle = shufflevector <32 x i8> %a0, <32 x i8> poison, <32 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
5109 ret <32 x i8> %shuffle
5112 define <32 x i8> @insert_dup_mem_v32i8_i32(ptr %ptr) {
5113 ; AVX1-LABEL: insert_dup_mem_v32i8_i32:
5115 ; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
5116 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
5117 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
5118 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
5121 ; AVX2OR512VL-LABEL: insert_dup_mem_v32i8_i32:
5122 ; AVX2OR512VL: # %bb.0:
5123 ; AVX2OR512VL-NEXT: vpbroadcastb (%rdi), %ymm0
5124 ; AVX2OR512VL-NEXT: retq
5126 ; XOPAVX1-LABEL: insert_dup_mem_v32i8_i32:
5128 ; XOPAVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
5129 ; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
5130 ; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
5131 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
5132 ; XOPAVX1-NEXT: retq
5134 ; XOPAVX2-LABEL: insert_dup_mem_v32i8_i32:
5136 ; XOPAVX2-NEXT: vpbroadcastb (%rdi), %ymm0
5137 ; XOPAVX2-NEXT: retq
5138 %tmp = load i32, ptr %ptr, align 4
5139 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
5140 %tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8>
5141 %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <32 x i32> zeroinitializer
5145 define <32 x i8> @insert_dup_mem_v32i8_sext_i8(ptr %ptr) {
5146 ; AVX1-LABEL: insert_dup_mem_v32i8_sext_i8:
5148 ; AVX1-NEXT: movzbl (%rdi), %eax
5149 ; AVX1-NEXT: vmovd %eax, %xmm0
5150 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
5151 ; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
5152 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
5155 ; AVX2OR512VL-LABEL: insert_dup_mem_v32i8_sext_i8:
5156 ; AVX2OR512VL: # %bb.0:
5157 ; AVX2OR512VL-NEXT: vpbroadcastb (%rdi), %ymm0
5158 ; AVX2OR512VL-NEXT: retq
5160 ; XOPAVX1-LABEL: insert_dup_mem_v32i8_sext_i8:
5162 ; XOPAVX1-NEXT: movzbl (%rdi), %eax
5163 ; XOPAVX1-NEXT: vmovd %eax, %xmm0
5164 ; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
5165 ; XOPAVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
5166 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
5167 ; XOPAVX1-NEXT: retq
5169 ; XOPAVX2-LABEL: insert_dup_mem_v32i8_sext_i8:
5171 ; XOPAVX2-NEXT: vpbroadcastb (%rdi), %ymm0
5172 ; XOPAVX2-NEXT: retq
5173 %tmp = load i8, ptr %ptr, align 1
5174 %tmp1 = sext i8 %tmp to i32
5175 %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
5176 %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8>
5177 %tmp4 = shufflevector <16 x i8> %tmp3, <16 x i8> undef, <32 x i32> zeroinitializer
5181 define <32 x i8> @insert_dup_elt1_mem_v32i8_i32(ptr %ptr) {
5182 ; AVX1-LABEL: insert_dup_elt1_mem_v32i8_i32:
5184 ; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
5185 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
5186 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
5189 ; AVX2OR512VL-LABEL: insert_dup_elt1_mem_v32i8_i32:
5190 ; AVX2OR512VL: # %bb.0:
5191 ; AVX2OR512VL-NEXT: vpbroadcastb 1(%rdi), %ymm0
5192 ; AVX2OR512VL-NEXT: retq
5194 ; XOPAVX1-LABEL: insert_dup_elt1_mem_v32i8_i32:
5196 ; XOPAVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
5197 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
5198 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
5199 ; XOPAVX1-NEXT: retq
5201 ; XOPAVX2-LABEL: insert_dup_elt1_mem_v32i8_i32:
5203 ; XOPAVX2-NEXT: vpbroadcastb 1(%rdi), %ymm0
5204 ; XOPAVX2-NEXT: retq
5205 %tmp = load i32, ptr %ptr, align 4
5206 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
5207 %tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8>
5208 %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
5212 define <32 x i8> @insert_dup_elt3_mem_v32i8_i32(ptr %ptr) {
5213 ; AVX1-LABEL: insert_dup_elt3_mem_v32i8_i32:
5215 ; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
5216 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
5217 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
5220 ; AVX2OR512VL-LABEL: insert_dup_elt3_mem_v32i8_i32:
5221 ; AVX2OR512VL: # %bb.0:
5222 ; AVX2OR512VL-NEXT: vpbroadcastb 3(%rdi), %ymm0
5223 ; AVX2OR512VL-NEXT: retq
5225 ; XOPAVX1-LABEL: insert_dup_elt3_mem_v32i8_i32:
5227 ; XOPAVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
5228 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
5229 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
5230 ; XOPAVX1-NEXT: retq
5232 ; XOPAVX2-LABEL: insert_dup_elt3_mem_v32i8_i32:
5234 ; XOPAVX2-NEXT: vpbroadcastb 3(%rdi), %ymm0
5235 ; XOPAVX2-NEXT: retq
5236 %tmp = load i32, ptr %ptr, align 4
5237 %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
5238 %tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8>
5239 %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
5243 define <32 x i8> @insert_dup_elt1_mem_v32i8_sext_i8(ptr %ptr) {
5244 ; AVX1-LABEL: insert_dup_elt1_mem_v32i8_sext_i8:
5246 ; AVX1-NEXT: movsbl (%rdi), %eax
5247 ; AVX1-NEXT: vmovd %eax, %xmm0
5248 ; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
5249 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
5252 ; AVX2-LABEL: insert_dup_elt1_mem_v32i8_sext_i8:
5254 ; AVX2-NEXT: movsbl (%rdi), %eax
5255 ; AVX2-NEXT: shrl $8, %eax
5256 ; AVX2-NEXT: vmovd %eax, %xmm0
5257 ; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
5260 ; AVX512VL-LABEL: insert_dup_elt1_mem_v32i8_sext_i8:
5261 ; AVX512VL: # %bb.0:
5262 ; AVX512VL-NEXT: movsbl (%rdi), %eax
5263 ; AVX512VL-NEXT: shrl $8, %eax
5264 ; AVX512VL-NEXT: vpbroadcastb %eax, %ymm0
5265 ; AVX512VL-NEXT: retq
5267 ; XOPAVX1-LABEL: insert_dup_elt1_mem_v32i8_sext_i8:
5269 ; XOPAVX1-NEXT: movsbl (%rdi), %eax
5270 ; XOPAVX1-NEXT: vmovd %eax, %xmm0
5271 ; XOPAVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
5272 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
5273 ; XOPAVX1-NEXT: retq
5275 ; XOPAVX2-LABEL: insert_dup_elt1_mem_v32i8_sext_i8:
5277 ; XOPAVX2-NEXT: movsbl (%rdi), %eax
5278 ; XOPAVX2-NEXT: shrl $8, %eax
5279 ; XOPAVX2-NEXT: vmovd %eax, %xmm0
5280 ; XOPAVX2-NEXT: vpbroadcastb %xmm0, %ymm0
5281 ; XOPAVX2-NEXT: retq
5282 %tmp = load i8, ptr %ptr, align 1
5283 %tmp1 = sext i8 %tmp to i32
5284 %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
5285 %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8>
5286 %tmp4 = shufflevector <16 x i8> %tmp3, <16 x i8> undef, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
5290 define <32 x i8> @zeroable_src_to_zext(<32 x i8> %a0) {
5291 ; AVX1-LABEL: zeroable_src_to_zext:
5293 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
5294 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
5295 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
5296 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
5299 ; AVX2OR512VL-LABEL: zeroable_src_to_zext:
5300 ; AVX2OR512VL: # %bb.0:
5301 ; AVX2OR512VL-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
5302 ; AVX2OR512VL-NEXT: retq
5304 ; XOPAVX1-LABEL: zeroable_src_to_zext:
5306 ; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
5307 ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
5308 ; XOPAVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
5309 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
5310 ; XOPAVX1-NEXT: retq
5312 ; XOPAVX2-LABEL: zeroable_src_to_zext:
5314 ; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
5315 ; XOPAVX2-NEXT: retq
5316 %1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
5317 %2 = shufflevector <32 x i8> %1, <32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <32 x i32> <i32 8, i32 9, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 10, i32 11, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 20, i32 21, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 22, i32 23, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48>
5321 define <32 x i8> @unpckh_v32i8(<32 x i8> %x, <32 x i8> %y) {
5322 ; AVX1-LABEL: unpckh_v32i8:
5324 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
5325 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
5328 ; AVX2OR512VL-LABEL: unpckh_v32i8:
5329 ; AVX2OR512VL: # %bb.0:
5330 ; AVX2OR512VL-NEXT: vextracti128 $1, %ymm1, %xmm1
5331 ; AVX2OR512VL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
5332 ; AVX2OR512VL-NEXT: retq
5334 ; XOPAVX1-LABEL: unpckh_v32i8:
5336 ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
5337 ; XOPAVX1-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
5338 ; XOPAVX1-NEXT: retq
5340 ; XOPAVX2-LABEL: unpckh_v32i8:
5342 ; XOPAVX2-NEXT: vextracti128 $1, %ymm1, %xmm1
5343 ; XOPAVX2-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
5344 ; XOPAVX2-NEXT: retq
5345 %unpckh = shufflevector <32 x i8> %x, <32 x i8> %y, <32 x i32> <i32 8, i32 56, i32 9, i32 57, i32 10, i32 58, i32 11, i32 59, i32 12, i32 60, i32 13, i32 61, i32 14, i32 62, i32 15, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
5346 ret <32 x i8> %unpckh
5349 define <32 x i8> @shuffle_v16i16_zextinreg_to_v8i32(<32 x i8> %a) {
5350 ; AVX1-LABEL: shuffle_v16i16_zextinreg_to_v8i32:
5352 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
5353 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
5354 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
5355 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
5358 ; AVX2OR512VL-LABEL: shuffle_v16i16_zextinreg_to_v8i32:
5359 ; AVX2OR512VL: # %bb.0:
5360 ; AVX2OR512VL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
5361 ; AVX2OR512VL-NEXT: retq
5363 ; XOPAVX1-LABEL: shuffle_v16i16_zextinreg_to_v8i32:
5365 ; XOPAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
5366 ; XOPAVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
5367 ; XOPAVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
5368 ; XOPAVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
5369 ; XOPAVX1-NEXT: retq
5371 ; XOPAVX2-LABEL: shuffle_v16i16_zextinreg_to_v8i32:
5373 ; XOPAVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
5374 ; XOPAVX2-NEXT: retq
5375 %b = shufflevector <32 x i8> %a, <32 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <32 x i32> <i32 0, i32 1, i32 42, i32 42, i32 2, i32 3, i32 42, i32 42, i32 4, i32 5, i32 42, i32 42, i32 6, i32 7, i32 42, i32 42, i32 8, i32 9, i32 42, i32 42, i32 10, i32 11, i32 42, i32 42, i32 12, i32 13, i32 42, i32 42, i32 14, i32 15, i32 42, i32 42>