; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512bw,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64

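; The tests below cover the AVX512BW/VL lowering of the saturating pack intrinsics
; (vpackssdw/vpacksswb and vpackusdw/vpackuswb) and of the vpermt2w/vpermi2w variable
; permutes, in their register, memory, broadcast, merge-masking ({%k1}) and
; zero-masking ({%k1} {z}) forms, for both the i686 (X86) and x86_64 (X64) RUN lines above.
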
define <8 x i16> @test_mask_packs_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_mask_packs_epi32_rr_128:
; CHECK:       # %bb.0:
; CHECK-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b)
  ret <8 x i16> %1
}

define <8 x i16> @test_mask_packs_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) {
; X86-LABEL: test_mask_packs_epi32_rrk_128:
; X86:       # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpackssdw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6b,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rrk_128:
; X64:       # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackssdw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6b,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_packs_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_packs_epi32_rrkz_128:
; X86:       # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6b,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rrkz_128:
; X64:       # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6b,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_packs_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
; X86-LABEL: test_mask_packs_epi32_rm_128:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpackssdw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rm_128:
; X64:       # %bb.0:
; X64-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b)
  ret <8 x i16> %1
}

define <8 x i16> @test_mask_packs_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
; X86-LABEL: test_mask_packs_epi32_rmk_128:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpackssdw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6b,0x08]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmk_128:
; X64:       # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackssdw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6b,0x0f]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_packs_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_packs_epi32_rmkz_128:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpackssdw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmkz_128:
; X64:       # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_packs_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_packs_epi32_rmb_128:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpackssdw (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x6b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmb_128:
; X64:       # %bb.0:
; X64-NEXT: vpackssdw (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x6b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b)
  ret <8 x i16> %1
}

define <8 x i16> @test_mask_packs_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <8 x i16> %passThru, i8 %mask) {
; X86-LABEL: test_mask_packs_epi32_rmbk_128:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpackssdw (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0x6b,0x08]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmbk_128:
; X64:       # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackssdw (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0x6b,0x0f]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_packs_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_packs_epi32_rmbkz_128:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpackssdw (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0x6b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmbkz_128:
; X64:       # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackssdw (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0x6b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a, <4 x i32> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
  ret <8 x i16> %3
}

declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>)

define <16 x i16> @test_mask_packs_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_mask_packs_epi32_rr_256:
; CHECK:       # %bb.0:
; CHECK-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6b,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b)
  ret <16 x i16> %1
}

define <16 x i16> @test_mask_packs_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) {
; X86-LABEL: test_mask_packs_epi32_rrk_256:
; X86:       # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpackssdw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6b,0xd1]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rrk_256:
; X64:       # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackssdw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6b,0xd1]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_packs_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i16 %mask) {
; X86-LABEL: test_mask_packs_epi32_rrkz_256:
; X86:       # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rrkz_256:
; X64:       # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_packs_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
; X86-LABEL: test_mask_packs_epi32_rm_256:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpackssdw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rm_256:
; X64:       # %bb.0:
; X64-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b)
  ret <16 x i16> %1
}

define <16 x i16> @test_mask_packs_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
; X86-LABEL: test_mask_packs_epi32_rmk_256:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackssdw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6b,0x08]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmk_256:
; X64:       # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackssdw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6b,0x0f]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_packs_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i16 %mask) {
; X86-LABEL: test_mask_packs_epi32_rmkz_256:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackssdw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmkz_256:
; X64:       # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_packs_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_packs_epi32_rmb_256:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpackssdw (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0x6b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmb_256:
; X64:       # %bb.0:
; X64-NEXT: vpackssdw (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0x6b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b)
  ret <16 x i16> %1
}

define <16 x i16> @test_mask_packs_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <16 x i16> %passThru, i16 %mask) {
; X86-LABEL: test_mask_packs_epi32_rmbk_256:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackssdw (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0x6b,0x08]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmbk_256:
; X64:       # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackssdw (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0x6b,0x0f]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_packs_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i16 %mask) {
; X86-LABEL: test_mask_packs_epi32_rmbkz_256:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackssdw (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0x6b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi32_rmbkz_256:
; X64:       # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackssdw (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0x6b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a, <8 x i32> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
  ret <16 x i16> %3
}

declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>)

define <16 x i8> @test_mask_packs_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_mask_packs_epi16_rr_128:
; CHECK:       # %bb.0:
; CHECK-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a, <8 x i16> %b)
  ret <16 x i8> %1
}

define <16 x i8> @test_mask_packs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) {
; X86-LABEL: test_mask_packs_epi16_rrk_128:
; X86:       # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpacksswb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x63,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rrk_128:
; X64:       # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpacksswb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x63,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru
  ret <16 x i8> %3
}

define <16 x i8> @test_mask_packs_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i16 %mask) {
; X86-LABEL: test_mask_packs_epi16_rrkz_128:
; X86:       # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x63,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rrkz_128:
; X64:       # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x63,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
  ret <16 x i8> %3
}

define <16 x i8> @test_mask_packs_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
; X86-LABEL: test_mask_packs_epi16_rm_128:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpacksswb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rm_128:
; X64:       # %bb.0:
; X64-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a, <8 x i16> %b)
  ret <16 x i8> %1
}

define <16 x i8> @test_mask_packs_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
; X86-LABEL: test_mask_packs_epi16_rmk_128:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpacksswb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x63,0x08]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rmk_128:
; X64:       # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpacksswb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x63,0x0f]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru
  ret <16 x i8> %3
}

define <16 x i8> @test_mask_packs_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i16 %mask) {
; X86-LABEL: test_mask_packs_epi16_rmkz_128:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpacksswb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x63,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rmkz_128:
; X64:       # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x63,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
  ret <16 x i8> %3
}

declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>)

define <32 x i8> @test_mask_packs_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: test_mask_packs_epi16_rr_256:
; CHECK:       # %bb.0:
; CHECK-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x63,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a, <16 x i16> %b)
  ret <32 x i8> %1
}

define <32 x i8> @test_mask_packs_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) {
; X86-LABEL: test_mask_packs_epi16_rrk_256:
; X86:       # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpacksswb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x63,0xd1]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rrk_256:
; X64:       # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpacksswb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x63,0xd1]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru
  ret <32 x i8> %3
}

define <32 x i8> @test_mask_packs_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i32 %mask) {
; X86-LABEL: test_mask_packs_epi16_rrkz_256:
; X86:       # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x63,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rrkz_256:
; X64:       # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x63,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
  ret <32 x i8> %3
}

define <32 x i8> @test_mask_packs_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
; X86-LABEL: test_mask_packs_epi16_rm_256:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpacksswb (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x63,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rm_256:
; X64:       # %bb.0:
; X64-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x63,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a, <16 x i16> %b)
  ret <32 x i8> %1
}

define <32 x i8> @test_mask_packs_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
; X86-LABEL: test_mask_packs_epi16_rmk_256:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpacksswb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x63,0x08]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rmk_256:
; X64:       # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpacksswb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x63,0x0f]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru
  ret <32 x i8> %3
}

define <32 x i8> @test_mask_packs_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i32 %mask) {
; X86-LABEL: test_mask_packs_epi16_rmkz_256:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpacksswb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x63,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packs_epi16_rmkz_256:
; X64:       # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x63,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
  ret <32 x i8> %3
}

declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>)

define <8 x i16> @test_mask_packus_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_mask_packus_epi32_rr_128:
; CHECK:       # %bb.0:
; CHECK-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2b,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b)
  ret <8 x i16> %1
}

define <8 x i16> @test_mask_packus_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) {
; X86-LABEL: test_mask_packus_epi32_rrk_128:
; X86:       # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpackusdw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2b,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rrk_128:
; X64:       # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackusdw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2b,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_packus_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_packus_epi32_rrkz_128:
; X86:       # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x2b,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rrkz_128:
; X64:       # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x2b,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_packus_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
; X86-LABEL: test_mask_packus_epi32_rm_128:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpackusdw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rm_128:
; X64:       # %bb.0:
; X64-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b)
  ret <8 x i16> %1
}

define <8 x i16> @test_mask_packus_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
; X86-LABEL: test_mask_packus_epi32_rmk_128:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpackusdw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2b,0x08]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmk_128:
; X64:       # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackusdw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2b,0x0f]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_packus_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_packus_epi32_rmkz_128:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpackusdw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x2b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmkz_128:
; X64:       # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x2b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_packus_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_packus_epi32_rmb_128:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpackusdw (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x18,0x2b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmb_128:
; X64:       # %bb.0:
; X64-NEXT: vpackusdw (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x18,0x2b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b)
  ret <8 x i16> %1
}

define <8 x i16> @test_mask_packus_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <8 x i16> %passThru, i8 %mask) {
; X86-LABEL: test_mask_packus_epi32_rmbk_128:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpackusdw (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x19,0x2b,0x08]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmbk_128:
; X64:       # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackusdw (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x19,0x2b,0x0f]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
  ret <8 x i16> %3
}

define <8 x i16> @test_mask_packus_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_packus_epi32_rmbkz_128:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
; X86-NEXT: vpackusdw (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x99,0x2b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmbkz_128:
; X64:       # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackusdw (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x99,0x2b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a, <4 x i32> %b)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
  ret <8 x i16> %3
}

declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>)

define <16 x i16> @test_mask_packus_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_mask_packus_epi32_rr_256:
; CHECK:       # %bb.0:
; CHECK-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x2b,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b)
  ret <16 x i16> %1
}

define <16 x i16> @test_mask_packus_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) {
; X86-LABEL: test_mask_packus_epi32_rrk_256:
; X86:       # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpackusdw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2b,0xd1]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rrk_256:
; X64:       # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackusdw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2b,0xd1]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_packus_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i16 %mask) {
; X86-LABEL: test_mask_packus_epi32_rrkz_256:
; X86:       # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rrkz_256:
; X64:       # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_packus_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
; X86-LABEL: test_mask_packus_epi32_rm_256:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpackusdw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x2b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rm_256:
; X64:       # %bb.0:
; X64-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x2b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b)
  ret <16 x i16> %1
}

define <16 x i16> @test_mask_packus_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
; X86-LABEL: test_mask_packus_epi32_rmk_256:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackusdw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2b,0x08]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmk_256:
; X64:       # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackusdw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2b,0x0f]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_packus_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i16 %mask) {
; X86-LABEL: test_mask_packus_epi32_rmkz_256:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackusdw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmkz_256:
; X64:       # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_packus_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_packus_epi32_rmb_256:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpackusdw (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x38,0x2b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmb_256:
; X64:       # %bb.0:
; X64-NEXT: vpackusdw (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x38,0x2b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b)
  ret <16 x i16> %1
}

define <16 x i16> @test_mask_packus_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <16 x i16> %passThru, i16 %mask) {
; X86-LABEL: test_mask_packus_epi32_rmbk_256:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackusdw (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x39,0x2b,0x08]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmbk_256:
; X64:       # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackusdw (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x39,0x2b,0x0f]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
  ret <16 x i16> %3
}

define <16 x i16> @test_mask_packus_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i16 %mask) {
; X86-LABEL: test_mask_packus_epi32_rmbkz_256:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackusdw (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xb9,0x2b,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi32_rmbkz_256:
; X64:       # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackusdw (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xb9,0x2b,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a, <8 x i32> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
  ret <16 x i16> %3
}

declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>)

define <16 x i8> @test_mask_packus_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_mask_packus_epi16_rr_128:
; CHECK:       # %bb.0:
; CHECK-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a, <8 x i16> %b)
  ret <16 x i8> %1
}

define <16 x i8> @test_mask_packus_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) {
; X86-LABEL: test_mask_packus_epi16_rrk_128:
; X86:       # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpackuswb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x67,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rrk_128:
; X64:       # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackuswb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x67,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru
  ret <16 x i8> %3
}

define <16 x i8> @test_mask_packus_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i16 %mask) {
; X86-LABEL: test_mask_packus_epi16_rrkz_128:
; X86:       # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x67,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rrkz_128:
; X64:       # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x67,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
  ret <16 x i8> %3
}

define <16 x i8> @test_mask_packus_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
; X86-LABEL: test_mask_packus_epi16_rm_128:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpackuswb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rm_128:
; X64:       # %bb.0:
; X64-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a, <8 x i16> %b)
  ret <16 x i8> %1
}

define <16 x i8> @test_mask_packus_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
; X86-LABEL: test_mask_packus_epi16_rmk_128:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackuswb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x67,0x08]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rmk_128:
; X64:       # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackuswb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x67,0x0f]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru
  ret <16 x i8> %3
}

define <16 x i8> @test_mask_packus_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i16 %mask) {
; X86-LABEL: test_mask_packus_epi16_rmkz_128:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackuswb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x67,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rmkz_128:
; X64:       # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x67,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <8 x i16>, <8 x i16>* %ptr_b
  %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a, <8 x i16> %b)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
  ret <16 x i8> %3
}

declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>)

define <32 x i8> @test_mask_packus_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: test_mask_packus_epi16_rr_256:
; CHECK:       # %bb.0:
; CHECK-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x67,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a, <16 x i16> %b)
  ret <32 x i8> %1
}

define <32 x i8> @test_mask_packus_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) {
; X86-LABEL: test_mask_packus_epi16_rrk_256:
; X86:       # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpackuswb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x67,0xd1]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rrk_256:
; X64:       # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackuswb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x67,0xd1]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru
  ret <32 x i8> %3
}

define <32 x i8> @test_mask_packus_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i32 %mask) {
; X86-LABEL: test_mask_packus_epi16_rrkz_256:
; X86:       # %bb.0:
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x67,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rrkz_256:
; X64:       # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x67,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
  ret <32 x i8> %3
}

define <32 x i8> @test_mask_packus_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
; X86-LABEL: test_mask_packus_epi16_rm_256:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpackuswb (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x67,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rm_256:
; X64:       # %bb.0:
; X64-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x67,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a, <16 x i16> %b)
  ret <32 x i8> %1
}

define <32 x i8> @test_mask_packus_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
; X86-LABEL: test_mask_packus_epi16_rmk_256:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackuswb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x67,0x08]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rmk_256:
; X64:       # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackuswb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x67,0x0f]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru
  ret <32 x i8> %3
}

define <32 x i8> @test_mask_packus_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i32 %mask) {
; X86-LABEL: test_mask_packus_epi16_rmkz_256:
; X86:       # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpackuswb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x67,0x00]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_packus_epi16_rmkz_256:
; X64:       # %bb.0:
; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x67,0x07]
; X64-NEXT: retq # encoding: [0xc3]
  %b = load <16 x i16>, <16 x i16>* %ptr_b
  %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a, <16 x i16> %b)
  %2 = bitcast i32 %mask to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
  ret <32 x i8> %3
}

declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>)

define <8 x i16>@test_int_x86_avx512_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermt2var_hi_128:
; CHECK:       # %bb.0:
; CHECK-NEXT: vpermi2w %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x75,0xc2]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %x1, <8 x i16> %x0, <8 x i16> %x2)
  ret <8 x i16> %1
}

define <8 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_128:
; X86:       # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpermt2w %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x7d,0xca]
; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_128:
; X64:       # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpermt2w %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x7d,0xca]
; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %x1, <8 x i16> %x0, <8 x i16> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x1
  ret <8 x i16> %3
}

define <8 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_128:
; X86:       # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpermi2w %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x75,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_128:
; X64:       # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpermi2w %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x75,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %x1, <8 x i16> %x0, <8 x i16> %x2)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
  ret <8 x i16> %3
}

define <16 x i16>@test_int_x86_avx512_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermt2var_hi_256:
; CHECK:       # %bb.0:
; CHECK-NEXT: vpermi2w %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x75,0xc2]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %x1, <16 x i16> %x0, <16 x i16> %x2)
  ret <16 x i16> %1
}

define <16 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_256:
; X86:       # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpermt2w %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x7d,0xca]
; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_256:
; X64:       # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpermt2w %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x7d,0xca]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %x1, <16 x i16> %x0, <16 x i16> %x2)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x1
  ret <16 x i16> %3
}

define <16 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_256:
; X86:       # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpermi2w %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x75,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_256:
; X64:       # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpermi2w %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x75,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
  %1 = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %x1, <16 x i16> %x0, <16 x i16> %x2)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
  ret <16 x i16> %3
}

declare <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>)

1228 define <8 x i16>@test_int_x86_avx512_vpermi2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
1229 ; CHECK-LABEL: test_int_x86_avx512_vpermi2var_hi_128:
1231 ; CHECK-NEXT: vpermt2w %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x7d,0xc2]
1232 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1233 %1 = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2)
1237 define <8 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
1238 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_128:
1240 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1241 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1242 ; X86-NEXT: vpermi2w %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x75,0xca]
1243 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1244 ; X86-NEXT: retl # encoding: [0xc3]
1246 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_128:
1248 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1249 ; X64-NEXT: vpermi2w %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x75,0xca]
1250 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1251 ; X64-NEXT: retq # encoding: [0xc3]
1252 %1 = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2)
1253 %2 = bitcast i8 %x3 to <8 x i1>
1254 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x1
1258 declare <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>)
1260 define <16 x i16>@test_int_x86_avx512_vpermi2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
1261 ; CHECK-LABEL: test_int_x86_avx512_vpermi2var_hi_256:
1263 ; CHECK-NEXT: vpermt2w %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x7d,0xc2]
1264 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1265 %1 = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2)
1269 define <16 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
1270 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_256:
1272 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1273 ; X86-NEXT: vpermi2w %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x75,0xca]
1274 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
1275 ; X86-NEXT: retl # encoding: [0xc3]
1277 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_256:
1279 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1280 ; X64-NEXT: vpermi2w %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x75,0xca]
1281 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
1282 ; X64-NEXT: retq # encoding: [0xc3]
1283 %1 = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2)
1284 %2 = bitcast i16 %x3 to <16 x i1>
1285 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x1
1289 declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>)
1291 define <16 x i8> @test_int_x86_avx512_mask_pavg_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
1292 ; X86-LABEL: test_int_x86_avx512_mask_pavg_b_128:
1294 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1295 ; X86-NEXT: vpavgb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe0,0xd1]
1296 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1297 ; X86-NEXT: retl # encoding: [0xc3]
1299 ; X64-LABEL: test_int_x86_avx512_mask_pavg_b_128:
1301 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1302 ; X64-NEXT: vpavgb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe0,0xd1]
1303 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1304 ; X64-NEXT: retq # encoding: [0xc3]
1305 %1 = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %x0, <16 x i8> %x1)
1306 %2 = bitcast i16 %x3 to <16 x i1>
1307 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %x2
1311 declare <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8>, <32 x i8>)
1313 define <32 x i8> @test_int_x86_avx512_mask_pavg_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
1314 ; X86-LABEL: test_int_x86_avx512_mask_pavg_b_256:
1316 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1317 ; X86-NEXT: vpavgb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe0,0xd1]
1318 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1319 ; X86-NEXT: retl # encoding: [0xc3]
1321 ; X64-LABEL: test_int_x86_avx512_mask_pavg_b_256:
1323 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1324 ; X64-NEXT: vpavgb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe0,0xd1]
1325 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1326 ; X64-NEXT: retq # encoding: [0xc3]
1327 %1 = call <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8> %x0, <32 x i8> %x1)
1328 %2 = bitcast i32 %x3 to <32 x i1>
1329 %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %x2
1333 declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>)
1335 define <8 x i16> @test_int_x86_avx512_mask_pavg_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
1336 ; X86-LABEL: test_int_x86_avx512_mask_pavg_w_128:
1338 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1339 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1340 ; X86-NEXT: vpavgw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe3,0xd1]
1341 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1342 ; X86-NEXT: retl # encoding: [0xc3]
1344 ; X64-LABEL: test_int_x86_avx512_mask_pavg_w_128:
1346 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1347 ; X64-NEXT: vpavgw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe3,0xd1]
1348 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1349 ; X64-NEXT: retq # encoding: [0xc3]
1350 %1 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %x0, <8 x i16> %x1)
1351 %2 = bitcast i8 %x3 to <8 x i1>
1352 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x2
1356 declare <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16>, <16 x i16>)
1358 define <16 x i16> @test_int_x86_avx512_mask_pavg_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
1359 ; X86-LABEL: test_int_x86_avx512_mask_pavg_w_256:
1361 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1362 ; X86-NEXT: vpavgw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe3,0xd1]
1363 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1364 ; X86-NEXT: retl # encoding: [0xc3]
1366 ; X64-LABEL: test_int_x86_avx512_mask_pavg_w_256:
1368 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1369 ; X64-NEXT: vpavgw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe3,0xd1]
1370 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1371 ; X64-NEXT: retq # encoding: [0xc3]
1372 %1 = call <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16> %x0, <16 x i16> %x1)
1373 %2 = bitcast i16 %x3 to <16 x i1>
1374 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x2
1378 declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>)
1380 define <8 x i16> @test_int_x86_avx512_mask_pmulhu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
1381 ; X86-LABEL: test_int_x86_avx512_mask_pmulhu_w_128:
1383 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1384 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1385 ; X86-NEXT: vpmulhuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe4,0xd1]
1386 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1387 ; X86-NEXT: retl # encoding: [0xc3]
1389 ; X64-LABEL: test_int_x86_avx512_mask_pmulhu_w_128:
1391 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1392 ; X64-NEXT: vpmulhuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe4,0xd1]
1393 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1394 ; X64-NEXT: retq # encoding: [0xc3]
1395 %1 = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %x0, <8 x i16> %x1)
1396 %2 = bitcast i8 %x3 to <8 x i1>
1397 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x2
1401 declare <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16>, <16 x i16>)
1403 define <16 x i16> @test_int_x86_avx512_mask_pmulhu_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
1404 ; X86-LABEL: test_int_x86_avx512_mask_pmulhu_w_256:
1406 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1407 ; X86-NEXT: vpmulhuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe4,0xd1]
1408 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1409 ; X86-NEXT: retl # encoding: [0xc3]
1411 ; X64-LABEL: test_int_x86_avx512_mask_pmulhu_w_256:
1413 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1414 ; X64-NEXT: vpmulhuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe4,0xd1]
1415 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1416 ; X64-NEXT: retq # encoding: [0xc3]
1417 %1 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %x0, <16 x i16> %x1)
1418 %2 = bitcast i16 %x3 to <16 x i1>
1419 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x2
1423 declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>)
1425 define <8 x i16> @test_int_x86_avx512_mask_pmulh_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
1426 ; X86-LABEL: test_int_x86_avx512_mask_pmulh_w_128:
1428 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1429 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1430 ; X86-NEXT: vpmulhw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe5,0xd1]
1431 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1432 ; X86-NEXT: retl # encoding: [0xc3]
1434 ; X64-LABEL: test_int_x86_avx512_mask_pmulh_w_128:
1436 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1437 ; X64-NEXT: vpmulhw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe5,0xd1]
1438 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1439 ; X64-NEXT: retq # encoding: [0xc3]
1440 %1 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %x0, <8 x i16> %x1)
1441 %2 = bitcast i8 %x3 to <8 x i1>
1442 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x2
1446 declare <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16>, <16 x i16>)
1448 define <16 x i16> @test_int_x86_avx512_mask_pmulh_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
1449 ; X86-LABEL: test_int_x86_avx512_mask_pmulh_w_256:
1451 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1452 ; X86-NEXT: vpmulhw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe5,0xd1]
1453 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1454 ; X86-NEXT: retl # encoding: [0xc3]
1456 ; X64-LABEL: test_int_x86_avx512_mask_pmulh_w_256:
1458 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1459 ; X64-NEXT: vpmulhw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe5,0xd1]
1460 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1461 ; X64-NEXT: retq # encoding: [0xc3]
1462 %1 = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %x0, <16 x i16> %x1)
1463 %2 = bitcast i16 %x3 to <16 x i1>
1464 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x2
1468 declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>)
1470 define <8 x i16> @test_int_x86_avx512_mask_pmulhr_sw_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
1471 ; X86-LABEL: test_int_x86_avx512_mask_pmulhr_sw_128:
1473 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1474 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1475 ; X86-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x0b,0xd1]
1476 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1477 ; X86-NEXT: retl # encoding: [0xc3]
1479 ; X64-LABEL: test_int_x86_avx512_mask_pmulhr_sw_128:
1481 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1482 ; X64-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x0b,0xd1]
1483 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1484 ; X64-NEXT: retq # encoding: [0xc3]
1485 %1 = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %x0, <8 x i16> %x1)
1486 %2 = bitcast i8 %x3 to <8 x i1>
1487 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x2
1491 declare <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16>, <16 x i16>)
1493 define <16 x i16> @test_int_x86_avx512_mask_pmulhr_sw_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
1494 ; X86-LABEL: test_int_x86_avx512_mask_pmulhr_sw_256:
1496 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1497 ; X86-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x0b,0xd1]
1498 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1499 ; X86-NEXT: retl # encoding: [0xc3]
1501 ; X64-LABEL: test_int_x86_avx512_mask_pmulhr_sw_256:
1503 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1504 ; X64-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x0b,0xd1]
1505 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1506 ; X64-NEXT: retq # encoding: [0xc3]
1507 %1 = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %x0, <16 x i16> %x1)
1508 %2 = bitcast i16 %x3 to <16 x i1>
1509 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x2
1513 declare <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16>, <16 x i8>, i8)
1515 define <16 x i8>@test_int_x86_avx512_mask_pmov_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) {
1516 ; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_128:
1518 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1519 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1520 ; X86-NEXT: vpmovwb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x30,0xc2]
1521 ; X86-NEXT: vpmovwb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x30,0xc1]
1522 ; X86-NEXT: vpmovwb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x30,0xc0]
1523 ; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
1524 ; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
1525 ; X86-NEXT: retl # encoding: [0xc3]
1527 ; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_128:
1529 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1530 ; X64-NEXT: vpmovwb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x30,0xc2]
1531 ; X64-NEXT: vpmovwb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x30,0xc1]
1532 ; X64-NEXT: vpmovwb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x30,0xc0]
1533 ; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
1534 ; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
1535 ; X64-NEXT: retq # encoding: [0xc3]
1536 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1)
1537 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2)
1538 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> zeroinitializer, i8 %x2)
1539 %res3 = add <16 x i8> %res0, %res1
1540 %res4 = add <16 x i8> %res3, %res2
1544 declare void @llvm.x86.avx512.mask.pmov.wb.mem.128(i8* %ptr, <8 x i16>, i8)
1546 define void @test_int_x86_avx512_mask_pmov_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) {
1547 ; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_128:
1549 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
1550 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1551 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1552 ; X86-NEXT: vpmovwb %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x30,0x00]
1553 ; X86-NEXT: vpmovwb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x30,0x00]
1554 ; X86-NEXT: retl # encoding: [0xc3]
1556 ; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_128:
1558 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1559 ; X64-NEXT: vpmovwb %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x30,0x07]
1560 ; X64-NEXT: vpmovwb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x30,0x07]
1561 ; X64-NEXT: retq # encoding: [0xc3]
1562 call void @llvm.x86.avx512.mask.pmov.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 -1)
1563 call void @llvm.x86.avx512.mask.pmov.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 %x2)
1567 declare <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16>, <16 x i8>, i8)
1569 define <16 x i8>@test_int_x86_avx512_mask_pmovs_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) {
1570 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_wb_128:
1572 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1573 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1574 ; X86-NEXT: vpmovswb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x20,0xc2]
1575 ; X86-NEXT: vpmovswb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x20,0xc1]
1576 ; X86-NEXT: vpmovswb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x20,0xc0]
1577 ; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
1578 ; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
1579 ; X86-NEXT: retl # encoding: [0xc3]
1581 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_wb_128:
1583 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1584 ; X64-NEXT: vpmovswb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x20,0xc2]
1585 ; X64-NEXT: vpmovswb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x20,0xc1]
1586 ; X64-NEXT: vpmovswb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x20,0xc0]
1587 ; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
1588 ; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
1589 ; X64-NEXT: retq # encoding: [0xc3]
1590 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1)
1591 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2)
1592 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> zeroinitializer, i8 %x2)
1593 %res3 = add <16 x i8> %res0, %res1
1594 %res4 = add <16 x i8> %res3, %res2
1598 declare void @llvm.x86.avx512.mask.pmovs.wb.mem.128(i8* %ptr, <8 x i16>, i8)
1600 define void @test_int_x86_avx512_mask_pmovs_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) {
1601 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_128:
1603 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
1604 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1605 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1606 ; X86-NEXT: vpmovswb %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x20,0x00]
1607 ; X86-NEXT: vpmovswb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x20,0x00]
1608 ; X86-NEXT: retl # encoding: [0xc3]
1610 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_128:
1612 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1613 ; X64-NEXT: vpmovswb %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x20,0x07]
1614 ; X64-NEXT: vpmovswb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x20,0x07]
1615 ; X64-NEXT: retq # encoding: [0xc3]
1616 call void @llvm.x86.avx512.mask.pmovs.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 -1)
1617 call void @llvm.x86.avx512.mask.pmovs.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 %x2)
1621 declare <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16>, <16 x i8>, i8)
1623 define <16 x i8>@test_int_x86_avx512_mask_pmovus_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) {
1624 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_wb_128:
1626 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1627 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1628 ; X86-NEXT: vpmovuswb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x10,0xc2]
1629 ; X86-NEXT: vpmovuswb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x10,0xc1]
1630 ; X86-NEXT: vpmovuswb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x10,0xc0]
1631 ; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
1632 ; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
1633 ; X86-NEXT: retl # encoding: [0xc3]
1635 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_wb_128:
1637 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1638 ; X64-NEXT: vpmovuswb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x10,0xc2]
1639 ; X64-NEXT: vpmovuswb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x10,0xc1]
1640 ; X64-NEXT: vpmovuswb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x10,0xc0]
1641 ; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
1642 ; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
1643 ; X64-NEXT: retq # encoding: [0xc3]
1644 %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1)
1645 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2)
1646 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> zeroinitializer, i8 %x2)
1647 %res3 = add <16 x i8> %res0, %res1
1648 %res4 = add <16 x i8> %res3, %res2
1652 declare void @llvm.x86.avx512.mask.pmovus.wb.mem.128(i8* %ptr, <8 x i16>, i8)
1654 define void @test_int_x86_avx512_mask_pmovus_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) {
1655 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_128:
1657 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
1658 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1659 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1660 ; X86-NEXT: vpmovuswb %xmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x08,0x10,0x00]
1661 ; X86-NEXT: vpmovuswb %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x10,0x00]
1662 ; X86-NEXT: retl # encoding: [0xc3]
1664 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_128:
1666 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1667 ; X64-NEXT: vpmovuswb %xmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x08,0x10,0x07]
1668 ; X64-NEXT: vpmovuswb %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x10,0x07]
1669 ; X64-NEXT: retq # encoding: [0xc3]
1670 call void @llvm.x86.avx512.mask.pmovus.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 -1)
1671 call void @llvm.x86.avx512.mask.pmovus.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 %x2)
1675 define <16 x i8>@test_int_x86_avx512_pmov_wb_256(<16 x i16> %x0) {
1676 ; CHECK-LABEL: test_int_x86_avx512_pmov_wb_256:
1678 ; CHECK-NEXT: vpmovwb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x30,0xc0]
1679 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1680 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1681 %1 = trunc <16 x i16> %x0 to <16 x i8>
1685 define <16 x i8>@test_int_x86_avx512_mask_pmov_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) {
1686 ; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_256:
1688 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1689 ; X86-NEXT: vpmovwb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x30,0xc1]
1690 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1691 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1692 ; X86-NEXT: retl # encoding: [0xc3]
1694 ; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_256:
1696 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1697 ; X64-NEXT: vpmovwb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x30,0xc1]
1698 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1699 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1700 ; X64-NEXT: retq # encoding: [0xc3]
1701 %1 = trunc <16 x i16> %x0 to <16 x i8>
1702 %2 = bitcast i16 %x2 to <16 x i1>
1703 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %x1
1707 define <16 x i8>@test_int_x86_avx512_maskz_pmov_wb_256(<16 x i16> %x0, i16 %x2) {
1708 ; X86-LABEL: test_int_x86_avx512_maskz_pmov_wb_256:
1710 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1711 ; X86-NEXT: vpmovwb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x30,0xc0]
1712 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1713 ; X86-NEXT: retl # encoding: [0xc3]
1715 ; X64-LABEL: test_int_x86_avx512_maskz_pmov_wb_256:
1717 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1718 ; X64-NEXT: vpmovwb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x30,0xc0]
1719 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1720 ; X64-NEXT: retq # encoding: [0xc3]
1721 %1 = trunc <16 x i16> %x0 to <16 x i8>
1722 %2 = bitcast i16 %x2 to <16 x i1>
1723 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
1727 declare void @llvm.x86.avx512.mask.pmov.wb.mem.256(i8* %ptr, <16 x i16>, i16)
1729 define void @test_int_x86_avx512_mask_pmov_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) {
1730 ; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_256:
1732 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
1733 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1734 ; X86-NEXT: vpmovwb %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x30,0x00]
1735 ; X86-NEXT: vpmovwb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x30,0x00]
1736 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1737 ; X86-NEXT: retl # encoding: [0xc3]
1739 ; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_256:
1741 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1742 ; X64-NEXT: vpmovwb %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x30,0x07]
1743 ; X64-NEXT: vpmovwb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x30,0x07]
1744 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1745 ; X64-NEXT: retq # encoding: [0xc3]
1746 call void @llvm.x86.avx512.mask.pmov.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 -1)
1747 call void @llvm.x86.avx512.mask.pmov.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 %x2)
1751 declare <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16>, <16 x i8>, i16)
1753 define <16 x i8>@test_int_x86_avx512_pmovs_wb_256(<16 x i16> %x0, <16 x i8> %x1) {
1754 ; CHECK-LABEL: test_int_x86_avx512_pmovs_wb_256:
1756 ; CHECK-NEXT: vpmovswb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x20,0xc0]
1757 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1758 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1759 %res = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 -1)
1763 define <16 x i8>@test_int_x86_avx512_mask_pmovs_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) {
1764 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_wb_256:
1766 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1767 ; X86-NEXT: vpmovswb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x20,0xc1]
1768 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1769 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1770 ; X86-NEXT: retl # encoding: [0xc3]
1772 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_wb_256:
1774 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1775 ; X64-NEXT: vpmovswb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x20,0xc1]
1776 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1777 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1778 ; X64-NEXT: retq # encoding: [0xc3]
1779 %res = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2)
1783 define <16 x i8>@test_int_x86_avx512_maskz_pmovs_wb_256(<16 x i16> %x0, i16 %x2) {
1784 ; X86-LABEL: test_int_x86_avx512_maskz_pmovs_wb_256:
1786 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1787 ; X86-NEXT: vpmovswb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x20,0xc0]
1788 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1789 ; X86-NEXT: retl # encoding: [0xc3]
1791 ; X64-LABEL: test_int_x86_avx512_maskz_pmovs_wb_256:
1793 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1794 ; X64-NEXT: vpmovswb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x20,0xc0]
1795 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1796 ; X64-NEXT: retq # encoding: [0xc3]
1797 %res = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %x0, <16 x i8> zeroinitializer, i16 %x2)
1801 declare void @llvm.x86.avx512.mask.pmovs.wb.mem.256(i8* %ptr, <16 x i16>, i16)
1803 define void @test_int_x86_avx512_mask_pmovs_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) {
1804 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_256:
1806 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
1807 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1808 ; X86-NEXT: vpmovswb %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x20,0x00]
1809 ; X86-NEXT: vpmovswb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x20,0x00]
1810 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1811 ; X86-NEXT: retl # encoding: [0xc3]
1813 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_256:
1815 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1816 ; X64-NEXT: vpmovswb %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x20,0x07]
1817 ; X64-NEXT: vpmovswb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x20,0x07]
1818 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1819 ; X64-NEXT: retq # encoding: [0xc3]
1820 call void @llvm.x86.avx512.mask.pmovs.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 -1)
1821 call void @llvm.x86.avx512.mask.pmovs.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 %x2)
1825 declare <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16>, <16 x i8>, i16)
1827 define <16 x i8>@test_int_x86_avx512_pmovus_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) {
1828 ; CHECK-LABEL: test_int_x86_avx512_pmovus_wb_256:
1830 ; CHECK-NEXT: vpmovuswb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x10,0xc0]
1831 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1832 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1833 %res = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 -1)
1837 define <16 x i8>@test_int_x86_avx512_mask_pmovus_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) {
1838 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_wb_256:
1840 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1841 ; X86-NEXT: vpmovuswb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x10,0xc1]
1842 ; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1843 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1844 ; X86-NEXT: retl # encoding: [0xc3]
1846 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_wb_256:
1848 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1849 ; X64-NEXT: vpmovuswb %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x10,0xc1]
1850 ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
1851 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1852 ; X64-NEXT: retq # encoding: [0xc3]
1853 %res = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2)
1857 define <16 x i8>@test_int_x86_avx512_maskz_pmovus_wb_256(<16 x i16> %x0, i16 %x2) {
1858 ; X86-LABEL: test_int_x86_avx512_maskz_pmovus_wb_256:
1860 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1861 ; X86-NEXT: vpmovuswb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x10,0xc0]
1862 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1863 ; X86-NEXT: retl # encoding: [0xc3]
1865 ; X64-LABEL: test_int_x86_avx512_maskz_pmovus_wb_256:
1867 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1868 ; X64-NEXT: vpmovuswb %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x10,0xc0]
1869 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1870 ; X64-NEXT: retq # encoding: [0xc3]
1871 %res = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %x0, <16 x i8> zeroinitializer, i16 %x2)
1875 declare void @llvm.x86.avx512.mask.pmovus.wb.mem.256(i8* %ptr, <16 x i16>, i16)
1877 define void @test_int_x86_avx512_mask_pmovus_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) {
1878 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_256:
1880 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
1881 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1882 ; X86-NEXT: vpmovuswb %ymm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x28,0x10,0x00]
1883 ; X86-NEXT: vpmovuswb %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x10,0x00]
1884 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1885 ; X86-NEXT: retl # encoding: [0xc3]
1887 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_256:
1889 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1890 ; X64-NEXT: vpmovuswb %ymm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x28,0x10,0x07]
1891 ; X64-NEXT: vpmovuswb %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x10,0x07]
1892 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1893 ; X64-NEXT: retq # encoding: [0xc3]
1894 call void @llvm.x86.avx512.mask.pmovus.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 -1)
1895 call void @llvm.x86.avx512.mask.pmovus.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 %x2)
1899 declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>)
1901 define <4 x i32> @test_int_x86_avx512_mask_pmaddw_d_128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 %x3) {
1902 ; X86-LABEL: test_int_x86_avx512_mask_pmaddw_d_128:
1904 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1905 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1906 ; X86-NEXT: vpmaddwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf5,0xd1]
1907 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1908 ; X86-NEXT: retl # encoding: [0xc3]
1910 ; X64-LABEL: test_int_x86_avx512_mask_pmaddw_d_128:
1912 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1913 ; X64-NEXT: vpmaddwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf5,0xd1]
1914 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1915 ; X64-NEXT: retq # encoding: [0xc3]
1916 %1 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %x0, <8 x i16> %x1)
1917 %2 = bitcast i8 %x3 to <8 x i1>
1918 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1919 %3 = select <4 x i1> %extract, <4 x i32> %1, <4 x i32> %x2
1923 declare <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>)
1925 define <8 x i32> @test_int_x86_avx512_mask_pmaddw_d_256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 %x3) {
1926 ; X86-LABEL: test_int_x86_avx512_mask_pmaddw_d_256:
1928 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1929 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1930 ; X86-NEXT: vpmaddwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf5,0xd1]
1931 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1932 ; X86-NEXT: retl # encoding: [0xc3]
1934 ; X64-LABEL: test_int_x86_avx512_mask_pmaddw_d_256:
1936 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1937 ; X64-NEXT: vpmaddwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf5,0xd1]
1938 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1939 ; X64-NEXT: retq # encoding: [0xc3]
1940 %1 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %x0, <16 x i16> %x1)
1941 %2 = bitcast i8 %x3 to <8 x i1>
1942 %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x2
1946 declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>)
1948 define <8 x i16> @test_int_x86_avx512_mask_pmaddubs_w_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 %x3) {
1949 ; X86-LABEL: test_int_x86_avx512_mask_pmaddubs_w_128:
1951 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1952 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1953 ; X86-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x04,0xd1]
1954 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1955 ; X86-NEXT: retl # encoding: [0xc3]
1957 ; X64-LABEL: test_int_x86_avx512_mask_pmaddubs_w_128:
1959 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1960 ; X64-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x04,0xd1]
1961 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
1962 ; X64-NEXT: retq # encoding: [0xc3]
1963 %1 = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %x0, <16 x i8> %x1)
1964 %2 = bitcast i8 %x3 to <8 x i1>
1965 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x2
1969 declare <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>)
1971 define <16 x i16> @test_int_x86_avx512_mask_pmaddubs_w_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 %x3) {
1972 ; X86-LABEL: test_int_x86_avx512_mask_pmaddubs_w_256:
1974 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1975 ; X86-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x04,0xd1]
1976 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1977 ; X86-NEXT: retl # encoding: [0xc3]
1979 ; X64-LABEL: test_int_x86_avx512_mask_pmaddubs_w_256:
1981 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1982 ; X64-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x04,0xd1]
1983 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
1984 ; X64-NEXT: retq # encoding: [0xc3]
1985 %1 = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %x0, <32 x i8> %x1)
1986 %2 = bitcast i16 %x3 to <16 x i1>
1987 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x2
1991 declare <8 x i16> @llvm.x86.avx512.dbpsadbw.128(<16 x i8>, <16 x i8>, i32)
1993 define <8 x i16>@test_int_x86_avx512_mask_dbpsadbw_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x3, i8 %x4) {
1994 ; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_128:
1996 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1997 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1998 ; X86-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x42,0xd1,0x02]
1999 ; X86-NEXT: vdbpsadbw $3, %xmm1, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x42,0xd9,0x03]
2000 ; X86-NEXT: vdbpsadbw $4, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xc1,0x04]
2001 ; X86-NEXT: vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0]
2002 ; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
2003 ; X86-NEXT: retl # encoding: [0xc3]
2005 ; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_128:
2007 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2008 ; X64-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x42,0xd1,0x02]
2009 ; X64-NEXT: vdbpsadbw $3, %xmm1, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x42,0xd9,0x03]
2010 ; X64-NEXT: vdbpsadbw $4, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xc1,0x04]
2011 ; X64-NEXT: vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0]
2012 ; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
2013 ; X64-NEXT: retq # encoding: [0xc3]
2014 %1 = call <8 x i16> @llvm.x86.avx512.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2)
2015 %2 = bitcast i8 %x4 to <8 x i1>
2016 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x3
2017 %4 = call <8 x i16> @llvm.x86.avx512.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 3)
2018 %5 = bitcast i8 %x4 to <8 x i1>
2019 %6 = select <8 x i1> %5, <8 x i16> %4, <8 x i16> zeroinitializer
2020 %7 = call <8 x i16> @llvm.x86.avx512.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 4)
2021 %res3 = add <8 x i16> %3, %6
2022 %res4 = add <8 x i16> %7, %res3
2026 declare <16 x i16> @llvm.x86.avx512.dbpsadbw.256(<32 x i8>, <32 x i8>, i32)
2028 define <16 x i16>@test_int_x86_avx512_mask_dbpsadbw_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x3, i16 %x4) {
2029 ; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_256:
2031 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2032 ; X86-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x42,0xd1,0x02]
2033 ; X86-NEXT: vdbpsadbw $3, %ymm1, %ymm0, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x42,0xd9,0x03]
2034 ; X86-NEXT: vdbpsadbw $4, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xc1,0x04]
2035 ; X86-NEXT: vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0]
2036 ; X86-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
2037 ; X86-NEXT: retl # encoding: [0xc3]
2039 ; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_256:
2041 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2042 ; X64-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x42,0xd1,0x02]
2043 ; X64-NEXT: vdbpsadbw $3, %ymm1, %ymm0, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x42,0xd9,0x03]
2044 ; X64-NEXT: vdbpsadbw $4, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xc1,0x04]
2045 ; X64-NEXT: vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0]
2046 ; X64-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
2047 ; X64-NEXT: retq # encoding: [0xc3]
2048 %1 = call <16 x i16> @llvm.x86.avx512.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 2)
2049 %2 = bitcast i16 %x4 to <16 x i1>
2050 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x3
2051 %4 = call <16 x i16> @llvm.x86.avx512.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 3)
2052 %5 = bitcast i16 %x4 to <16 x i1>
2053 %6 = select <16 x i1> %5, <16 x i16> %4, <16 x i16> zeroinitializer
2054 %7 = call <16 x i16> @llvm.x86.avx512.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 4)
2055 %res3 = add <16 x i16> %3, %6
2056 %res4 = add <16 x i16> %res3, %7
2057 ret <16 x i16> %res4
2060 declare <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16>, <16 x i16>, <16 x i16>, i16)
2062 define <16 x i16>@test_int_x86_avx512_psrlv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2) {
2063 ; CHECK-LABEL: test_int_x86_avx512_psrlv16_hi:
2065 ; CHECK-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x10,0xc1]
2066 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2067 %res = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
2071 define <16 x i16>@test_int_x86_avx512_mask_psrlv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
2072 ; X86-LABEL: test_int_x86_avx512_mask_psrlv16_hi:
2074 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2075 ; X86-NEXT: vpsrlvw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x10,0xd1]
2076 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2077 ; X86-NEXT: retl # encoding: [0xc3]
2079 ; X64-LABEL: test_int_x86_avx512_mask_psrlv16_hi:
2081 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2082 ; X64-NEXT: vpsrlvw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x10,0xd1]
2083 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2084 ; X64-NEXT: retq # encoding: [0xc3]
2085 %res = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
2089 define <16 x i16>@test_int_x86_avx512_maskz_psrlv16_hi(<16 x i16> %x0, <16 x i16> %x1, i16 %x3) {
2090 ; X86-LABEL: test_int_x86_avx512_maskz_psrlv16_hi:
2092 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2093 ; X86-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x10,0xc1]
2094 ; X86-NEXT: retl # encoding: [0xc3]
2096 ; X64-LABEL: test_int_x86_avx512_maskz_psrlv16_hi:
2098 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2099 ; X64-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x10,0xc1]
2100 ; X64-NEXT: retq # encoding: [0xc3]
2101 %res = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
2105 declare <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16>, <8 x i16>, <8 x i16>, i8)
2107 define <8 x i16>@test_int_x86_avx512_psrlv8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2) {
2108 ; CHECK-LABEL: test_int_x86_avx512_psrlv8_hi:
2110 ; CHECK-NEXT: vpsrlvw %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x10,0xc1]
2111 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2112 %res = call <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
2116 define <8 x i16>@test_int_x86_avx512_mask_psrlv8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
2117 ; X86-LABEL: test_int_x86_avx512_mask_psrlv8_hi:
2119 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2120 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
2121 ; X86-NEXT: vpsrlvw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x10,0xd1]
2122 ; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2123 ; X86-NEXT: retl # encoding: [0xc3]
2125 ; X64-LABEL: test_int_x86_avx512_mask_psrlv8_hi:
2127 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2128 ; X64-NEXT: vpsrlvw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x10,0xd1]
2129 ; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
2130 ; X64-NEXT: retq # encoding: [0xc3]
2131 %res = call <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
2135 define <8 x i16>@test_int_x86_avx512_maskz_psrlv8_hi(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) {
2136 ; X86-LABEL: test_int_x86_avx512_maskz_psrlv8_hi:
2138 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
2139 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
2140 ; X86-NEXT: vpsrlvw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x10,0xc1]
2141 ; X86-NEXT: retl # encoding: [0xc3]
2143 ; X64-LABEL: test_int_x86_avx512_maskz_psrlv8_hi:
2145 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2146 ; X64-NEXT: vpsrlvw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x10,0xc1]
2147 ; X64-NEXT: retq # encoding: [0xc3]
2148 %res = call <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
2153 define <8 x i16> @test_int_x86_avx512_psrlv_w_128_const() optsize {
2154 ; X86-LABEL: test_int_x86_avx512_psrlv_w_128_const:
2156 ; X86-NEXT: vmovdqa {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # EVEX TO VEX Compression xmm0 = [4,4,4,4,4,4,4,65535]
2157 ; X86-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
2158 ; X86-NEXT: # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
2159 ; X86-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x10,0x05,A,A,A,A]
2160 ; X86-NEXT: # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
2161 ; X86-NEXT: retl # encoding: [0xc3]
2163 ; X64-LABEL: test_int_x86_avx512_psrlv_w_128_const:
2165 ; X64-NEXT: vmovdqa {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [4,4,4,4,4,4,4,65535]
2166 ; X64-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
2167 ; X64-NEXT: # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
2168 ; X64-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x10,0x05,A,A,A,A]
2169 ; X64-NEXT: # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
2170 ; X64-NEXT: retq # encoding: [0xc3]
2171 %res = call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 -1>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 -1>)
2175 declare <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16>, <8 x i16>)
2177 define <16 x i16> @test_int_x86_avx512_psrlv_w_256_const() optsize {
2178 ; X86-LABEL: test_int_x86_avx512_psrlv_w_256_const:
2180 ; X86-NEXT: vmovdqa {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0 # EVEX TO VEX Compression ymm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
2181 ; X86-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
2182 ; X86-NEXT: # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
2183 ; X86-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x10,0x05,A,A,A,A]
2184 ; X86-NEXT: # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
2185 ; X86-NEXT: retl # encoding: [0xc3]
2187 ; X64-LABEL: test_int_x86_avx512_psrlv_w_256_const:
2189 ; X64-NEXT: vmovdqa {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
2190 ; X64-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
2191 ; X64-NEXT: # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
2192 ; X64-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x10,0x05,A,A,A,A]
2193 ; X64-NEXT: # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
2194 ; X64-NEXT: retq # encoding: [0xc3]
2195 %res = call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 -1>, <16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 -1>)
2199 declare <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16>, <16 x i16>)
2201 declare <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16>, <16 x i16>)
2203 define <16 x i16>@test_int_x86_avx512_psrav16_hi(<16 x i16> %x0, <16 x i16> %x1) {
2204 ; CHECK-LABEL: test_int_x86_avx512_psrav16_hi:
2206 ; CHECK-NEXT: vpsravw %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x11,0xc1]
2207 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2208 %1 = call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %x0, <16 x i16> %x1)
2212 define <16 x i16>@test_int_x86_avx512_mask_psrav16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
2213 ; X86-LABEL: test_int_x86_avx512_mask_psrav16_hi:
2215 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2216 ; X86-NEXT: vpsravw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x11,0xd1]
2217 ; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2218 ; X86-NEXT: retl # encoding: [0xc3]
2220 ; X64-LABEL: test_int_x86_avx512_mask_psrav16_hi:
2222 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2223 ; X64-NEXT: vpsravw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x11,0xd1]
2224 ; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
2225 ; X64-NEXT: retq # encoding: [0xc3]
2226 %1 = call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %x0, <16 x i16> %x1)
2227 %2 = bitcast i16 %x3 to <16 x i1>
2228 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x2
2232 define <16 x i16>@test_int_x86_avx512_maskz_psrav16_hi(<16 x i16> %x0, <16 x i16> %x1, i16 %x3) {
2233 ; X86-LABEL: test_int_x86_avx512_maskz_psrav16_hi:
2235 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2236 ; X86-NEXT: vpsravw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x11,0xc1]
2237 ; X86-NEXT: retl # encoding: [0xc3]
2239 ; X64-LABEL: test_int_x86_avx512_maskz_psrav16_hi:
2241 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2242 ; X64-NEXT: vpsravw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x11,0xc1]
2243 ; X64-NEXT: retq # encoding: [0xc3]
2244 %1 = call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %x0, <16 x i16> %x1)
2245 %2 = bitcast i16 %x3 to <16 x i1>
2246 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
declare <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16>, <8 x i16>)

define <8 x i16>@test_int_x86_avx512_psrav8_hi(<8 x i16> %x0, <8 x i16> %x1) {
; CHECK-LABEL: test_int_x86_avx512_psrav8_hi:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsravw %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x11,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%1 = call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %x0, <8 x i16> %x1)
ret <8 x i16> %1
}

define <8 x i16>@test_int_x86_avx512_mask_psrav8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrav8_hi:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpsravw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x11,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrav8_hi:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsravw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x11,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
%1 = call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %x0, <8 x i16> %x1)
%2 = bitcast i8 %x3 to <8 x i1>
%3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x2
ret <8 x i16> %3
}

define <8 x i16>@test_int_x86_avx512_maskz_psrav8_hi(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psrav8_hi:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpsravw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x11,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psrav8_hi:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsravw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x11,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
%1 = call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %x0, <8 x i16> %x1)
%2 = bitcast i8 %x3 to <8 x i1>
%3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
ret <8 x i16> %3
}
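
; Variable per-element logical left shifts of 16-bit words (vpsllvw), again in
; plain, merge-masked ({%k1}) and zero-masked ({%k1} {z}) forms for 256-bit and
; 128-bit vectors.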
define <16 x i16>@test_int_x86_avx512_psllv16_hi(<16 x i16> %x0, <16 x i16> %x1) {
; CHECK-LABEL: test_int_x86_avx512_psllv16_hi:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsllvw %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x12,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%1 = call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %x0, <16 x i16> %x1)
ret <16 x i16> %1
}

define <16 x i16>@test_int_x86_avx512_mask_psllv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psllv16_hi:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsllvw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x12,0xd1]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psllv16_hi:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsllvw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x12,0xd1]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
%1 = call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %x0, <16 x i16> %x1)
%2 = bitcast i16 %x3 to <16 x i1>
%3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x2
ret <16 x i16> %3
}

define <16 x i16>@test_int_x86_avx512_maskz_psllv16_hi(<16 x i16> %x0, <16 x i16> %x1, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psllv16_hi:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsllvw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x12,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psllv16_hi:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsllvw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x12,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
%1 = call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %x0, <16 x i16> %x1)
%2 = bitcast i16 %x3 to <16 x i1>
%3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
ret <16 x i16> %3
}
define <8 x i16>@test_int_x86_avx512_psllv8_hi(<8 x i16> %x0, <8 x i16> %x1) {
; CHECK-LABEL: test_int_x86_avx512_psllv8_hi:
; CHECK: # %bb.0:
; CHECK-NEXT: vpsllvw %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x12,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%1 = call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %x0, <8 x i16> %x1)
ret <8 x i16> %1
}

define <8 x i16>@test_int_x86_avx512_mask_psllv8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psllv8_hi:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpsllvw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x12,0xd1]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psllv8_hi:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsllvw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x12,0xd1]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
%1 = call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %x0, <8 x i16> %x1)
%2 = bitcast i8 %x3 to <8 x i1>
%3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x2
ret <8 x i16> %3
}

define <8 x i16>@test_int_x86_avx512_maskz_psllv8_hi(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psllv8_hi:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpsllvw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x12,0xc1]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psllv8_hi:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpsllvw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x12,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
%1 = call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %x0, <8 x i16> %x1)
%2 = bitcast i8 %x3 to <8 x i1>
%3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
ret <8 x i16> %3
}
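
; Constant-operand vpsllvw tests (optsize): the vector constants are expected to
; be loaded from the constant pool (absolute addressing on x86, RIP-relative on
; x64) and the shift performed with a memory operand.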
define <8 x i16> @test_int_x86_avx512_psllv_w_128_const() optsize {
; X86-LABEL: test_int_x86_avx512_psllv_w_128_const:
; X86: # %bb.0:
; X86-NEXT: vmovdqa {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # EVEX TO VEX Compression xmm0 = [4,4,4,4,4,4,4,65535]
; X86-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X86-NEXT: # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x12,0x05,A,A,A,A]
; X86-NEXT: # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_psllv_w_128_const:
; X64: # %bb.0:
; X64-NEXT: vmovdqa {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [4,4,4,4,4,4,4,65535]
; X64-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X64-NEXT: # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x12,0x05,A,A,A,A]
; X64-NEXT: # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-NEXT: retq # encoding: [0xc3]
%res = call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 -1>, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 -1>)
ret <8 x i16> %res
}

declare <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16>, <8 x i16>)
define <16 x i16> @test_int_x86_avx512_psllv_w_256_const() optsize {
; X86-LABEL: test_int_x86_avx512_psllv_w_256_const:
; X86: # %bb.0:
; X86-NEXT: vmovdqa {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0 # EVEX TO VEX Compression ymm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
; X86-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X86-NEXT: # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x12,0x05,A,A,A,A]
; X86-NEXT: # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_psllv_w_256_const:
; X64: # %bb.0:
; X64-NEXT: vmovdqa {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
; X64-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X64-NEXT: # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x12,0x05,A,A,A,A]
; X64-NEXT: # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-NEXT: retq # encoding: [0xc3]
%res = call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 -1>, <16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 -1>)
ret <16 x i16> %res
}

declare <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16>, <16 x i16>)
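
; Full 16-bit element permutes (vpermw, llvm.x86.avx512.permvar.hi.*): words from
; %x0 are selected using the indices in %x1, in plain, merge-masked and
; zero-masked forms at 128-bit and 256-bit widths.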
declare <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16>, <8 x i16>)

define <8 x i16>@test_int_x86_avx512_permvar_hi_128(<8 x i16> %x0, <8 x i16> %x1) {
; CHECK-LABEL: test_int_x86_avx512_permvar_hi_128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermw %xmm0, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x8d,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1)
ret <8 x i16> %1
}

define <8 x i16>@test_int_x86_avx512_mask_permvar_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_permvar_hi_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpermw %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x8d,0xd0]
; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_permvar_hi_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpermw %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x8d,0xd0]
; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
%1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1)
%2 = bitcast i8 %x3 to <8 x i1>
%3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %x2
ret <8 x i16> %3
}

define <8 x i16>@test_int_x86_avx512_maskz_permvar_hi_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_permvar_hi_128:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT: vpermw %xmm0, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x8d,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_permvar_hi_128:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpermw %xmm0, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x8d,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1)
%2 = bitcast i8 %x3 to <8 x i1>
%3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
ret <8 x i16> %3
}
declare <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16>, <16 x i16>)

define <16 x i16>@test_int_x86_avx512_permvar_hi_256(<16 x i16> %x0, <16 x i16> %x1) {
; CHECK-LABEL: test_int_x86_avx512_permvar_hi_256:
; CHECK: # %bb.0:
; CHECK-NEXT: vpermw %ymm0, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x8d,0xc0]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1)
ret <16 x i16> %1
}

define <16 x i16>@test_int_x86_avx512_mask_permvar_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_permvar_hi_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpermw %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x8d,0xd0]
; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_permvar_hi_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpermw %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x8d,0xd0]
; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT: retq # encoding: [0xc3]
%1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1)
%2 = bitcast i16 %x3 to <16 x i1>
%3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %x2
ret <16 x i16> %3
}

define <16 x i16>@test_int_x86_avx512_maskz_permvar_hi_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_permvar_hi_256:
; X86: # %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpermw %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x8d,0xc0]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_permvar_hi_256:
; X64: # %bb.0:
; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT: vpermw %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x8d,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
%1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1)
%2 = bitcast i16 %x3 to <16 x i1>
%3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
ret <16 x i16> %3
}