1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx512bw --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
3 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512bw --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
; Builds two <32 x i1> masks by comparing %A and %B against zero, adds them
; with llvm.x86.avx512.kadd.d, then tests whether the i32 bitcast of the sum
; is zero. Both runs expect vptestmw x2, kaddd, and a kortestd/sete pair.
5 define i32 @test_int_x86_avx512_kadd_d(<32 x i16> %A, <32 x i16> %B) nounwind {
6 ; CHECK-LABEL: test_int_x86_avx512_kadd_d:
7 ; CHECK: # %bb.0: # %entry
8 ; CHECK-NEXT: vptestmw %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x26,0xc0]
9 ; CHECK-NEXT: vptestmw %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0xf5,0x48,0x26,0xc9]
10 ; CHECK-NEXT: kaddd %k1, %k0, %k0 # encoding: [0xc4,0xe1,0xfd,0x4a,0xc1]
11 ; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
12 ; CHECK-NEXT: kortestd %k0, %k0 # encoding: [0xc4,0xe1,0xf9,0x98,0xc0]
13 ; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
14 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
15 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
17 %0 = icmp ne <32 x i16> %A, zeroinitializer
18 %1 = icmp ne <32 x i16> %B, zeroinitializer
19 %2 = call <32 x i1> @llvm.x86.avx512.kadd.d(<32 x i1> %0, <32 x i1> %1)
20 %3 = bitcast <32 x i1> %2 to i32
21 %4 = icmp eq i32 %3, 0
22 %5 = zext i1 %4 to i32
25 declare <32 x i1> @llvm.x86.avx512.kadd.d(<32 x i1>, <32 x i1>)
; 64-bit counterpart of the kadd.d test: masks come from <64 x i8> compares,
; are added with llvm.x86.avx512.kadd.q, and the i64 bitcast is compared to 0.
; The two runs diverge on the zero test: the i686 run expects the upper half
; to be shifted down (kshiftrq $32) and checked with kortestd, while the
; x86_64 run expects a single kortestq.
27 define i32 @test_int_x86_avx512_kadd_q(<64 x i8> %A, <64 x i8> %B) nounwind {
28 ; X86-LABEL: test_int_x86_avx512_kadd_q:
29 ; X86: # %bb.0: # %entry
30 ; X86-NEXT: vptestmb %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x48,0x26,0xc0]
31 ; X86-NEXT: vptestmb %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0x75,0x48,0x26,0xc9]
32 ; X86-NEXT: kaddq %k1, %k0, %k0 # encoding: [0xc4,0xe1,0xfc,0x4a,0xc1]
33 ; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
34 ; X86-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
35 ; X86-NEXT: kortestd %k1, %k0 # encoding: [0xc4,0xe1,0xf9,0x98,0xc1]
36 ; X86-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
37 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
38 ; X86-NEXT: retl # encoding: [0xc3]
40 ; X64-LABEL: test_int_x86_avx512_kadd_q:
41 ; X64: # %bb.0: # %entry
42 ; X64-NEXT: vptestmb %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x48,0x26,0xc0]
43 ; X64-NEXT: vptestmb %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0x75,0x48,0x26,0xc9]
44 ; X64-NEXT: kaddq %k1, %k0, %k0 # encoding: [0xc4,0xe1,0xfc,0x4a,0xc1]
45 ; X64-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
46 ; X64-NEXT: kortestq %k0, %k0 # encoding: [0xc4,0xe1,0xf8,0x98,0xc0]
47 ; X64-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
48 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
49 ; X64-NEXT: retq # encoding: [0xc3]
51 %0 = icmp ne <64 x i8> %A, zeroinitializer
52 %1 = icmp ne <64 x i8> %B, zeroinitializer
53 %2 = call <64 x i1> @llvm.x86.avx512.kadd.q(<64 x i1> %0, <64 x i1> %1)
54 %3 = bitcast <64 x i1> %2 to i64
55 %4 = icmp eq i64 %3, 0
56 %5 = zext i1 %4 to i32
59 declare <64 x i1> @llvm.x86.avx512.kadd.q(<64 x i1>, <64 x i1>)
; Passes two <32 x i1> masks (icmp ne against zero of %A and %B) to
; llvm.x86.avx512.ktestc.d. Expected lowering is ktestd followed by setb,
; i.e. the result is read from the carry flag.
61 define i32 @test_x86_avx512_ktestc_d(<32 x i16> %A, <32 x i16> %B) {
62 ; CHECK-LABEL: test_x86_avx512_ktestc_d:
64 ; CHECK-NEXT: vptestmw %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x26,0xc0]
65 ; CHECK-NEXT: vptestmw %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0xf5,0x48,0x26,0xc9]
66 ; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
67 ; CHECK-NEXT: ktestd %k1, %k0 # encoding: [0xc4,0xe1,0xf9,0x99,0xc1]
68 ; CHECK-NEXT: setb %al # encoding: [0x0f,0x92,0xc0]
69 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
70 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
71 %1 = icmp ne <32 x i16> %A, zeroinitializer
72 %2 = icmp ne <32 x i16> %B, zeroinitializer
73 %res = call i32 @llvm.x86.avx512.ktestc.d(<32 x i1> %1, <32 x i1> %2) ; <i32> [#uses=1]
76 declare i32 @llvm.x86.avx512.ktestc.d(<32 x i1>, <32 x i1>) nounwind readnone
; Passes two <32 x i1> masks (icmp ne against zero of %A and %B) to
; llvm.x86.avx512.ktestz.d. Expected lowering is ktestd followed by sete,
; i.e. the result is read from the zero flag.
78 define i32 @test_x86_avx512_ktestz_d(<32 x i16> %A, <32 x i16> %B) {
79 ; CHECK-LABEL: test_x86_avx512_ktestz_d:
81 ; CHECK-NEXT: vptestmw %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x26,0xc0]
82 ; CHECK-NEXT: vptestmw %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0xf5,0x48,0x26,0xc9]
83 ; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
84 ; CHECK-NEXT: ktestd %k1, %k0 # encoding: [0xc4,0xe1,0xf9,0x99,0xc1]
85 ; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
86 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
87 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
88 %1 = icmp ne <32 x i16> %A, zeroinitializer
89 %2 = icmp ne <32 x i16> %B, zeroinitializer
90 %res = call i32 @llvm.x86.avx512.ktestz.d(<32 x i1> %1, <32 x i1> %2) ; <i32> [#uses=1]
93 declare i32 @llvm.x86.avx512.ktestz.d(<32 x i1>, <32 x i1>) nounwind readnone
; 64-bit mask variant of the ktestc test: <64 x i1> masks built from
; <64 x i8> compares are passed to llvm.x86.avx512.ktestc.q. Both runs
; expect ktestq followed by setb.
95 define i32 @test_x86_avx512_ktestc_q(<64 x i8> %A, <64 x i8> %B) {
96 ; CHECK-LABEL: test_x86_avx512_ktestc_q:
98 ; CHECK-NEXT: vptestmb %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x48,0x26,0xc0]
99 ; CHECK-NEXT: vptestmb %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0x75,0x48,0x26,0xc9]
100 ; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
101 ; CHECK-NEXT: ktestq %k1, %k0 # encoding: [0xc4,0xe1,0xf8,0x99,0xc1]
102 ; CHECK-NEXT: setb %al # encoding: [0x0f,0x92,0xc0]
103 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
104 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
105 %1 = icmp ne <64 x i8> %A, zeroinitializer
106 %2 = icmp ne <64 x i8> %B, zeroinitializer
107 %res = call i32 @llvm.x86.avx512.ktestc.q(<64 x i1> %1, <64 x i1> %2) ; <i32> [#uses=1]
110 declare i32 @llvm.x86.avx512.ktestc.q(<64 x i1>, <64 x i1>) nounwind readnone
; 64-bit mask variant of the ktestz test: <64 x i1> masks built from
; <64 x i8> compares are passed to llvm.x86.avx512.ktestz.q. Both runs
; expect ktestq followed by sete.
112 define i32 @test_x86_avx512_ktestz_q(<64 x i8> %A, <64 x i8> %B) {
113 ; CHECK-LABEL: test_x86_avx512_ktestz_q:
115 ; CHECK-NEXT: vptestmb %zmm0, %zmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x48,0x26,0xc0]
116 ; CHECK-NEXT: vptestmb %zmm1, %zmm1, %k1 # encoding: [0x62,0xf2,0x75,0x48,0x26,0xc9]
117 ; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
118 ; CHECK-NEXT: ktestq %k1, %k0 # encoding: [0xc4,0xe1,0xf8,0x99,0xc1]
119 ; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
120 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
121 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
122 %1 = icmp ne <64 x i8> %A, zeroinitializer
123 %2 = icmp ne <64 x i8> %B, zeroinitializer
124 %res = call i32 @llvm.x86.avx512.ktestz.q(<64 x i1> %1, <64 x i1> %2) ; <i32> [#uses=1]
127 declare i32 @llvm.x86.avx512.ktestz.q(<64 x i1>, <64 x i1>) nounwind readnone
; Unmasked register-register form: llvm.x86.avx512.packssdw.512 on %a and %b
; is expected to lower to a single vpackssdw on both runs.
129 define <32 x i16> @test_mask_packs_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
130 ; CHECK-LABEL: test_mask_packs_epi32_rr_512:
132 ; CHECK-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x6b,0xc1]
133 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
134 %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
; Merge-masked register-register form: the packssdw result is selected
; against %passThru under the <32 x i1> bitcast of %mask. Expected code is a
; {%k1}-masked vpackssdw into zmm2 followed by a move to zmm0. The mask
; arrives on the stack in the i686 run and in %edi in the x86_64 run.
138 define <32 x i16> @test_mask_packs_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) {
139 ; X86-LABEL: test_mask_packs_epi32_rrk_512:
141 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
142 ; X86-NEXT: vpackssdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0xd1]
143 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
144 ; X86-NEXT: retl # encoding: [0xc3]
146 ; X64-LABEL: test_mask_packs_epi32_rrk_512:
148 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
149 ; X64-NEXT: vpackssdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0xd1]
150 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
151 ; X64-NEXT: retq # encoding: [0xc3]
152 %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
153 %2 = bitcast i32 %mask to <32 x i1>
154 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
; Zero-masked register-register form: the packssdw result is selected
; against zeroinitializer under %mask, and is expected to fold into a single
; vpackssdw with {%k1} {z}.
158 define <32 x i16> @test_mask_packs_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) {
159 ; X86-LABEL: test_mask_packs_epi32_rrkz_512:
161 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
162 ; X86-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0xc1]
163 ; X86-NEXT: retl # encoding: [0xc3]
165 ; X64-LABEL: test_mask_packs_epi32_rrkz_512:
167 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
168 ; X64-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0xc1]
169 ; X64-NEXT: retq # encoding: [0xc3]
170 %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
171 %2 = bitcast i32 %mask to <32 x i1>
172 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
; Unmasked register-memory form: the second operand is a full <16 x i32>
; load from %ptr_b, which is expected to be folded into vpackssdw as a
; memory operand.
176 define <32 x i16> @test_mask_packs_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
177 ; X86-LABEL: test_mask_packs_epi32_rm_512:
179 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
180 ; X86-NEXT: vpackssdw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x6b,0x00]
181 ; X86-NEXT: retl # encoding: [0xc3]
183 ; X64-LABEL: test_mask_packs_epi32_rm_512:
185 ; X64-NEXT: vpackssdw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x6b,0x07]
186 ; X64-NEXT: retq # encoding: [0xc3]
187 %b = load <16 x i32>, <16 x i32>* %ptr_b
188 %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
; Merge-masked register-memory form: the operand loaded from %ptr_b is
; expected to fold into a {%k1}-masked vpackssdw that writes the %passThru
; register (zmm1), followed by a move to zmm0.
192 define <32 x i16> @test_mask_packs_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
193 ; X86-LABEL: test_mask_packs_epi32_rmk_512:
195 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
196 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
197 ; X86-NEXT: vpackssdw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0x08]
198 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
199 ; X86-NEXT: retl # encoding: [0xc3]
201 ; X64-LABEL: test_mask_packs_epi32_rmk_512:
203 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
204 ; X64-NEXT: vpackssdw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0x0f]
205 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
206 ; X64-NEXT: retq # encoding: [0xc3]
207 %b = load <16 x i32>, <16 x i32>* %ptr_b
208 %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
209 %2 = bitcast i32 %mask to <32 x i1>
210 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
; Zero-masked register-memory form: load from %ptr_b plus select against
; zeroinitializer is expected to become one vpackssdw with a memory operand
; and {%k1} {z}.
214 define <32 x i16> @test_mask_packs_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) {
215 ; X86-LABEL: test_mask_packs_epi32_rmkz_512:
217 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
218 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
219 ; X86-NEXT: vpackssdw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0x00]
220 ; X86-NEXT: retl # encoding: [0xc3]
222 ; X64-LABEL: test_mask_packs_epi32_rmkz_512:
224 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
225 ; X64-NEXT: vpackssdw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0x07]
226 ; X64-NEXT: retq # encoding: [0xc3]
227 %b = load <16 x i32>, <16 x i32>* %ptr_b
228 %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
229 %2 = bitcast i32 %mask to <32 x i1>
230 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
; Unmasked broadcast-memory form: a scalar i32 loaded from %ptr_b is splatted
; to all 16 lanes (insertelement + zero-index shufflevector). The splat is
; expected to fold into vpackssdw as a {1to16} embedded-broadcast operand.
234 define <32 x i16> @test_mask_packs_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
235 ; X86-LABEL: test_mask_packs_epi32_rmb_512:
237 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
238 ; X86-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x58,0x6b,0x00]
239 ; X86-NEXT: retl # encoding: [0xc3]
241 ; X64-LABEL: test_mask_packs_epi32_rmb_512:
243 ; X64-NEXT: vpackssdw (%rdi){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x58,0x6b,0x07]
244 ; X64-NEXT: retq # encoding: [0xc3]
245 %q = load i32, i32* %ptr_b
246 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
247 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
248 %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
; Merge-masked broadcast-memory form: the splatted scalar from %ptr_b is
; packed with %a and selected against %passThru under %mask. Expected code
; is a {1to16} vpackssdw with {%k1} into zmm1, then a move to zmm0.
252 define <32 x i16> @test_mask_packs_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) {
253 ; X86-LABEL: test_mask_packs_epi32_rmbk_512:
255 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
256 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
257 ; X86-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x59,0x6b,0x08]
258 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
259 ; X86-NEXT: retl # encoding: [0xc3]
261 ; X64-LABEL: test_mask_packs_epi32_rmbk_512:
263 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
264 ; X64-NEXT: vpackssdw (%rdi){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x59,0x6b,0x0f]
265 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
266 ; X64-NEXT: retq # encoding: [0xc3]
267 %q = load i32, i32* %ptr_b
268 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
269 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
270 %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
271 %2 = bitcast i32 %mask to <32 x i1>
272 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
; Zero-masked broadcast-memory form: the splatted scalar from %ptr_b is
; packed with %a and selected against zeroinitializer under %mask. Expected
; code is a single {1to16} vpackssdw with {%k1} {z}.
276 define <32 x i16> @test_mask_packs_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) {
277 ; X86-LABEL: test_mask_packs_epi32_rmbkz_512:
279 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
280 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
281 ; X86-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xd9,0x6b,0x00]
282 ; X86-NEXT: retl # encoding: [0xc3]
284 ; X64-LABEL: test_mask_packs_epi32_rmbkz_512:
286 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
287 ; X64-NEXT: vpackssdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xd9,0x6b,0x07]
288 ; X64-NEXT: retq # encoding: [0xc3]
289 %q = load i32, i32* %ptr_b
290 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
291 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
292 %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
293 %2 = bitcast i32 %mask to <32 x i1>
294 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
298 declare <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32>, <16 x i32>)
; Unmasked register-register form: llvm.x86.avx512.packsswb.512 on %a and %b
; is expected to lower to a single vpacksswb on both runs.
300 define <64 x i8> @test_mask_packs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
301 ; CHECK-LABEL: test_mask_packs_epi16_rr_512:
303 ; CHECK-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x63,0xc1]
304 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
305 %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
; Merge-masked register-register form with a 64-bit mask: the packsswb
; result is selected against %passThru under the <64 x i1> bitcast of %mask.
; Expected code loads the mask with kmovq (from the stack in the i686 run,
; from %rdi in the x86_64 run) and emits a {%k1}-masked vpacksswb.
309 define <64 x i8> @test_mask_packs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) {
310 ; X86-LABEL: test_mask_packs_epi16_rrk_512:
312 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
313 ; X86-NEXT: vpacksswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0xd1]
314 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
315 ; X86-NEXT: retl # encoding: [0xc3]
317 ; X64-LABEL: test_mask_packs_epi16_rrk_512:
319 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
320 ; X64-NEXT: vpacksswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0xd1]
321 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
322 ; X64-NEXT: retq # encoding: [0xc3]
323 %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
324 %2 = bitcast i64 %mask to <64 x i1>
325 %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passThru
; Zero-masked register-register form with a 64-bit mask: the packsswb result
; is selected against zeroinitializer, expected as one vpacksswb with
; {%k1} {z}.
329 define <64 x i8> @test_mask_packs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) {
330 ; X86-LABEL: test_mask_packs_epi16_rrkz_512:
332 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
333 ; X86-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0xc1]
334 ; X86-NEXT: retl # encoding: [0xc3]
336 ; X64-LABEL: test_mask_packs_epi16_rrkz_512:
338 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
339 ; X64-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0xc1]
340 ; X64-NEXT: retq # encoding: [0xc3]
341 %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
342 %2 = bitcast i64 %mask to <64 x i1>
343 %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> zeroinitializer
; Unmasked register-memory form: the <32 x i16> load from %ptr_b is expected
; to fold into vpacksswb as a memory operand.
347 define <64 x i8> @test_mask_packs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
348 ; X86-LABEL: test_mask_packs_epi16_rm_512:
350 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
351 ; X86-NEXT: vpacksswb (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x63,0x00]
352 ; X86-NEXT: retl # encoding: [0xc3]
354 ; X64-LABEL: test_mask_packs_epi16_rm_512:
356 ; X64-NEXT: vpacksswb (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x63,0x07]
357 ; X64-NEXT: retq # encoding: [0xc3]
358 %b = load <32 x i16>, <32 x i16>* %ptr_b
359 %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
; Merge-masked register-memory form with a 64-bit mask: the load from %ptr_b
; is expected to fold into a {%k1}-masked vpacksswb that writes the
; %passThru register (zmm1), followed by a move to zmm0.
363 define <64 x i8> @test_mask_packs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
364 ; X86-LABEL: test_mask_packs_epi16_rmk_512:
366 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
367 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
368 ; X86-NEXT: vpacksswb (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0x08]
369 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
370 ; X86-NEXT: retl # encoding: [0xc3]
372 ; X64-LABEL: test_mask_packs_epi16_rmk_512:
374 ; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
375 ; X64-NEXT: vpacksswb (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0x0f]
376 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
377 ; X64-NEXT: retq # encoding: [0xc3]
378 %b = load <32 x i16>, <32 x i16>* %ptr_b
379 %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
380 %2 = bitcast i64 %mask to <64 x i1>
381 %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passThru
; Zero-masked register-memory form with a 64-bit mask: load from %ptr_b plus
; select against zeroinitializer is expected to become one vpacksswb with a
; memory operand and {%k1} {z}.
385 define <64 x i8> @test_mask_packs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) {
386 ; X86-LABEL: test_mask_packs_epi16_rmkz_512:
388 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
389 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
390 ; X86-NEXT: vpacksswb (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0x00]
391 ; X86-NEXT: retl # encoding: [0xc3]
393 ; X64-LABEL: test_mask_packs_epi16_rmkz_512:
395 ; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
396 ; X64-NEXT: vpacksswb (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0x07]
397 ; X64-NEXT: retq # encoding: [0xc3]
398 %b = load <32 x i16>, <32 x i16>* %ptr_b
399 %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
400 %2 = bitcast i64 %mask to <64 x i1>
401 %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> zeroinitializer
405 declare <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16>, <32 x i16>)
; Unmasked register-register form: llvm.x86.avx512.packusdw.512 on %a and %b
; is expected to lower to a single vpackusdw on both runs.
408 define <32 x i16> @test_mask_packus_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
409 ; CHECK-LABEL: test_mask_packus_epi32_rr_512:
411 ; CHECK-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x2b,0xc1]
412 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
413 %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
; Merge-masked register-register form: the packusdw result is selected
; against %passThru under the <32 x i1> bitcast of %mask. Expected code is a
; {%k1}-masked vpackusdw into zmm2 followed by a move to zmm0.
417 define <32 x i16> @test_mask_packus_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) {
418 ; X86-LABEL: test_mask_packus_epi32_rrk_512:
420 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
421 ; X86-NEXT: vpackusdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0xd1]
422 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
423 ; X86-NEXT: retl # encoding: [0xc3]
425 ; X64-LABEL: test_mask_packus_epi32_rrk_512:
427 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
428 ; X64-NEXT: vpackusdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0xd1]
429 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
430 ; X64-NEXT: retq # encoding: [0xc3]
431 %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
432 %2 = bitcast i32 %mask to <32 x i1>
433 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
; Zero-masked register-register form: the packusdw result is selected
; against zeroinitializer under %mask, expected as one vpackusdw with
; {%k1} {z}.
437 define <32 x i16> @test_mask_packus_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) {
438 ; X86-LABEL: test_mask_packus_epi32_rrkz_512:
440 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
441 ; X86-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0xc1]
442 ; X86-NEXT: retl # encoding: [0xc3]
444 ; X64-LABEL: test_mask_packus_epi32_rrkz_512:
446 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
447 ; X64-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0xc1]
448 ; X64-NEXT: retq # encoding: [0xc3]
449 %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
450 %2 = bitcast i32 %mask to <32 x i1>
451 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
; Unmasked register-memory form: the <16 x i32> load from %ptr_b is expected
; to fold into vpackusdw as a memory operand.
455 define <32 x i16> @test_mask_packus_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
456 ; X86-LABEL: test_mask_packus_epi32_rm_512:
458 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
459 ; X86-NEXT: vpackusdw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x2b,0x00]
460 ; X86-NEXT: retl # encoding: [0xc3]
462 ; X64-LABEL: test_mask_packus_epi32_rm_512:
464 ; X64-NEXT: vpackusdw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x2b,0x07]
465 ; X64-NEXT: retq # encoding: [0xc3]
466 %b = load <16 x i32>, <16 x i32>* %ptr_b
467 %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
; Merge-masked register-memory form: the load from %ptr_b is expected to
; fold into a {%k1}-masked vpackusdw that writes the %passThru register
; (zmm1), followed by a move to zmm0.
471 define <32 x i16> @test_mask_packus_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
472 ; X86-LABEL: test_mask_packus_epi32_rmk_512:
474 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
475 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
476 ; X86-NEXT: vpackusdw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0x08]
477 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
478 ; X86-NEXT: retl # encoding: [0xc3]
480 ; X64-LABEL: test_mask_packus_epi32_rmk_512:
482 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
483 ; X64-NEXT: vpackusdw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0x0f]
484 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
485 ; X64-NEXT: retq # encoding: [0xc3]
486 %b = load <16 x i32>, <16 x i32>* %ptr_b
487 %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
488 %2 = bitcast i32 %mask to <32 x i1>
489 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
; Zero-masked register-memory form: load from %ptr_b plus select against
; zeroinitializer is expected to become one vpackusdw with a memory operand
; and {%k1} {z}.
493 define <32 x i16> @test_mask_packus_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) {
494 ; X86-LABEL: test_mask_packus_epi32_rmkz_512:
496 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
497 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
498 ; X86-NEXT: vpackusdw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0x00]
499 ; X86-NEXT: retl # encoding: [0xc3]
501 ; X64-LABEL: test_mask_packus_epi32_rmkz_512:
503 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
504 ; X64-NEXT: vpackusdw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0x07]
505 ; X64-NEXT: retq # encoding: [0xc3]
506 %b = load <16 x i32>, <16 x i32>* %ptr_b
507 %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
508 %2 = bitcast i32 %mask to <32 x i1>
509 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
; Unmasked broadcast-memory form: a scalar i32 loaded from %ptr_b is splatted
; to all 16 lanes (insertelement + zero-index shufflevector). The splat is
; expected to fold into vpackusdw as a {1to16} embedded-broadcast operand.
513 define <32 x i16> @test_mask_packus_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
514 ; X86-LABEL: test_mask_packus_epi32_rmb_512:
516 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
517 ; X86-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x58,0x2b,0x00]
518 ; X86-NEXT: retl # encoding: [0xc3]
520 ; X64-LABEL: test_mask_packus_epi32_rmb_512:
522 ; X64-NEXT: vpackusdw (%rdi){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x58,0x2b,0x07]
523 ; X64-NEXT: retq # encoding: [0xc3]
524 %q = load i32, i32* %ptr_b
525 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
526 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
527 %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
; Merge-masked broadcast-memory form: the splatted scalar from %ptr_b is
; packed with %a and selected against %passThru under %mask. Expected code
; is a {1to16} vpackusdw with {%k1} into zmm1, then a move to zmm0.
531 define <32 x i16> @test_mask_packus_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) {
532 ; X86-LABEL: test_mask_packus_epi32_rmbk_512:
534 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
535 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
536 ; X86-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x59,0x2b,0x08]
537 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
538 ; X86-NEXT: retl # encoding: [0xc3]
540 ; X64-LABEL: test_mask_packus_epi32_rmbk_512:
542 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
543 ; X64-NEXT: vpackusdw (%rdi){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x59,0x2b,0x0f]
544 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
545 ; X64-NEXT: retq # encoding: [0xc3]
546 %q = load i32, i32* %ptr_b
547 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
548 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
549 %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
550 %2 = bitcast i32 %mask to <32 x i1>
551 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
; Zero-masked broadcast-memory form: the splatted scalar from %ptr_b is
; packed with %a and selected against zeroinitializer under %mask. Expected
; code is a single {1to16} vpackusdw with {%k1} {z}.
555 define <32 x i16> @test_mask_packus_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) {
556 ; X86-LABEL: test_mask_packus_epi32_rmbkz_512:
558 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
559 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
560 ; X86-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xd9,0x2b,0x00]
561 ; X86-NEXT: retl # encoding: [0xc3]
563 ; X64-LABEL: test_mask_packus_epi32_rmbkz_512:
565 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
566 ; X64-NEXT: vpackusdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xd9,0x2b,0x07]
567 ; X64-NEXT: retq # encoding: [0xc3]
568 %q = load i32, i32* %ptr_b
569 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
570 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
571 %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
572 %2 = bitcast i32 %mask to <32 x i1>
573 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
577 declare <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32>, <16 x i32>)
; Unmasked register-register form: llvm.x86.avx512.packuswb.512 on %a and %b
; is expected to lower to a single vpackuswb on both runs.
579 define <64 x i8> @test_mask_packus_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
580 ; CHECK-LABEL: test_mask_packus_epi16_rr_512:
582 ; CHECK-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x67,0xc1]
583 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
584 %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
; Merge-masked register-register form with a 64-bit mask: the packuswb
; result is selected against %passThru under the <64 x i1> bitcast of %mask.
; Expected code loads the mask with kmovq and emits a {%k1}-masked vpackuswb
; into zmm2 followed by a move to zmm0.
588 define <64 x i8> @test_mask_packus_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) {
589 ; X86-LABEL: test_mask_packus_epi16_rrk_512:
591 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
592 ; X86-NEXT: vpackuswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0xd1]
593 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
594 ; X86-NEXT: retl # encoding: [0xc3]
596 ; X64-LABEL: test_mask_packus_epi16_rrk_512:
598 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
599 ; X64-NEXT: vpackuswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0xd1]
600 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
601 ; X64-NEXT: retq # encoding: [0xc3]
602 %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
603 %2 = bitcast i64 %mask to <64 x i1>
604 %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passThru
; Zero-masked register-register form with a 64-bit mask: the packuswb result
; is selected against zeroinitializer, expected as one vpackuswb with
; {%k1} {z}.
608 define <64 x i8> @test_mask_packus_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) {
609 ; X86-LABEL: test_mask_packus_epi16_rrkz_512:
611 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
612 ; X86-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0xc1]
613 ; X86-NEXT: retl # encoding: [0xc3]
615 ; X64-LABEL: test_mask_packus_epi16_rrkz_512:
617 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
618 ; X64-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0xc1]
619 ; X64-NEXT: retq # encoding: [0xc3]
620 %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
621 %2 = bitcast i64 %mask to <64 x i1>
622 %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> zeroinitializer
; Unmasked register-memory form: the <32 x i16> load from %ptr_b is expected
; to fold into vpackuswb as a memory operand.
626 define <64 x i8> @test_mask_packus_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
627 ; X86-LABEL: test_mask_packus_epi16_rm_512:
629 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
630 ; X86-NEXT: vpackuswb (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x67,0x00]
631 ; X86-NEXT: retl # encoding: [0xc3]
633 ; X64-LABEL: test_mask_packus_epi16_rm_512:
635 ; X64-NEXT: vpackuswb (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x67,0x07]
636 ; X64-NEXT: retq # encoding: [0xc3]
637 %b = load <32 x i16>, <32 x i16>* %ptr_b
638 %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
; Merge-masked register-memory form with a 64-bit mask: the load from %ptr_b
; is expected to fold into a {%k1}-masked vpackuswb that writes the
; %passThru register (zmm1), followed by a move to zmm0.
642 define <64 x i8> @test_mask_packus_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
643 ; X86-LABEL: test_mask_packus_epi16_rmk_512:
645 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
646 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
647 ; X86-NEXT: vpackuswb (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0x08]
648 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
649 ; X86-NEXT: retl # encoding: [0xc3]
651 ; X64-LABEL: test_mask_packus_epi16_rmk_512:
653 ; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
654 ; X64-NEXT: vpackuswb (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0x0f]
655 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
656 ; X64-NEXT: retq # encoding: [0xc3]
657 %b = load <32 x i16>, <32 x i16>* %ptr_b
658 %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
659 %2 = bitcast i64 %mask to <64 x i1>
660 %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passThru
; Zero-masked register-memory form with a 64-bit mask: load from %ptr_b plus
; select against zeroinitializer is expected to become one vpackuswb with a
; memory operand and {%k1} {z}.
664 define <64 x i8> @test_mask_packus_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) {
665 ; X86-LABEL: test_mask_packus_epi16_rmkz_512:
667 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
668 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
669 ; X86-NEXT: vpackuswb (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0x00]
670 ; X86-NEXT: retl # encoding: [0xc3]
672 ; X64-LABEL: test_mask_packus_epi16_rmkz_512:
674 ; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
675 ; X64-NEXT: vpackuswb (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0x07]
676 ; X64-NEXT: retq # encoding: [0xc3]
677 %b = load <32 x i16>, <32 x i16>* %ptr_b
678 %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
679 %2 = bitcast i64 %mask to <64 x i1>
680 %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> zeroinitializer
684 declare <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16>, <32 x i16>)
; Calls vpermi2var.hi.512 with operands (%x1, %x0, %x2) twice: one result is
; selected against %x1 (the first call operand) under the i32 mask %x3, the
; other is used unmasked, and the two are added. The expected assembly uses
; vpermt2w for both the masked and the unmasked form.
686 define <32 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
687 ; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512:
689 ; X86-NEXT: vmovdqa64 %zmm1, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9]
690 ; X86-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x7d,0xda]
691 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
692 ; X86-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x7d,0xca]
693 ; X86-NEXT: vpaddw %zmm3, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc3]
694 ; X86-NEXT: retl # encoding: [0xc3]
696 ; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512:
698 ; X64-NEXT: vmovdqa64 %zmm1, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9]
699 ; X64-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x7d,0xda]
700 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
701 ; X64-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x7d,0xca]
702 ; X64-NEXT: vpaddw %zmm3, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc3]
703 ; X64-NEXT: retq # encoding: [0xc3]
704 %1 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x2)
705 %2 = bitcast i32 %x3 to <32 x i1>
706 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x1
707 %4 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x2)
708 %res2 = add <32 x i16> %3, %4
; Zero-masking variant of the vpermt2var test: vpermi2var.hi.512 is called
; with (%x1, %x0, %x2) twice, one result is selected against zeroinitializer
; under the i32 mask %x3, and it is added to the unmasked result. The expected
; masked instruction is vpermt2w with {%k1} {z}.
712 define <32 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
713 ; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512:
715 ; X86-NEXT: vmovdqa64 %zmm1, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9]
716 ; X86-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x7d,0xda]
717 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
718 ; X86-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x7d,0xca]
719 ; X86-NEXT: vpaddw %zmm3, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc3]
720 ; X86-NEXT: retl # encoding: [0xc3]
722 ; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512:
724 ; X64-NEXT: vmovdqa64 %zmm1, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9]
725 ; X64-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x7d,0xda]
726 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
727 ; X64-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x7d,0xca]
728 ; X64-NEXT: vpaddw %zmm3, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc3]
729 ; X64-NEXT: retq # encoding: [0xc3]
730 %1 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x2)
731 %2 = bitcast i32 %x3 to <32 x i1>
732 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
733 %4 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x2)
734 %res2 = add <32 x i16> %3, %4
738 declare <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>)
; Calls vpermi2var.hi.512 with operands (%x0, %x1, %x2) twice: one result is
; selected against %x1 (the second call operand) under the i32 mask %x3, the
; other is used unmasked, and the two are added. The expected assembly uses
; vpermi2w for the masked form and vpermt2w for the unmasked form.
740 define <32 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
741 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512:
743 ; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
744 ; X86-NEXT: vpermt2w %zmm2, %zmm1, %zmm3 # encoding: [0x62,0xf2,0xf5,0x48,0x7d,0xda]
745 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
746 ; X86-NEXT: vpermi2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x75,0xca]
747 ; X86-NEXT: vpaddw %zmm3, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc3]
748 ; X86-NEXT: retl # encoding: [0xc3]
750 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512:
752 ; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
753 ; X64-NEXT: vpermt2w %zmm2, %zmm1, %zmm3 # encoding: [0x62,0xf2,0xf5,0x48,0x7d,0xda]
754 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
755 ; X64-NEXT: vpermi2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x75,0xca]
756 ; X64-NEXT: vpaddw %zmm3, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc3]
757 ; X64-NEXT: retq # encoding: [0xc3]
758 %1 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2)
759 %2 = bitcast i32 %x3 to <32 x i1>
760 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x1
761 %4 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2)
762 %res2 = add <32 x i16> %3, %4
766 declare <64 x i8> @llvm.x86.avx512.pavg.b.512(<64 x i8>, <64 x i8>)
; Computes pavg.b.512 of (%x0, %x1) twice: once merged into %x2 under the i64
; mask %x3 and once unmasked, then adds the two results. The expected assembly
; is an unmasked vpavgb, a {%k1}-masked vpavgb into zmm2, and a vpaddb.
768 define <64 x i8> @test_int_x86_avx512_mask_pavg_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
769 ; X86-LABEL: test_int_x86_avx512_mask_pavg_b_512:
771 ; X86-NEXT: vpavgb %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe0,0xd9]
772 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
773 ; X86-NEXT: vpavgb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe0,0xd1]
774 ; X86-NEXT: vpaddb %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc3]
775 ; X86-NEXT: retl # encoding: [0xc3]
777 ; X64-LABEL: test_int_x86_avx512_mask_pavg_b_512:
779 ; X64-NEXT: vpavgb %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe0,0xd9]
780 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
781 ; X64-NEXT: vpavgb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe0,0xd1]
782 ; X64-NEXT: vpaddb %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc3]
783 ; X64-NEXT: retq # encoding: [0xc3]
784 %1 = call <64 x i8> @llvm.x86.avx512.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1)
785 %2 = bitcast i64 %x3 to <64 x i1>
786 %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %x2
787 %4 = call <64 x i8> @llvm.x86.avx512.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1)
788 %res2 = add <64 x i8> %3, %4
792 declare <32 x i16> @llvm.x86.avx512.pavg.w.512(<32 x i16>, <32 x i16>)
; Computes pavg.w.512 of (%x0, %x1) twice: once merged into %x2 under the i32
; mask %x3 and once unmasked, then adds the two results. The expected assembly
; is an unmasked vpavgw, a {%k1}-masked vpavgw into zmm2, and a vpaddw.
794 define <32 x i16> @test_int_x86_avx512_mask_pavg_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
795 ; X86-LABEL: test_int_x86_avx512_mask_pavg_w_512:
797 ; X86-NEXT: vpavgw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe3,0xd9]
798 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
799 ; X86-NEXT: vpavgw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe3,0xd1]
800 ; X86-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
801 ; X86-NEXT: retl # encoding: [0xc3]
803 ; X64-LABEL: test_int_x86_avx512_mask_pavg_w_512:
805 ; X64-NEXT: vpavgw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe3,0xd9]
806 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
807 ; X64-NEXT: vpavgw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe3,0xd1]
808 ; X64-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
809 ; X64-NEXT: retq # encoding: [0xc3]
810 %1 = call <32 x i16> @llvm.x86.avx512.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1)
811 %2 = bitcast i32 %x3 to <32 x i1>
812 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2
813 %4 = call <32 x i16> @llvm.x86.avx512.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1)
814 %res2 = add <32 x i16> %3, %4
818 declare <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8>, <64 x i8>)
; Unmasked pshuf.b.512 of (%x0, %x1). The expected assembly is a single
; vpshufb; the expectations are shared by both RUN lines and the return
; mnemonic is matched with a pattern.
820 define <64 x i8>@test_int_x86_avx512_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1) {
821 ; CHECK-LABEL: test_int_x86_avx512_pshuf_b_512:
823 ; CHECK-NEXT: vpshufb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x00,0xc1]
824 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
825 %res = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1)
; Merge-masked pshuf.b.512: the i64 %mask is bitcast to <64 x i1> and the
; select falls back to %x2. The expected assembly writes the masked vpshufb
; result into zmm2 under {%k1} and then copies zmm2 to zmm0.
829 define <64 x i8>@test_int_x86_avx512_pshuf_b_512_mask(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %mask) {
830 ; X86-LABEL: test_int_x86_avx512_pshuf_b_512_mask:
832 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
833 ; X86-NEXT: vpshufb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x00,0xd1]
834 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
835 ; X86-NEXT: retl # encoding: [0xc3]
837 ; X64-LABEL: test_int_x86_avx512_pshuf_b_512_mask:
839 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
840 ; X64-NEXT: vpshufb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x00,0xd1]
841 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
842 ; X64-NEXT: retq # encoding: [0xc3]
843 %res = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1)
844 %mask.cast = bitcast i64 %mask to <64 x i1>
845 %res2 = select <64 x i1> %mask.cast, <64 x i8> %res, <64 x i8> %x2
; Zero-masked pshuf.b.512: the select falls back to zeroinitializer, and the
; expected assembly is a single vpshufb under {%k1} {z}.
849 define <64 x i8>@test_int_x86_avx512_pshuf_b_512_maskz(<64 x i8> %x0, <64 x i8> %x1, i64 %mask) {
850 ; X86-LABEL: test_int_x86_avx512_pshuf_b_512_maskz:
852 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
853 ; X86-NEXT: vpshufb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x00,0xc1]
854 ; X86-NEXT: retl # encoding: [0xc3]
856 ; X64-LABEL: test_int_x86_avx512_pshuf_b_512_maskz:
858 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
859 ; X64-NEXT: vpshufb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x00,0xc1]
860 ; X64-NEXT: retq # encoding: [0xc3]
861 %res = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1)
862 %mask.cast = bitcast i64 %mask to <64 x i1>
863 %res2 = select <64 x i1> %mask.cast, <64 x i8> %res, <64 x i8> zeroinitializer
867 declare <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16>, <32 x i16>)
; Computes pmulhu.w.512 of (%x0, %x1) twice: once merged into %x2 under the
; i32 mask %x3 and once unmasked, then adds the two results. The expected
; assembly is an unmasked vpmulhuw, a {%k1}-masked vpmulhuw, and a vpaddw.
869 define <32 x i16> @test_int_x86_avx512_mask_pmulhu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
870 ; X86-LABEL: test_int_x86_avx512_mask_pmulhu_w_512:
872 ; X86-NEXT: vpmulhuw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe4,0xd9]
873 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
874 ; X86-NEXT: vpmulhuw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe4,0xd1]
875 ; X86-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
876 ; X86-NEXT: retl # encoding: [0xc3]
878 ; X64-LABEL: test_int_x86_avx512_mask_pmulhu_w_512:
880 ; X64-NEXT: vpmulhuw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe4,0xd9]
881 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
882 ; X64-NEXT: vpmulhuw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe4,0xd1]
883 ; X64-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
884 ; X64-NEXT: retq # encoding: [0xc3]
885 %1 = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1)
886 %2 = bitcast i32 %x3 to <32 x i1>
887 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2
888 %4 = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1)
889 %res2 = add <32 x i16> %3, %4
893 declare <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16>, <32 x i16>)
; Computes pmulh.w.512 of (%x0, %x1) twice: once merged into %x2 under the
; i32 mask %x3 and once unmasked, then adds the two results. The expected
; assembly is an unmasked vpmulhw, a {%k1}-masked vpmulhw, and a vpaddw.
895 define <32 x i16> @test_int_x86_avx512_mask_pmulh_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
896 ; X86-LABEL: test_int_x86_avx512_mask_pmulh_w_512:
898 ; X86-NEXT: vpmulhw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe5,0xd9]
899 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
900 ; X86-NEXT: vpmulhw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe5,0xd1]
901 ; X86-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
902 ; X86-NEXT: retl # encoding: [0xc3]
904 ; X64-LABEL: test_int_x86_avx512_mask_pmulh_w_512:
906 ; X64-NEXT: vpmulhw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe5,0xd9]
907 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
908 ; X64-NEXT: vpmulhw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe5,0xd1]
909 ; X64-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
910 ; X64-NEXT: retq # encoding: [0xc3]
911 %1 = call <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1)
912 %2 = bitcast i32 %x3 to <32 x i1>
913 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2
914 %4 = call <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1)
915 %res2 = add <32 x i16> %3, %4
919 declare <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16>, <32 x i16>)
; Computes pmul.hr.sw.512 of (%x0, %x1) twice: once merged into %x2 under the
; i32 mask %x3 and once unmasked, then adds the two results. The expected
; assembly is an unmasked vpmulhrsw, a {%k1}-masked vpmulhrsw, and a vpaddw.
921 define <32 x i16> @test_int_x86_avx512_mask_pmulhr_sw_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
922 ; X86-LABEL: test_int_x86_avx512_mask_pmulhr_sw_512:
924 ; X86-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x0b,0xd9]
925 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
926 ; X86-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x0b,0xd1]
927 ; X86-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
928 ; X86-NEXT: retl # encoding: [0xc3]
930 ; X64-LABEL: test_int_x86_avx512_mask_pmulhr_sw_512:
932 ; X64-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x0b,0xd9]
933 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
934 ; X64-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x0b,0xd1]
935 ; X64-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
936 ; X64-NEXT: retq # encoding: [0xc3]
937 %1 = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1)
938 %2 = bitcast i32 %x3 to <32 x i1>
939 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2
940 %4 = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1)
941 %res2 = add <32 x i16> %3, %4
; Truncates %x0 from <32 x i16> to <32 x i8> three times using plain trunc
; instructions: unmasked, merged into %x1 under the i32 mask %x2, and selected
; against zeroinitializer under the same mask. The three values are summed.
; The expected assembly has one vpmovwb per form (plain, {%k1}, {%k1} {z}).
945 define <32 x i8>@test_int_x86_avx512_mask_pmov_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
946 ; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_512:
948 ; X86-NEXT: vpmovwb %zmm0, %ymm2 # encoding: [0x62,0xf2,0x7e,0x48,0x30,0xc2]
949 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
950 ; X86-NEXT: vpmovwb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x30,0xc1]
951 ; X86-NEXT: vpmovwb %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x30,0xc0]
952 ; X86-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # encoding: [0xc5,0xf5,0xfc,0xc0]
953 ; X86-NEXT: vpaddb %ymm0, %ymm2, %ymm0 # encoding: [0xc5,0xed,0xfc,0xc0]
954 ; X86-NEXT: retl # encoding: [0xc3]
956 ; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_512:
958 ; X64-NEXT: vpmovwb %zmm0, %ymm2 # encoding: [0x62,0xf2,0x7e,0x48,0x30,0xc2]
959 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
960 ; X64-NEXT: vpmovwb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x30,0xc1]
961 ; X64-NEXT: vpmovwb %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x30,0xc0]
962 ; X64-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # encoding: [0xc5,0xf5,0xfc,0xc0]
963 ; X64-NEXT: vpaddb %ymm0, %ymm2, %ymm0 # encoding: [0xc5,0xed,0xfc,0xc0]
964 ; X64-NEXT: retq # encoding: [0xc3]
965 %1 = trunc <32 x i16> %x0 to <32 x i8>
966 %2 = trunc <32 x i16> %x0 to <32 x i8>
967 %3 = bitcast i32 %x2 to <32 x i1>
968 %4 = select <32 x i1> %3, <32 x i8> %2, <32 x i8> %x1
969 %5 = trunc <32 x i16> %x0 to <32 x i8>
970 %6 = bitcast i32 %x2 to <32 x i1>
971 %7 = select <32 x i1> %6, <32 x i8> %5, <32 x i8> zeroinitializer
972 %res3 = add <32 x i8> %1, %4
973 %res4 = add <32 x i8> %res3, %7
977 declare void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16>, i32)
; Calls mask.pmov.wb.mem.512 on %ptr twice with the same source %x1: first
; with an all-ones mask (i32 -1), then with the variable mask %x2. The expected
; assembly is an unmasked vpmovwb store followed by a {%k1}-masked one.
979 define void @test_int_x86_avx512_mask_pmov_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
980 ; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_512:
982 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
983 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
984 ; X86-NEXT: vpmovwb %zmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x48,0x30,0x00]
985 ; X86-NEXT: vpmovwb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x30,0x00]
986 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
987 ; X86-NEXT: retl # encoding: [0xc3]
989 ; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_512:
991 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
992 ; X64-NEXT: vpmovwb %zmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x48,0x30,0x07]
993 ; X64-NEXT: vpmovwb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x30,0x07]
994 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
995 ; X64-NEXT: retq # encoding: [0xc3]
996 call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
997 call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
1001 declare <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16>, <32 x i8>, i32)
; Calls mask.pmovs.wb.512 on %x0 three times: with an all-ones mask (i32 -1),
; with mask %x2 and passthrough %x1, and with mask %x2 and a zeroinitializer
; passthrough. The three results are summed. The expected assembly has one
; vpmovswb per form (plain, {%k1}, {%k1} {z}).
1003 define <32 x i8>@test_int_x86_avx512_mask_pmovs_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
1004 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_wb_512:
1006 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1007 ; X86-NEXT: vpmovswb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x20,0xc1]
1008 ; X86-NEXT: vpmovswb %zmm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x20,0xc2]
1009 ; X86-NEXT: vpaddb %ymm2, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0xfc,0xca]
1010 ; X86-NEXT: vpmovswb %zmm0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x48,0x20,0xc0]
1011 ; X86-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfc,0xc1]
1012 ; X86-NEXT: retl # encoding: [0xc3]
1014 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_wb_512:
1016 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1017 ; X64-NEXT: vpmovswb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x20,0xc1]
1018 ; X64-NEXT: vpmovswb %zmm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x20,0xc2]
1019 ; X64-NEXT: vpaddb %ymm2, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0xfc,0xca]
1020 ; X64-NEXT: vpmovswb %zmm0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x48,0x20,0xc0]
1021 ; X64-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfc,0xc1]
1022 ; X64-NEXT: retq # encoding: [0xc3]
1023 %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
1024 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
1025 %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
1026 %res3 = add <32 x i8> %res0, %res1
1027 %res4 = add <32 x i8> %res3, %res2
1031 declare void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16>, i32)
; Calls mask.pmovs.wb.mem.512 on %ptr twice with the same source %x1: first
; with an all-ones mask (i32 -1), then with the variable mask %x2. The expected
; assembly is an unmasked vpmovswb store followed by a {%k1}-masked one.
1033 define void @test_int_x86_avx512_mask_pmovs_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
1034 ; X86-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512:
1036 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
1037 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1038 ; X86-NEXT: vpmovswb %zmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x48,0x20,0x00]
1039 ; X86-NEXT: vpmovswb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x20,0x00]
1040 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1041 ; X86-NEXT: retl # encoding: [0xc3]
1043 ; X64-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512:
1045 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1046 ; X64-NEXT: vpmovswb %zmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x48,0x20,0x07]
1047 ; X64-NEXT: vpmovswb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x20,0x07]
1048 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1049 ; X64-NEXT: retq # encoding: [0xc3]
1050 call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
1051 call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
1055 declare <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16>, <32 x i8>, i32)
; Calls mask.pmovus.wb.512 on %x0 three times: with an all-ones mask (i32 -1),
; with mask %x2 and passthrough %x1, and with mask %x2 and a zeroinitializer
; passthrough. The three results are summed. The expected assembly has one
; vpmovuswb per form (plain, {%k1}, {%k1} {z}).
1057 define <32 x i8>@test_int_x86_avx512_mask_pmovus_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
1058 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_wb_512:
1060 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1061 ; X86-NEXT: vpmovuswb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x10,0xc1]
1062 ; X86-NEXT: vpmovuswb %zmm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x10,0xc2]
1063 ; X86-NEXT: vpaddb %ymm2, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0xfc,0xca]
1064 ; X86-NEXT: vpmovuswb %zmm0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x48,0x10,0xc0]
1065 ; X86-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfc,0xc1]
1066 ; X86-NEXT: retl # encoding: [0xc3]
1068 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_wb_512:
1070 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1071 ; X64-NEXT: vpmovuswb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x10,0xc1]
1072 ; X64-NEXT: vpmovuswb %zmm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x10,0xc2]
1073 ; X64-NEXT: vpaddb %ymm2, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0xfc,0xca]
1074 ; X64-NEXT: vpmovuswb %zmm0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x48,0x10,0xc0]
1075 ; X64-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfc,0xc1]
1076 ; X64-NEXT: retq # encoding: [0xc3]
1077 %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
1078 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
1079 %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
1080 %res3 = add <32 x i8> %res0, %res1
1081 %res4 = add <32 x i8> %res3, %res2
1085 declare void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16>, i32)
; Calls mask.pmovus.wb.mem.512 on %ptr twice with the same source %x1: first
; with an all-ones mask (i32 -1), then with the variable mask %x2. The expected
; assembly is an unmasked vpmovuswb store followed by a {%k1}-masked one.
1087 define void @test_int_x86_avx512_mask_pmovus_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
1088 ; X86-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512:
1090 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
1091 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1092 ; X86-NEXT: vpmovuswb %zmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x48,0x10,0x00]
1093 ; X86-NEXT: vpmovuswb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x10,0x00]
1094 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1095 ; X86-NEXT: retl # encoding: [0xc3]
1097 ; X64-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512:
1099 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1100 ; X64-NEXT: vpmovuswb %zmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x48,0x10,0x07]
1101 ; X64-NEXT: vpmovuswb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x10,0x07]
1102 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1103 ; X64-NEXT: retq # encoding: [0xc3]
1104 call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
1105 call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
1109 declare <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8>, <64 x i8>)
; Computes pmaddubs.w.512 of the <64 x i8> operands (%x0, %x1) twice: once
; merged into the <32 x i16> %x2 under the i32 mask %x3 and once unmasked,
; then adds the two results. The expected assembly is an unmasked vpmaddubsw,
; a {%k1}-masked vpmaddubsw, and a vpaddw.
1111 define <32 x i16> @test_int_x86_avx512_mask_pmaddubs_w_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3) {
1112 ; X86-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512:
1114 ; X86-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x04,0xd9]
1115 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1116 ; X86-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x04,0xd1]
1117 ; X86-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
1118 ; X86-NEXT: retl # encoding: [0xc3]
1120 ; X64-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512:
1122 ; X64-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x04,0xd9]
1123 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1124 ; X64-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x04,0xd1]
1125 ; X64-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
1126 ; X64-NEXT: retq # encoding: [0xc3]
1127 %1 = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1)
1128 %2 = bitcast i32 %x3 to <32 x i1>
1129 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2
1130 %4 = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1)
1131 %res2 = add <32 x i16> %3, %4
1132 ret <32 x i16> %res2
1135 declare <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16>, <32 x i16>)
; Computes pmaddw.d.512 of the <32 x i16> operands (%x0, %x1) twice: once
; merged into the <16 x i32> %x2 under the i16 mask %x3 (bitcast to
; <16 x i1>) and once unmasked, then adds the two results. The expected mask
; load is kmovw from the stack on the 32-bit target and kmovd %edi on the
; 64-bit target.
1137 define <16 x i32> @test_int_x86_avx512_mask_pmaddw_d_512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3) {
1138 ; X86-LABEL: test_int_x86_avx512_mask_pmaddw_d_512:
1140 ; X86-NEXT: vpmaddwd %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xf5,0xd9]
1141 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1142 ; X86-NEXT: vpmaddwd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf5,0xd1]
1143 ; X86-NEXT: vpaddd %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
1144 ; X86-NEXT: retl # encoding: [0xc3]
1146 ; X64-LABEL: test_int_x86_avx512_mask_pmaddw_d_512:
1148 ; X64-NEXT: vpmaddwd %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xf5,0xd9]
1149 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1150 ; X64-NEXT: vpmaddwd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf5,0xd1]
1151 ; X64-NEXT: vpaddd %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
1152 ; X64-NEXT: retq # encoding: [0xc3]
1153 %1 = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1)
1154 %2 = bitcast i16 %x3 to <16 x i1>
1155 %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2
1156 %4 = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1)
1157 %res2 = add <16 x i32> %3, %4
1158 ret <16 x i32> %res2
1161 declare <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8>, <64 x i8>, i32)
; Calls dbpsadbw.512 on (%x0, %x1) with immediate 2 three times: merged into
; %x3 under the i32 mask %x4, selected against zeroinitializer under the same
; mask, and unmasked. The three results are summed. The expected assembly has
; one vdbpsadbw $2 per form (plain, {%k1}, {%k1} {z}) followed by two vpaddw.
1163 define <32 x i16>@test_int_x86_avx512_mask_dbpsadbw_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x3, i32 %x4) {
1164 ; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_512:
1166 ; X86-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xd9,0x02]
1167 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1168 ; X86-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x42,0xd1,0x02]
1169 ; X86-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x42,0xc1,0x02]
1170 ; X86-NEXT: vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
1171 ; X86-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
1172 ; X86-NEXT: retl # encoding: [0xc3]
1174 ; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_512:
1176 ; X64-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xd9,0x02]
1177 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1178 ; X64-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x42,0xd1,0x02]
1179 ; X64-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x42,0xc1,0x02]
1180 ; X64-NEXT: vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
1181 ; X64-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
1182 ; X64-NEXT: retq # encoding: [0xc3]
1183 %1 = call <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2)
1184 %2 = bitcast i32 %x4 to <32 x i1>
1185 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x3
1186 %4 = call <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2)
1187 %5 = bitcast i32 %x4 to <32 x i1>
1188 %6 = select <32 x i1> %5, <32 x i16> %4, <32 x i16> zeroinitializer
1189 %7 = call <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2)
1190 %res3 = add <32 x i16> %3, %6
1191 %res4 = add <32 x i16> %res3, %7
1192 ret <32 x i16> %res4
1195 declare <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8>, <64 x i8>)
; Calls psad.bw.512 twice, on (%x0, %x1) and on (%x0, %x2), and adds the two
; <8 x i64> results. The function takes no mask operand despite "mask" in its
; name. The expectations are shared by both RUN lines: two vpsadbw and a
; vpaddq.
1197 define <8 x i64>@test_int_x86_avx512_mask_psadb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2){
1198 ; CHECK-LABEL: test_int_x86_avx512_mask_psadb_w_512:
1200 ; CHECK-NEXT: vpsadbw %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf1,0x7d,0x48,0xf6,0xc9]
1201 ; CHECK-NEXT: vpsadbw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xf6,0xc2]
1202 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
1203 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1204 %res = call <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x1)
1205 %res1 = call <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x2)
1206 %res2 = add <8 x i64> %res, %res1
1210 declare <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16>, <32 x i16>) nounwind readnone
; Calls psrlv.w.512 with two constant vectors in an optsize function: the
; value vector is thirty-one 4s followed by -1, the shift-count vector is
; thirty-one 1s followed by -1. The expected assembly loads the value vector
; from the constant pool (its last lane prints as 65535) and still emits a
; vpsrlvw whose count operand is a constant-pool memory reference.
1212 define <32 x i16> @test_x86_avx512_psrlv_w_512_const() optsize {
1213 ; X86-LABEL: test_x86_avx512_psrlv_w_512_const:
1215 ; X86-NEXT: vmovdqa64 {{.*#+}} zmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
1216 ; X86-NEXT: # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x05,A,A,A,A]
1217 ; X86-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
1218 ; X86-NEXT: vpsrlvw {{\.LCPI.*}}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x10,0x05,A,A,A,A]
1219 ; X86-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
1220 ; X86-NEXT: retl # encoding: [0xc3]
1222 ; X64-LABEL: test_x86_avx512_psrlv_w_512_const:
1224 ; X64-NEXT: vmovdqa64 {{.*#+}} zmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
1225 ; X64-NEXT: # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x05,A,A,A,A]
1226 ; X64-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
1227 ; X64-NEXT: vpsrlvw {{.*}}(%rip), %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x10,0x05,A,A,A,A]
1228 ; X64-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
1229 ; X64-NEXT: retq # encoding: [0xc3]
1230 %res1 = call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 -1>, <32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 -1>)
1231 ret <32 x i16> %res1
; Calls psrlv.w.512 on (%x0, %x1) three times: merged into %x2 under the i32
; mask %x3, selected against zeroinitializer under the same mask, and
; unmasked. The three results are summed. The expected assembly has one
; vpsrlvw per form (plain, {%k1}, {%k1} {z}) followed by two vpaddw.
1234 define <32 x i16>@test_int_x86_avx512_mask_psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1235 ; X86-LABEL: test_int_x86_avx512_mask_psrlv32hi:
1237 ; X86-NEXT: vpsrlvw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x10,0xd9]
1238 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1239 ; X86-NEXT: vpsrlvw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x10,0xd1]
1240 ; X86-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x10,0xc1]
1241 ; X86-NEXT: vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
1242 ; X86-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
1243 ; X86-NEXT: retl # encoding: [0xc3]
1245 ; X64-LABEL: test_int_x86_avx512_mask_psrlv32hi:
1247 ; X64-NEXT: vpsrlvw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x10,0xd9]
1248 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1249 ; X64-NEXT: vpsrlvw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x10,0xd1]
1250 ; X64-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x10,0xc1]
1251 ; X64-NEXT: vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
1252 ; X64-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
1253 ; X64-NEXT: retq # encoding: [0xc3]
1254 %1 = call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %x0, <32 x i16> %x1)
1255 %2 = bitcast i32 %x3 to <32 x i1>
1256 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2
1257 %4 = call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %x0, <32 x i16> %x1)
1258 %5 = bitcast i32 %x3 to <32 x i1>
1259 %6 = select <32 x i1> %5, <32 x i16> %4, <32 x i16> zeroinitializer
1260 %7 = call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %x0, <32 x i16> %x1)
1261 %res3 = add <32 x i16> %3, %6
1262 %res4 = add <32 x i16> %res3, %7
1263 ret <32 x i16> %res4
; Intrinsic under test in the psrav32_hi tests that follow: takes two
; <32 x i16> operands and is expected to lower to vpsravw.
1266 declare <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16>, <32 x i16>)
; Calls @llvm.x86.avx512.psrav.w.512 on (%x0, %x1) three times and adds the
; results together: merge-masked (select against %x2), zero-masked (select
; against zeroinitializer), and unmasked. The mask is %x3 bitcast to
; <32 x i1>. Expected code: vpsravw in plain, {%k1}, and {%k1} {z} forms.
; The X86 and X64 prefixes differ only in how the mask reaches %k1.
1268 define <32 x i16>@test_int_x86_avx512_mask_psrav32_hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1269 ; X86-LABEL: test_int_x86_avx512_mask_psrav32_hi:
1271 ; X86-NEXT: vpsravw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x11,0xd9]
1272 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1273 ; X86-NEXT: vpsravw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x11,0xd1]
1274 ; X86-NEXT: vpsravw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x11,0xc1]
1275 ; X86-NEXT: vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
1276 ; X86-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
1277 ; X86-NEXT: retl # encoding: [0xc3]
1279 ; X64-LABEL: test_int_x86_avx512_mask_psrav32_hi:
1281 ; X64-NEXT: vpsravw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x11,0xd9]
1282 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1283 ; X64-NEXT: vpsravw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x11,0xd1]
1284 ; X64-NEXT: vpsravw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x11,0xc1]
1285 ; X64-NEXT: vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
1286 ; X64-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
1287 ; X64-NEXT: retq # encoding: [0xc3]
1288 %1 = call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %x0, <32 x i16> %x1)
1289 %2 = bitcast i32 %x3 to <32 x i1>
1290 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2
1291 %4 = call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %x0, <32 x i16> %x1)
1292 %5 = bitcast i32 %x3 to <32 x i1>
1293 %6 = select <32 x i1> %5, <32 x i16> %4, <32 x i16> zeroinitializer
1294 %7 = call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %x0, <32 x i16> %x1)
1295 %res3 = add <32 x i16> %3, %6
1296 %res4 = add <32 x i16> %res3, %7
1297 ret <32 x i16> %res4
; Calls @llvm.x86.avx512.psrav.w.512 with two constant vectors (an 8-element
; pattern repeated four times). The value operand mixes positive and negative
; i16 values; the count operand includes values of 16 and above.
; Expected code: the value vector is loaded from the constant pool with
; vmovdqa64 (negative values print as unsigned, e.g. -12 as 65524) and
; vpsravw takes its count operand from memory.
; NOTE(review): despite "mask" in the name, %x0..%x3 are not referenced by the
; visible body and no masking is performed.
1300 define <32 x i16>@test_int_x86_avx512_mask_psrav32_hi_const(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1301 ; X86-LABEL: test_int_x86_avx512_mask_psrav32_hi_const:
1303 ; X86-NEXT: vmovdqa64 {{.*#+}} zmm0 = [2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51]
1304 ; X86-NEXT: # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x05,A,A,A,A]
1305 ; X86-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
1306 ; X86-NEXT: vpsravw {{\.LCPI.*}}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x11,0x05,A,A,A,A]
1307 ; X86-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
1308 ; X86-NEXT: retl # encoding: [0xc3]
1310 ; X64-LABEL: test_int_x86_avx512_mask_psrav32_hi_const:
1312 ; X64-NEXT: vmovdqa64 {{.*#+}} zmm0 = [2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51]
1313 ; X64-NEXT: # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x05,A,A,A,A]
1314 ; X64-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
1315 ; X64-NEXT: vpsravw {{.*}}(%rip), %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x11,0x05,A,A,A,A]
1316 ; X64-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
1317 ; X64-NEXT: retq # encoding: [0xc3]
1318 %1 = call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> <i16 2, i16 9, i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51, i16 2, i16 9, i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51, i16 2, i16 9, i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51, i16 2, i16 9, i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51>, <32 x i16> <i16 1, i16 10, i16 35, i16 52, i16 69, i16 9, i16 16, i16 49, i16 1, i16 10, i16 35, i16 52, i16 69, i16 9, i16 16, i16 49, i16 1, i16 10, i16 35, i16 52, i16 69, i16 9, i16 16, i16 49, i16 1, i16 10, i16 35, i16 52, i16 69, i16 9, i16 16, i16 49>)
; Calls @llvm.x86.avx512.psllv.w.512 on (%x0, %x1) three times and adds the
; results together: merge-masked (select against %x2), zero-masked (select
; against zeroinitializer), and unmasked. The mask is %x3 bitcast to
; <32 x i1>. Expected code: vpsllvw in plain, {%k1}, and {%k1} {z} forms.
; The intrinsic's declaration appears later in the file.
1322 define <32 x i16>@test_int_x86_avx512_mask_psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1323 ; X86-LABEL: test_int_x86_avx512_mask_psllv32hi:
1325 ; X86-NEXT: vpsllvw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x12,0xd9]
1326 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1327 ; X86-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x12,0xd1]
1328 ; X86-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x12,0xc1]
1329 ; X86-NEXT: vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
1330 ; X86-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
1331 ; X86-NEXT: retl # encoding: [0xc3]
1333 ; X64-LABEL: test_int_x86_avx512_mask_psllv32hi:
1335 ; X64-NEXT: vpsllvw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x12,0xd9]
1336 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1337 ; X64-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x12,0xd1]
1338 ; X64-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x12,0xc1]
1339 ; X64-NEXT: vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
1340 ; X64-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
1341 ; X64-NEXT: retq # encoding: [0xc3]
1342 %1 = call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %x0, <32 x i16> %x1)
1343 %2 = bitcast i32 %x3 to <32 x i1>
1344 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2
1345 %4 = call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %x0, <32 x i16> %x1)
1346 %5 = bitcast i32 %x3 to <32 x i1>
1347 %6 = select <32 x i1> %5, <32 x i16> %4, <32 x i16> zeroinitializer
1348 %7 = call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %x0, <32 x i16> %x1)
1349 %res3 = add <32 x i16> %3, %6
1350 %res4 = add <32 x i16> %res3, %7
1351 ret <32 x i16> %res4
; Intrinsic under test in the permvar_hi_512 test that follows: takes two
; <32 x i16> operands and is expected to lower to vpermw.
1354 declare <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16>, <32 x i16>)
; Calls @llvm.x86.avx512.permvar.hi.512 on (%x0, %x1) three times and adds the
; results together: merge-masked (select against %x2), zero-masked (select
; against zeroinitializer), and unmasked. The mask is %x3 bitcast to
; <32 x i1>. Expected code: vpermw in plain, {%k1}, and {%k1} {z} forms.
1356 define <32 x i16>@test_int_x86_avx512_mask_permvar_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1357 ; X86-LABEL: test_int_x86_avx512_mask_permvar_hi_512:
1359 ; X86-NEXT: vpermw %zmm0, %zmm1, %zmm3 # encoding: [0x62,0xf2,0xf5,0x48,0x8d,0xd8]
1360 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1361 ; X86-NEXT: vpermw %zmm0, %zmm1, %zmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x8d,0xd0]
1362 ; X86-NEXT: vpermw %zmm0, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x8d,0xc0]
1363 ; X86-NEXT: vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
1364 ; X86-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
1365 ; X86-NEXT: retl # encoding: [0xc3]
1367 ; X64-LABEL: test_int_x86_avx512_mask_permvar_hi_512:
1369 ; X64-NEXT: vpermw %zmm0, %zmm1, %zmm3 # encoding: [0x62,0xf2,0xf5,0x48,0x8d,0xd8]
1370 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1371 ; X64-NEXT: vpermw %zmm0, %zmm1, %zmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x8d,0xd0]
1372 ; X64-NEXT: vpermw %zmm0, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x8d,0xc0]
1373 ; X64-NEXT: vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
1374 ; X64-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
1375 ; X64-NEXT: retq # encoding: [0xc3]
1376 %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1)
1377 %2 = bitcast i32 %x3 to <32 x i1>
1378 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2
1379 %4 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1)
1380 %5 = bitcast i32 %x3 to <32 x i1>
1381 %6 = select <32 x i1> %5, <32 x i16> %4, <32 x i16> zeroinitializer
1382 %7 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1)
1383 %res3 = add <32 x i16> %3, %6
1384 %res4 = add <32 x i16> %res3, %7
1385 ret <32 x i16> %res4
; Unmasked call to @llvm.x86.avx512.psll.w.512 with a <8 x i16> count operand.
; Expected code is a single vpsllw taking the count from %xmm1; the output is
; the same for both RUN lines, so the shared CHECK prefix is used.
1388 define <32 x i16> @test_x86_avx512_psll_w_512(<32 x i16> %a0, <8 x i16> %a1) {
1389 ; CHECK-LABEL: test_x86_avx512_psll_w_512:
1391 ; CHECK-NEXT: vpsllw %xmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xf1,0xc1]
1392 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1393 %res = call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
; Merge-masked form: the result of @llvm.x86.avx512.psll.w.512 is selected
; against %passthru using %mask bitcast to <32 x i1>.
; Expected code: vpsllw with {%k1} writing into the passthru register (zmm2),
; then a vmovdqa64 copy into zmm0. X86 reads the mask from the stack, X64
; from %edi.
1396 define <32 x i16> @test_x86_avx512_mask_psll_w_512(<32 x i16> %a0, <8 x i16> %a1, <32 x i16> %passthru, i32 %mask) {
1397 ; X86-LABEL: test_x86_avx512_mask_psll_w_512:
1399 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1400 ; X86-NEXT: vpsllw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf1,0xd1]
1401 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1402 ; X86-NEXT: retl # encoding: [0xc3]
1404 ; X64-LABEL: test_x86_avx512_mask_psll_w_512:
1406 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1407 ; X64-NEXT: vpsllw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf1,0xd1]
1408 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1409 ; X64-NEXT: retq # encoding: [0xc3]
1410 %res = call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
1411 %mask.cast = bitcast i32 %mask to <32 x i1>
1412 %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru
1413 ret <32 x i16> %res2
; Zero-masked form: the result of @llvm.x86.avx512.psll.w.512 is selected
; against zeroinitializer using %mask bitcast to <32 x i1>.
; Expected code: a single vpsllw with {%k1} {z}, written directly to zmm0.
1415 define <32 x i16> @test_x86_avx512_maskz_psll_w_512(<32 x i16> %a0, <8 x i16> %a1, i32 %mask) {
1416 ; X86-LABEL: test_x86_avx512_maskz_psll_w_512:
1418 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1419 ; X86-NEXT: vpsllw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf1,0xc1]
1420 ; X86-NEXT: retl # encoding: [0xc3]
1422 ; X64-LABEL: test_x86_avx512_maskz_psll_w_512:
1424 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1425 ; X64-NEXT: vpsllw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf1,0xc1]
1426 ; X64-NEXT: retq # encoding: [0xc3]
1427 %res = call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
1428 %mask.cast = bitcast i32 %mask to <32 x i1>
1429 %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer
1430 ret <32 x i16> %res2
; Intrinsic used by the three psll_w_512 tests above: <32 x i16> value plus a
; <8 x i16> count operand; the tests expect it to lower to vpsllw with an xmm
; count register.
1432 declare <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16>, <8 x i16>) nounwind readnone
; Calls @llvm.x86.avx512.psllv.w.512 with constant operands in an optsize
; function: the value vector is thirty-one 4s followed by -1, the count
; vector is thirty-one 1s followed by -1.
; Expected code: the value vector is loaded from the constant pool with
; vmovdqa64 (-1 prints as 65535) and vpsllvw takes its count operand from
; memory. X86 and X64 differ in the addressing/fixup kind (absolute vs.
; RIP-relative).
1435 define <32 x i16> @test_x86_avx512_psllv_w_512_const() optsize {
1436 ; X86-LABEL: test_x86_avx512_psllv_w_512_const:
1438 ; X86-NEXT: vmovdqa64 {{.*#+}} zmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
1439 ; X86-NEXT: # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x05,A,A,A,A]
1440 ; X86-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
1441 ; X86-NEXT: vpsllvw {{\.LCPI.*}}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x12,0x05,A,A,A,A]
1442 ; X86-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
1443 ; X86-NEXT: retl # encoding: [0xc3]
1445 ; X64-LABEL: test_x86_avx512_psllv_w_512_const:
1447 ; X64-NEXT: vmovdqa64 {{.*#+}} zmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
1448 ; X64-NEXT: # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x05,A,A,A,A]
1449 ; X64-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
1450 ; X64-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x12,0x05,A,A,A,A]
1451 ; X64-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
1452 ; X64-NEXT: retq # encoding: [0xc3]
1453 %res1 = call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 -1>, <32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 -1>)
1454 ret <32 x i16> %res1
; Intrinsic used by test_x86_avx512_psllv_w_512_const above and by
; test_int_x86_avx512_mask_psllv32hi earlier in the file; both expect it to
; lower to vpsllvw.
1456 declare <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16>, <32 x i16>) nounwind readnone
; Unmasked call to @llvm.x86.avx512.pslli.w.512 with the constant count 7.
; Expected code is a single immediate-form vpsllw $7; the output is the same
; for both RUN lines, so the shared CHECK prefix is used.
1458 define <32 x i16> @test_x86_avx512_pslli_w_512(<32 x i16> %a0) {
1459 ; CHECK-LABEL: test_x86_avx512_pslli_w_512:
1461 ; CHECK-NEXT: vpsllw $7, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x71,0xf0,0x07]
1462 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1463 %res = call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
; Merge-masked form of the immediate shift: the result of
; @llvm.x86.avx512.pslli.w.512(%a0, 7) is selected against %passthru using
; %mask bitcast to <32 x i1>.
; Expected code: vpsllw $7 with {%k1} writing into the passthru register
; (zmm1), then a vmovdqa64 copy into zmm0.
1466 define <32 x i16> @test_x86_avx512_mask_pslli_w_512(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
1467 ; X86-LABEL: test_x86_avx512_mask_pslli_w_512:
1469 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1470 ; X86-NEXT: vpsllw $7, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xf0,0x07]
1471 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1472 ; X86-NEXT: retl # encoding: [0xc3]
1474 ; X64-LABEL: test_x86_avx512_mask_pslli_w_512:
1476 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1477 ; X64-NEXT: vpsllw $7, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xf0,0x07]
1478 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1479 ; X64-NEXT: retq # encoding: [0xc3]
1480 %res = call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
1481 %mask.cast = bitcast i32 %mask to <32 x i1>
1482 %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru
1483 ret <32 x i16> %res2
; Zero-masked form of the immediate shift: the result of
; @llvm.x86.avx512.pslli.w.512(%a0, 7) is selected against zeroinitializer
; using %mask bitcast to <32 x i1>.
; Expected code: a single vpsllw $7 with {%k1} {z}.
1485 define <32 x i16> @test_x86_avx512_maskz_pslli_w_512(<32 x i16> %a0, i32 %mask) {
1486 ; X86-LABEL: test_x86_avx512_maskz_pslli_w_512:
1488 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1489 ; X86-NEXT: vpsllw $7, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xf0,0x07]
1490 ; X86-NEXT: retl # encoding: [0xc3]
1492 ; X64-LABEL: test_x86_avx512_maskz_pslli_w_512:
1494 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1495 ; X64-NEXT: vpsllw $7, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xf0,0x07]
1496 ; X64-NEXT: retq # encoding: [0xc3]
1497 %res = call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
1498 %mask.cast = bitcast i32 %mask to <32 x i1>
1499 %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer
1500 ret <32 x i16> %res2
; Intrinsic used by the three pslli_w_512 tests above: <32 x i16> value plus an
; i32 count; the tests pass the constant 7 and expect vpsllw $7.
1502 declare <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16>, i32) nounwind readnone
; Unmasked call to @llvm.x86.avx512.psra.w.512 with a <8 x i16> count operand.
; Expected code is a single vpsraw taking the count from %xmm1; the output is
; the same for both RUN lines, so the shared CHECK prefix is used.
1505 define <32 x i16> @test_x86_avx512_psra_w_512(<32 x i16> %a0, <8 x i16> %a1) {
1506 ; CHECK-LABEL: test_x86_avx512_psra_w_512:
1508 ; CHECK-NEXT: vpsraw %xmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe1,0xc1]
1509 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1510 %res = call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
; Merge-masked form: the result of @llvm.x86.avx512.psra.w.512 is selected
; against %passthru using %mask bitcast to <32 x i1>.
; Expected code: vpsraw with {%k1} writing into the passthru register (zmm2),
; then a vmovdqa64 copy into zmm0.
1513 define <32 x i16> @test_x86_avx512_mask_psra_w_512(<32 x i16> %a0, <8 x i16> %a1, <32 x i16> %passthru, i32 %mask) {
1514 ; X86-LABEL: test_x86_avx512_mask_psra_w_512:
1516 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1517 ; X86-NEXT: vpsraw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe1,0xd1]
1518 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1519 ; X86-NEXT: retl # encoding: [0xc3]
1521 ; X64-LABEL: test_x86_avx512_mask_psra_w_512:
1523 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1524 ; X64-NEXT: vpsraw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe1,0xd1]
1525 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1526 ; X64-NEXT: retq # encoding: [0xc3]
1527 %res = call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
1528 %mask.cast = bitcast i32 %mask to <32 x i1>
1529 %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru
1530 ret <32 x i16> %res2
; Zero-masked form: the result of @llvm.x86.avx512.psra.w.512 is selected
; against zeroinitializer using %mask bitcast to <32 x i1>.
; Expected code: a single vpsraw with {%k1} {z}, written directly to zmm0.
1532 define <32 x i16> @test_x86_avx512_maskz_psra_w_512(<32 x i16> %a0, <8 x i16> %a1, i32 %mask) {
1533 ; X86-LABEL: test_x86_avx512_maskz_psra_w_512:
1535 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1536 ; X86-NEXT: vpsraw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe1,0xc1]
1537 ; X86-NEXT: retl # encoding: [0xc3]
1539 ; X64-LABEL: test_x86_avx512_maskz_psra_w_512:
1541 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1542 ; X64-NEXT: vpsraw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe1,0xc1]
1543 ; X64-NEXT: retq # encoding: [0xc3]
1544 %res = call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
1545 %mask.cast = bitcast i32 %mask to <32 x i1>
1546 %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer
1547 ret <32 x i16> %res2
; Intrinsic used by the three psra_w_512 tests above: <32 x i16> value plus a
; <8 x i16> count operand; the tests expect it to lower to vpsraw with an xmm
; count register.
1549 declare <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16>, <8 x i16>) nounwind readnone
; Unmasked call to @llvm.x86.avx512.psrai.w.512 with the constant count 7.
; Expected code is a single immediate-form vpsraw $7; the output is the same
; for both RUN lines, so the shared CHECK prefix is used.
1552 define <32 x i16> @test_x86_avx512_psrai_w_512(<32 x i16> %a0) {
1553 ; CHECK-LABEL: test_x86_avx512_psrai_w_512:
1555 ; CHECK-NEXT: vpsraw $7, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x71,0xe0,0x07]
1556 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1557 %res = call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
; Merge-masked form of the immediate shift: the result of
; @llvm.x86.avx512.psrai.w.512(%a0, 7) is selected against %passthru using
; %mask bitcast to <32 x i1>.
; Expected code: vpsraw $7 with {%k1} writing into the passthru register
; (zmm1), then a vmovdqa64 copy into zmm0.
1560 define <32 x i16> @test_x86_avx512_mask_psrai_w_512(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
1561 ; X86-LABEL: test_x86_avx512_mask_psrai_w_512:
1563 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1564 ; X86-NEXT: vpsraw $7, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xe0,0x07]
1565 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1566 ; X86-NEXT: retl # encoding: [0xc3]
1568 ; X64-LABEL: test_x86_avx512_mask_psrai_w_512:
1570 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1571 ; X64-NEXT: vpsraw $7, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xe0,0x07]
1572 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1573 ; X64-NEXT: retq # encoding: [0xc3]
1574 %res = call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
1575 %mask.cast = bitcast i32 %mask to <32 x i1>
1576 %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru
1577 ret <32 x i16> %res2
; Zero-masked form of the immediate shift: the result of
; @llvm.x86.avx512.psrai.w.512(%a0, 7) is selected against zeroinitializer
; using %mask bitcast to <32 x i1>.
; Expected code: a single vpsraw $7 with {%k1} {z}.
; NOTE(review): %passthru is declared but not referenced by the body; the
; other maskz tests in this group (pslli, psrli) do not take it. Looks like a
; leftover from the merge-masked variant -- confirm before removing.
1579 define <32 x i16> @test_x86_avx512_maskz_psrai_w_512(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
1580 ; X86-LABEL: test_x86_avx512_maskz_psrai_w_512:
1582 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1583 ; X86-NEXT: vpsraw $7, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xe0,0x07]
1584 ; X86-NEXT: retl # encoding: [0xc3]
1586 ; X64-LABEL: test_x86_avx512_maskz_psrai_w_512:
1588 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1589 ; X64-NEXT: vpsraw $7, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xe0,0x07]
1590 ; X64-NEXT: retq # encoding: [0xc3]
1591 %res = call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
1592 %mask.cast = bitcast i32 %mask to <32 x i1>
1593 %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer
1594 ret <32 x i16> %res2
; Intrinsic used by the three psrai_w_512 tests above: <32 x i16> value plus an
; i32 count; the tests pass the constant 7 and expect vpsraw $7.
1596 declare <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16>, i32) nounwind readnone
; Unmasked call to @llvm.x86.avx512.psrl.w.512 with a <8 x i16> count operand.
; Expected code is a single vpsrlw taking the count from %xmm1; the output is
; the same for both RUN lines, so the shared CHECK prefix is used.
1599 define <32 x i16> @test_x86_avx512_psrl_w_512(<32 x i16> %a0, <8 x i16> %a1) {
1600 ; CHECK-LABEL: test_x86_avx512_psrl_w_512:
1602 ; CHECK-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd1,0xc1]
1603 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1604 %res = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
; Merge-masked form: the result of @llvm.x86.avx512.psrl.w.512 is selected
; against %passthru using %mask bitcast to <32 x i1>.
; Expected code: vpsrlw with {%k1} writing into the passthru register (zmm2),
; then a vmovdqa64 copy into zmm0.
1607 define <32 x i16> @test_x86_avx512_mask_psrl_w_512(<32 x i16> %a0, <8 x i16> %a1, <32 x i16> %passthru, i32 %mask) {
1608 ; X86-LABEL: test_x86_avx512_mask_psrl_w_512:
1610 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1611 ; X86-NEXT: vpsrlw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd1,0xd1]
1612 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1613 ; X86-NEXT: retl # encoding: [0xc3]
1615 ; X64-LABEL: test_x86_avx512_mask_psrl_w_512:
1617 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1618 ; X64-NEXT: vpsrlw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd1,0xd1]
1619 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1620 ; X64-NEXT: retq # encoding: [0xc3]
1621 %res = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
1622 %mask.cast = bitcast i32 %mask to <32 x i1>
1623 %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru
1624 ret <32 x i16> %res2
; Zero-masked form: the result of @llvm.x86.avx512.psrl.w.512 is selected
; against zeroinitializer using %mask bitcast to <32 x i1>.
; Expected code: a single vpsrlw with {%k1} {z}, written directly to zmm0.
1626 define <32 x i16> @test_x86_avx512_maskz_psrl_w_512(<32 x i16> %a0, <8 x i16> %a1, i32 %mask) {
1627 ; X86-LABEL: test_x86_avx512_maskz_psrl_w_512:
1629 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1630 ; X86-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd1,0xc1]
1631 ; X86-NEXT: retl # encoding: [0xc3]
1633 ; X64-LABEL: test_x86_avx512_maskz_psrl_w_512:
1635 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1636 ; X64-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd1,0xc1]
1637 ; X64-NEXT: retq # encoding: [0xc3]
1638 %res = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
1639 %mask.cast = bitcast i32 %mask to <32 x i1>
1640 %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer
1641 ret <32 x i16> %res2
; Intrinsic used by the psrl_w_512 tests above and the _load variant below:
; <32 x i16> value plus a <8 x i16> count operand; the tests expect it to
; lower to vpsrlw.
1643 declare <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16>, <8 x i16>) nounwind readnone
; Same as the unmasked psrl test, but the <8 x i16> count operand is loaded
; from the pointer %p.
; Expected code: the load is folded into vpsrlw as a memory operand
; ((%eax) after fetching the pointer from the stack on X86, (%rdi) on X64).
1645 define <32 x i16> @test_x86_avx512_psrl_w_512_load(<32 x i16> %a0, <8 x i16>* %p) {
1646 ; X86-LABEL: test_x86_avx512_psrl_w_512_load:
1648 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1649 ; X86-NEXT: vpsrlw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd1,0x00]
1650 ; X86-NEXT: retl # encoding: [0xc3]
1652 ; X64-LABEL: test_x86_avx512_psrl_w_512_load:
1654 ; X64-NEXT: vpsrlw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd1,0x07]
1655 ; X64-NEXT: retq # encoding: [0xc3]
1656 %a1 = load <8 x i16>, <8 x i16>* %p
1657 %res = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
; Unmasked call to @llvm.x86.avx512.psrli.w.512 with the constant count 7.
; Expected code is a single immediate-form vpsrlw $7; the output is the same
; for both RUN lines, so the shared CHECK prefix is used.
1661 define <32 x i16> @test_x86_avx512_psrli_w_512(<32 x i16> %a0) {
1662 ; CHECK-LABEL: test_x86_avx512_psrli_w_512:
1664 ; CHECK-NEXT: vpsrlw $7, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x71,0xd0,0x07]
1665 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1666 %res = call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
; Merge-masked form of the immediate shift: the result of
; @llvm.x86.avx512.psrli.w.512(%a0, 7) is selected against %passthru using
; %mask bitcast to <32 x i1>.
; Expected code: vpsrlw $7 with {%k1} writing into the passthru register
; (zmm1), then a vmovdqa64 copy into zmm0.
1669 define <32 x i16> @test_x86_avx512_mask_psrli_w_512(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
1670 ; X86-LABEL: test_x86_avx512_mask_psrli_w_512:
1672 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1673 ; X86-NEXT: vpsrlw $7, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xd0,0x07]
1674 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1675 ; X86-NEXT: retl # encoding: [0xc3]
1677 ; X64-LABEL: test_x86_avx512_mask_psrli_w_512:
1679 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1680 ; X64-NEXT: vpsrlw $7, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xd0,0x07]
1681 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1682 ; X64-NEXT: retq # encoding: [0xc3]
1683 %res = call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
1684 %mask.cast = bitcast i32 %mask to <32 x i1>
1685 %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru
1686 ret <32 x i16> %res2
; Zero-masked form of the immediate shift: the result of
; @llvm.x86.avx512.psrli.w.512(%a0, 7) is selected against zeroinitializer
; using %mask bitcast to <32 x i1>.
; Expected code: a single vpsrlw $7 with {%k1} {z}.
1688 define <32 x i16> @test_x86_avx512_maskz_psrli_w_512(<32 x i16> %a0, i32 %mask) {
1689 ; X86-LABEL: test_x86_avx512_maskz_psrli_w_512:
1691 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1692 ; X86-NEXT: vpsrlw $7, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xd0,0x07]
1693 ; X86-NEXT: retl # encoding: [0xc3]
1695 ; X64-LABEL: test_x86_avx512_maskz_psrli_w_512:
1697 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1698 ; X64-NEXT: vpsrlw $7, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xd0,0x07]
1699 ; X64-NEXT: retq # encoding: [0xc3]
1700 %res = call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
1701 %mask.cast = bitcast i32 %mask to <32 x i1>
1702 %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer
1703 ret <32 x i16> %res2
; Intrinsic used by the three psrli_w_512 tests above: <32 x i16> value plus an
; i32 count; the tests pass the constant 7 and expect vpsrlw $7.
1705 declare <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16>, i32) nounwind readnone