1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512bw --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
5 declare i32 @llvm.x86.avx512.kunpck.wd(i32, i32)
; Passes two i32 operands to @llvm.x86.avx512.kunpck.wd.
; Expected code on both targets places the operands in %k0/%k1, combines them
; with kunpckwd, and moves %k0 to %eax. The i686 checks load the operands from
; the stack with kmovw; the x86_64 checks move them from %edi/%esi with kmovd.
7 define i32 @test_int_x86_avx512_kunpck_wd(i32 %x0, i32 %x1) nounwind {
8 ; X86-LABEL: test_int_x86_avx512_kunpck_wd:
10 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k0 # encoding: [0xc5,0xf8,0x90,0x44,0x24,0x04]
11 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
12 ; X86-NEXT: kunpckwd %k1, %k0, %k0 # encoding: [0xc5,0xfc,0x4b,0xc1]
13 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
14 ; X86-NEXT: retl # encoding: [0xc3]
16 ; X64-LABEL: test_int_x86_avx512_kunpck_wd:
18 ; X64-NEXT: kmovd %edi, %k0 # encoding: [0xc5,0xfb,0x92,0xc7]
19 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
20 ; X64-NEXT: kunpckwd %k1, %k0, %k0 # encoding: [0xc5,0xfc,0x4b,0xc1]
21 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
22 ; X64-NEXT: retq # encoding: [0xc3]
23 %res = call i32 @llvm.x86.avx512.kunpck.wd(i32 %x0, i32 %x1)
27 declare i64 @llvm.x86.avx512.kunpck.dq(i64, i64)
; Passes two i64 operands to @llvm.x86.avx512.kunpck.dq.
; The x86_64 checks expect kmovq, kmovq, kunpckdq, kmovq. The i686 checks
; expect no mask instructions at all: only two 32-bit stack loads, into %edx
; and %eax, before the return.
; NOTE(review): the function name ends in "qd" while the intrinsic is
; "kunpck.dq" -- presumably a transposition; confirm before renaming, since the
; label checks below embed the name.
29 define i64 @test_int_x86_avx512_kunpck_qd(i64 %x0, i64 %x1) nounwind {
30 ; X86-LABEL: test_int_x86_avx512_kunpck_qd:
32 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x04]
33 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x0c]
34 ; X86-NEXT: retl # encoding: [0xc3]
36 ; X64-LABEL: test_int_x86_avx512_kunpck_qd:
38 ; X64-NEXT: kmovq %rdi, %k0 # encoding: [0xc4,0xe1,0xfb,0x92,0xc7]
39 ; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
40 ; X64-NEXT: kunpckdq %k1, %k0, %k0 # encoding: [0xc4,0xe1,0xfc,0x4b,0xc1]
41 ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
42 ; X64-NEXT: retq # encoding: [0xc3]
43 %res = call i64 @llvm.x86.avx512.kunpck.dq(i64 %x0, i64 %x1)
47 declare <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8, <64 x i8>, i64)
; Calls @llvm.x86.avx512.mask.pbroadcast.b.gpr.512 three times with the same
; i8 scalar: with an all-ones mask (-1), with %mask and %x1 as passthrough, and
; with %mask and a zeroinitializer passthrough. The three results are packed,
; in that order, into the returned struct.
; The x86_64 checks expect an unmasked, a {%k1} and a {%k1} {z} vpbroadcastb
; from %edi. The i686 checks expect one vpbroadcastb from the stack into %zmm3,
; then vpblendmb and a zero-masked vmovdqu8 under %k1.
49 define { <64 x i8>, <64 x i8>, <64 x i8> } @test_int_x86_avx512_mask_pbroadcast_b_gpr_512(i8 %x0, <64 x i8> %x1, i64 %mask) nounwind {
50 ; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_512:
52 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
53 ; X86-NEXT: vpbroadcastb {{[0-9]+}}(%esp), %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x78,0x5c,0x24,0x04]
54 ; X86-NEXT: vpblendmb %zmm3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x66,0xcb]
55 ; X86-NEXT: vmovdqu8 %zmm3, %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xc9,0x6f,0xd3]
56 ; X86-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3]
57 ; X86-NEXT: retl # encoding: [0xc3]
59 ; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_512:
61 ; X64-NEXT: vmovdqa64 %zmm0, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc8]
62 ; X64-NEXT: vpbroadcastb %edi, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x7a,0xc7]
63 ; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
64 ; X64-NEXT: vpbroadcastb %edi, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x7a,0xcf]
65 ; X64-NEXT: vpbroadcastb %edi, %zmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x7a,0xd7]
66 ; X64-NEXT: retq # encoding: [0xc3]
67 %res = call <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8 %x0, <64 x i8> %x1, i64 -1)
68 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8 %x0, <64 x i8> %x1, i64 %mask)
69 %res2 = call <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8 %x0, <64 x i8> zeroinitializer, i64 %mask)
70 %res3 = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } poison, <64 x i8> %res, 0
71 %res4 = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } %res3, <64 x i8> %res1, 1
72 %res5 = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } %res4, <64 x i8> %res2, 2
73 ret { <64 x i8>, <64 x i8>, <64 x i8> } %res5
76 declare <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16, <32 x i16>, i32)
; Calls @llvm.x86.avx512.mask.pbroadcast.w.gpr.512 three times with the same
; i16 scalar: with an all-ones mask (-1), with %mask and %x1 as passthrough, and
; with %mask and a zeroinitializer passthrough. The three results are packed,
; in that order, into the returned struct.
; The x86_64 checks expect an unmasked, a {%k1} and a {%k1} {z} vpbroadcastw
; from %edi. The i686 checks expect one vpbroadcastw from the stack into %zmm3,
; then vpblendmw and a zero-masked vmovdqu16 under %k1.
78 define { <32 x i16>, <32 x i16>, <32 x i16> } @test_int_x86_avx512_mask_pbroadcast_w_gpr_512(i16 %x0, <32 x i16> %x1, i32 %mask) nounwind {
79 ; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_512:
81 ; X86-NEXT: vpbroadcastw {{[0-9]+}}(%esp), %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x79,0x5c,0x24,0x02]
82 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
83 ; X86-NEXT: vpblendmw %zmm3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x66,0xcb]
84 ; X86-NEXT: vmovdqu16 %zmm3, %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xc9,0x6f,0xd3]
85 ; X86-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3]
86 ; X86-NEXT: retl # encoding: [0xc3]
88 ; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_512:
90 ; X64-NEXT: vmovdqa64 %zmm0, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc8]
91 ; X64-NEXT: vpbroadcastw %edi, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x7b,0xc7]
92 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
93 ; X64-NEXT: vpbroadcastw %edi, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x7b,0xcf]
94 ; X64-NEXT: vpbroadcastw %edi, %zmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x7b,0xd7]
95 ; X64-NEXT: retq # encoding: [0xc3]
96 %res = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> %x1, i32 -1)
97 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> %x1, i32 %mask)
98 %res2 = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> zeroinitializer, i32 %mask)
99 %res3 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } poison, <32 x i16> %res, 0
100 %res4 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res3, <32 x i16> %res1, 1
101 %res5 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res4, <32 x i16> %res2, 2
102 ret { <32 x i16>, <32 x i16>, <32 x i16> } %res5
105 declare void @llvm.x86.avx512.mask.storeu.b.512(ptr, <64 x i8>, i64)
; Calls @llvm.x86.avx512.mask.storeu.b.512 twice with the same vector %x1:
; once to %ptr1 under mask %x2, once to %ptr2 with an all-ones mask (-1).
; Expected code: a vmovdqu8 store with {%k1} for the masked call and a plain
; vmovdqu64 store for the all-ones call, followed by vzeroupper.
107 define void @test_int_x86_avx512_mask_storeu_b_512(ptr %ptr1, ptr %ptr2, <64 x i8> %x1, i64 %x2) nounwind {
108 ; X86-LABEL: test_int_x86_avx512_mask_storeu_b_512:
110 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
111 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x0c]
112 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
113 ; X86-NEXT: vmovdqu8 %zmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7f,0x49,0x7f,0x01]
114 ; X86-NEXT: vmovdqu64 %zmm0, (%eax) # encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x00]
115 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
116 ; X86-NEXT: retl # encoding: [0xc3]
118 ; X64-LABEL: test_int_x86_avx512_mask_storeu_b_512:
120 ; X64-NEXT: kmovq %rdx, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xca]
121 ; X64-NEXT: vmovdqu8 %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7f,0x49,0x7f,0x07]
122 ; X64-NEXT: vmovdqu64 %zmm0, (%rsi) # encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x06]
123 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
124 ; X64-NEXT: retq # encoding: [0xc3]
125 call void @llvm.x86.avx512.mask.storeu.b.512(ptr %ptr1, <64 x i8> %x1, i64 %x2)
126 call void @llvm.x86.avx512.mask.storeu.b.512(ptr %ptr2, <64 x i8> %x1, i64 -1)
130 declare void @llvm.x86.avx512.mask.storeu.w.512(ptr, <32 x i16>, i32)
; Calls @llvm.x86.avx512.mask.storeu.w.512 twice with the same vector %x1:
; once to %ptr1 under mask %x2, once to %ptr2 with an all-ones mask (-1).
; Expected code: a vmovdqu16 store with {%k1} for the masked call and a plain
; vmovdqu64 store for the all-ones call, followed by vzeroupper.
132 define void @test_int_x86_avx512_mask_storeu_w_512(ptr %ptr1, ptr %ptr2, <32 x i16> %x1, i32 %x2) nounwind {
133 ; X86-LABEL: test_int_x86_avx512_mask_storeu_w_512:
135 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
136 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
137 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x0c]
138 ; X86-NEXT: vmovdqu16 %zmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x7f,0x01]
139 ; X86-NEXT: vmovdqu64 %zmm0, (%eax) # encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x00]
140 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
141 ; X86-NEXT: retl # encoding: [0xc3]
143 ; X64-LABEL: test_int_x86_avx512_mask_storeu_w_512:
145 ; X64-NEXT: kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
146 ; X64-NEXT: vmovdqu16 %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x7f,0x07]
147 ; X64-NEXT: vmovdqu64 %zmm0, (%rsi) # encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x06]
148 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
149 ; X64-NEXT: retq # encoding: [0xc3]
150 call void @llvm.x86.avx512.mask.storeu.w.512(ptr %ptr1, <32 x i16> %x1, i32 %x2)
151 call void @llvm.x86.avx512.mask.storeu.w.512(ptr %ptr2, <32 x i16> %x1, i32 -1)
155 declare <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(ptr, <32 x i16>, i32)
; Calls @llvm.x86.avx512.mask.loadu.w.512 three times: from %ptr with an
; all-ones mask (-1) and %x1 as passthrough; from %ptr2 under %mask with the
; first result as passthrough; and from %ptr under %mask with a zeroinitializer
; passthrough. The three results are packed, in that order, into the returned
; struct.
; Expected code: a plain vmovdqu64 load, a vpblendmw with a memory operand
; under %k1, and a zero-masked vmovdqu16 load.
157 define { <32 x i16>, <32 x i16>, <32 x i16> } @test_int_x86_avx512_mask_loadu_w_512(ptr %ptr, ptr %ptr2, <32 x i16> %x1, i32 %mask) nounwind {
158 ; X86-LABEL: test_int_x86_avx512_mask_loadu_w_512:
160 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
161 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
162 ; X86-NEXT: vmovdqu64 (%ecx), %zmm0 # encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x01]
163 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x0c]
164 ; X86-NEXT: vpblendmw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x66,0x08]
165 ; X86-NEXT: vmovdqu16 (%ecx), %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xc9,0x6f,0x11]
166 ; X86-NEXT: retl # encoding: [0xc3]
168 ; X64-LABEL: test_int_x86_avx512_mask_loadu_w_512:
170 ; X64-NEXT: vmovdqu64 (%rdi), %zmm0 # encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x07]
171 ; X64-NEXT: kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
172 ; X64-NEXT: vpblendmw (%rsi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x66,0x0e]
173 ; X64-NEXT: vmovdqu16 (%rdi), %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xc9,0x6f,0x17]
174 ; X64-NEXT: retq # encoding: [0xc3]
175 %res = call <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(ptr %ptr, <32 x i16> %x1, i32 -1)
176 %res1 = call <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(ptr %ptr2, <32 x i16> %res, i32 %mask)
177 %res2 = call <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(ptr %ptr, <32 x i16> zeroinitializer, i32 %mask)
178 %res3 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } poison, <32 x i16> %res, 0
179 %res4 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res3, <32 x i16> %res1, 1
180 %res5 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res4, <32 x i16> %res2, 2
181 ret { <32 x i16>, <32 x i16>, <32 x i16> } %res5
184 declare <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(ptr, <64 x i8>, i64)
; Calls @llvm.x86.avx512.mask.loadu.b.512 three times: from %ptr with an
; all-ones mask (-1) and %x1 as passthrough; from %ptr2 under %mask with the
; first result as passthrough; and from %ptr under %mask with a zeroinitializer
; passthrough. The three results are packed, in that order, into the returned
; struct.
; Expected code: a plain vmovdqu64 load, a vpblendmb with a memory operand
; under %k1, and a zero-masked vmovdqu8 load.
186 define { <64 x i8>, <64 x i8>, <64 x i8> } @test_int_x86_avx512_mask_loadu_b_512(ptr %ptr, ptr %ptr2, <64 x i8> %x1, i64 %mask) nounwind {
187 ; X86-LABEL: test_int_x86_avx512_mask_loadu_b_512:
189 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
190 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x0c]
191 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
192 ; X86-NEXT: vmovdqu64 (%ecx), %zmm0 # encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x01]
193 ; X86-NEXT: vpblendmb (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x66,0x08]
194 ; X86-NEXT: vmovdqu8 (%ecx), %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xc9,0x6f,0x11]
195 ; X86-NEXT: retl # encoding: [0xc3]
197 ; X64-LABEL: test_int_x86_avx512_mask_loadu_b_512:
199 ; X64-NEXT: vmovdqu64 (%rdi), %zmm0 # encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x07]
200 ; X64-NEXT: kmovq %rdx, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xca]
201 ; X64-NEXT: vpblendmb (%rsi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x66,0x0e]
202 ; X64-NEXT: vmovdqu8 (%rdi), %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xc9,0x6f,0x17]
203 ; X64-NEXT: retq # encoding: [0xc3]
204 %res = call <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(ptr %ptr, <64 x i8> %x1, i64 -1)
205 %res1 = call <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(ptr %ptr2, <64 x i8> %res, i64 %mask)
206 %res2 = call <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(ptr %ptr, <64 x i8> zeroinitializer, i64 %mask)
207 %res3 = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } poison, <64 x i8> %res, 0
208 %res4 = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } %res3, <64 x i8> %res1, 1
209 %res5 = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } %res4, <64 x i8> %res2, 2
210 ret { <64 x i8>, <64 x i8>, <64 x i8> } %res5
213 declare <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64>, i32)
; Calls @llvm.x86.avx512.psll.dq.512 on %x0 with immediates 8 and 4 and returns
; both results in a two-element struct (the 8 result first).
; Both targets share one set of checks: vpslldq $8 into %zmm2 and vpslldq $4
; into %zmm1, then a move of %zmm2 into %zmm0.
215 define { <8 x i64>, <8 x i64> } @test_int_x86_avx512_psll_dq_512(<8 x i64> %x0) nounwind {
216 ; CHECK-LABEL: test_int_x86_avx512_psll_dq_512:
218 ; CHECK-NEXT: vpslldq $8, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x6d,0x48,0x73,0xf8,0x08]
219 ; CHECK-NEXT: # zmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zmm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[16,17,18,19,20,21,22,23],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[32,33,34,35,36,37,38,39],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[48,49,50,51,52,53,54,55]
220 ; CHECK-NEXT: vpslldq $4, %zmm0, %zmm1 # encoding: [0x62,0xf1,0x75,0x48,0x73,0xf8,0x04]
221 ; CHECK-NEXT: # zmm1 = zero,zero,zero,zero,zmm0[0,1,2,3,4,5,6,7,8,9,10,11],zero,zero,zero,zero,zmm0[16,17,18,19,20,21,22,23,24,25,26,27],zero,zero,zero,zero,zmm0[32,33,34,35,36,37,38,39,40,41,42,43],zero,zero,zero,zero,zmm0[48,49,50,51,52,53,54,55,56,57,58,59]
222 ; CHECK-NEXT: vmovapd %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
223 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
224 %res = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 8)
225 %res1 = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 4)
226 %res2 = insertvalue { <8 x i64>, <8 x i64> } poison, <8 x i64> %res, 0
227 %res3 = insertvalue { <8 x i64>, <8 x i64> } %res2, <8 x i64> %res1, 1
228 ret { <8 x i64>, <8 x i64> } %res3
; Loads an <8 x i64> vector from %p0 and passes it to
; @llvm.x86.avx512.psll.dq.512 with immediate 4.
; The checks expect the load to be folded into the shift: a single vpslldq $4
; with a memory source operand on both targets.
231 define <8 x i64> @test_int_x86_avx512_psll_load_dq_512(ptr %p0) nounwind {
232 ; X86-LABEL: test_int_x86_avx512_psll_load_dq_512:
234 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
235 ; X86-NEXT: vpslldq $4, (%eax), %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x73,0x38,0x04]
236 ; X86-NEXT: # zmm0 = zero,zero,zero,zero,mem[0,1,2,3,4,5,6,7,8,9,10,11],zero,zero,zero,zero,mem[16,17,18,19,20,21,22,23,24,25,26,27],zero,zero,zero,zero,mem[32,33,34,35,36,37,38,39,40,41,42,43],zero,zero,zero,zero,mem[48,49,50,51,52,53,54,55,56,57,58,59]
237 ; X86-NEXT: retl # encoding: [0xc3]
239 ; X64-LABEL: test_int_x86_avx512_psll_load_dq_512:
241 ; X64-NEXT: vpslldq $4, (%rdi), %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x73,0x3f,0x04]
242 ; X64-NEXT: # zmm0 = zero,zero,zero,zero,mem[0,1,2,3,4,5,6,7,8,9,10,11],zero,zero,zero,zero,mem[16,17,18,19,20,21,22,23,24,25,26,27],zero,zero,zero,zero,mem[32,33,34,35,36,37,38,39,40,41,42,43],zero,zero,zero,zero,mem[48,49,50,51,52,53,54,55,56,57,58,59]
243 ; X64-NEXT: retq # encoding: [0xc3]
244 %x0 = load <8 x i64>, ptr%p0
245 %res = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 4)
249 declare <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64>, i32)
; Calls @llvm.x86.avx512.psrl.dq.512 on %x0 with immediates 8 and 4 and returns
; both results in a two-element struct (the 8 result first).
; Both targets share one set of checks: vpsrldq $8 into %zmm2 and vpsrldq $4
; into %zmm1, then a move of %zmm2 into %zmm0.
251 define { <8 x i64>, <8 x i64> } @test_int_x86_avx512_psrl_dq_512(<8 x i64> %x0) nounwind {
252 ; CHECK-LABEL: test_int_x86_avx512_psrl_dq_512:
254 ; CHECK-NEXT: vpsrldq $8, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x6d,0x48,0x73,0xd8,0x08]
255 ; CHECK-NEXT: # zmm2 = zmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[40,41,42,43,44,45,46,47],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[56,57,58,59,60,61,62,63],zero,zero,zero,zero,zero,zero,zero,zero
256 ; CHECK-NEXT: vpsrldq $4, %zmm0, %zmm1 # encoding: [0x62,0xf1,0x75,0x48,0x73,0xd8,0x04]
257 ; CHECK-NEXT: # zmm1 = zmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zmm0[20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zmm0[36,37,38,39,40,41,42,43,44,45,46,47],zero,zero,zero,zero,zmm0[52,53,54,55,56,57,58,59,60,61,62,63],zero,zero,zero,zero
258 ; CHECK-NEXT: vmovapd %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
259 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
260 %res = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 8)
261 %res1 = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 4)
262 %res2 = insertvalue { <8 x i64>, <8 x i64> } poison, <8 x i64> %res, 0
263 %res3 = insertvalue { <8 x i64>, <8 x i64> } %res2, <8 x i64> %res1, 1
264 ret { <8 x i64>, <8 x i64> } %res3
; Loads an <8 x i64> vector from %p0 and passes it to
; @llvm.x86.avx512.psrl.dq.512 with immediate 4.
; The checks expect the load to be folded into the shift: a single vpsrldq $4
; with a memory source operand on both targets.
267 define <8 x i64> @test_int_x86_avx512_psrl_load_dq_512(ptr %p0) nounwind {
268 ; X86-LABEL: test_int_x86_avx512_psrl_load_dq_512:
270 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
271 ; X86-NEXT: vpsrldq $4, (%eax), %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x73,0x18,0x04]
272 ; X86-NEXT: # zmm0 = mem[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,mem[20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,mem[36,37,38,39,40,41,42,43,44,45,46,47],zero,zero,zero,zero,mem[52,53,54,55,56,57,58,59,60,61,62,63],zero,zero,zero,zero
273 ; X86-NEXT: retl # encoding: [0xc3]
275 ; X64-LABEL: test_int_x86_avx512_psrl_load_dq_512:
277 ; X64-NEXT: vpsrldq $4, (%rdi), %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x73,0x1f,0x04]
278 ; X64-NEXT: # zmm0 = mem[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,mem[20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,mem[36,37,38,39,40,41,42,43,44,45,46,47],zero,zero,zero,zero,mem[52,53,54,55,56,57,58,59,60,61,62,63],zero,zero,zero,zero
279 ; X64-NEXT: retq # encoding: [0xc3]
280 %x0 = load <8 x i64>, ptr%p0
281 %res = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 4)
285 declare <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8>, <64 x i8>, i32, <64 x i8>, i64)
; Calls @llvm.x86.avx512.mask.palignr.512 with immediate 2, passthrough %x3 and
; an all-ones mask (-1).
; Both targets share one set of checks: a single unmasked vpalignr $2.
287 define <64 x i8> @test_int_x86_avx512_palignr_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x3) nounwind {
288 ; CHECK-LABEL: test_int_x86_avx512_palignr_512:
290 ; CHECK-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7d,0x48,0x0f,0xc1,0x02]
291 ; CHECK-NEXT: # zmm0 = zmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1],zmm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zmm0[16,17],zmm1[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zmm0[32,33],zmm1[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zmm0[48,49]
292 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
293 %res = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> %x3, i64 -1)
; Calls @llvm.x86.avx512.mask.palignr.512 with immediate 2, passthrough %x3 and
; the variable mask %x4.
; Expected code: the mask is placed in %k1, vpalignr $2 writes %zmm2 under
; {%k1}, and %zmm2 is then moved into %zmm0.
297 define <64 x i8> @test_int_x86_avx512_mask_palignr_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x3, i64 %x4) nounwind {
298 ; X86-LABEL: test_int_x86_avx512_mask_palignr_512:
300 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
301 ; X86-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x0f,0xd1,0x02]
302 ; X86-NEXT: # zmm2 {%k1} = zmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1],zmm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zmm0[16,17],zmm1[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zmm0[32,33],zmm1[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zmm0[48,49]
303 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
304 ; X86-NEXT: retl # encoding: [0xc3]
306 ; X64-LABEL: test_int_x86_avx512_mask_palignr_512:
308 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
309 ; X64-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x0f,0xd1,0x02]
310 ; X64-NEXT: # zmm2 {%k1} = zmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1],zmm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zmm0[16,17],zmm1[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zmm0[32,33],zmm1[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zmm0[48,49]
311 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
312 ; X64-NEXT: retq # encoding: [0xc3]
313 %res = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> %x3, i64 %x4)
; Calls @llvm.x86.avx512.mask.palignr.512 with immediate 2, a zeroinitializer
; passthrough and the variable mask %x4.
; Expected code: the mask is placed in %k1 and a single vpalignr $2 writes
; %zmm0 with {%k1} {z}.
317 define <64 x i8> @test_int_x86_avx512_maskz_palignr_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x4) nounwind {
318 ; X86-LABEL: test_int_x86_avx512_maskz_palignr_512:
320 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
321 ; X86-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x0f,0xc1,0x02]
322 ; X86-NEXT: # zmm0 {%k1} {z} = zmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1],zmm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zmm0[16,17],zmm1[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zmm0[32,33],zmm1[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zmm0[48,49]
323 ; X86-NEXT: retl # encoding: [0xc3]
325 ; X64-LABEL: test_int_x86_avx512_maskz_palignr_512:
327 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
328 ; X64-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x0f,0xc1,0x02]
329 ; X64-NEXT: # zmm0 {%k1} {z} = zmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1],zmm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zmm0[16,17],zmm1[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zmm0[32,33],zmm1[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zmm0[48,49]
330 ; X64-NEXT: retq # encoding: [0xc3]
331 %res = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> zeroinitializer, i64 %x4)
335 declare <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16>, i32, <32 x i16>, i32)
; Calls @llvm.x86.avx512.mask.pshufh.w.512 with the constant immediate 3,
; passthrough %x2 and an all-ones mask (-1). The %x1 parameter is not used by
; the call shown here.
; Both targets share one set of checks: a single unmasked vpshufhw $3.
337 define <32 x i16> @test_int_x86_avx512_pshufh_w_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2) nounwind {
338 ; CHECK-LABEL: test_int_x86_avx512_pshufh_w_512:
340 ; CHECK-NEXT: vpshufhw $3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7e,0x48,0x70,0xc0,0x03]
341 ; CHECK-NEXT: # zmm0 = zmm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12,16,17,18,19,23,20,20,20,24,25,26,27,31,28,28,28]
342 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
343 %res = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 -1)
; Calls @llvm.x86.avx512.mask.pshufh.w.512 with the constant immediate 3,
; passthrough %x2 and the variable mask %x3. The %x1 parameter is not used by
; the call shown here.
; Expected code: the mask is placed in %k1, vpshufhw $3 writes %zmm1 under
; {%k1}, and %zmm1 is then moved into %zmm0.
347 define <32 x i16> @test_int_x86_avx512_mask_pshufh_w_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) nounwind {
348 ; X86-LABEL: test_int_x86_avx512_mask_pshufh_w_512:
350 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
351 ; X86-NEXT: vpshufhw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x49,0x70,0xc8,0x03]
352 ; X86-NEXT: # zmm1 {%k1} = zmm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12,16,17,18,19,23,20,20,20,24,25,26,27,31,28,28,28]
353 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
354 ; X86-NEXT: retl # encoding: [0xc3]
356 ; X64-LABEL: test_int_x86_avx512_mask_pshufh_w_512:
358 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
359 ; X64-NEXT: vpshufhw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x49,0x70,0xc8,0x03]
360 ; X64-NEXT: # zmm1 {%k1} = zmm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12,16,17,18,19,23,20,20,20,24,25,26,27,31,28,28,28]
361 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
362 ; X64-NEXT: retq # encoding: [0xc3]
363 %res = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3)
; Calls @llvm.x86.avx512.mask.pshufh.w.512 with the constant immediate 3, a
; zeroinitializer passthrough and the variable mask %x3.
; Expected code: the mask is placed in %k1 and a single vpshufhw $3 writes
; %zmm0 with {%k1} {z}.
367 define <32 x i16> @test_int_x86_avx512_maskz_pshufh_w_512(<32 x i16> %x0, i32 %x3) nounwind {
368 ; X86-LABEL: test_int_x86_avx512_maskz_pshufh_w_512:
370 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
371 ; X86-NEXT: vpshufhw $3, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xc9,0x70,0xc0,0x03]
372 ; X86-NEXT: # zmm0 {%k1} {z} = zmm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12,16,17,18,19,23,20,20,20,24,25,26,27,31,28,28,28]
373 ; X86-NEXT: retl # encoding: [0xc3]
375 ; X64-LABEL: test_int_x86_avx512_maskz_pshufh_w_512:
377 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
378 ; X64-NEXT: vpshufhw $3, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xc9,0x70,0xc0,0x03]
379 ; X64-NEXT: # zmm0 {%k1} {z} = zmm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12,16,17,18,19,23,20,20,20,24,25,26,27,31,28,28,28]
380 ; X64-NEXT: retq # encoding: [0xc3]
381 %res = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i32 3, <32 x i16> zeroinitializer, i32 %x3)
385 declare <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16>, i32, <32 x i16>, i32)
; Calls @llvm.x86.avx512.mask.pshufl.w.512 with the constant immediate 3,
; passthrough %x2 and an all-ones mask (-1). The %x1 parameter is not used by
; the call shown here.
; Both targets share one set of checks: a single unmasked vpshuflw $3.
387 define <32 x i16> @test_int_x86_avx512_pshufl_w_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2) nounwind {
388 ; CHECK-LABEL: test_int_x86_avx512_pshufl_w_512:
390 ; CHECK-NEXT: vpshuflw $3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7f,0x48,0x70,0xc0,0x03]
391 ; CHECK-NEXT: # zmm0 = zmm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15,19,16,16,16,20,21,22,23,27,24,24,24,28,29,30,31]
392 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
393 %res = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 -1)
; Calls @llvm.x86.avx512.mask.pshufl.w.512 with the constant immediate 3,
; passthrough %x2 and the variable mask %x3. The %x1 parameter is not used by
; the call shown here.
; Expected code: the mask is placed in %k1, vpshuflw $3 writes %zmm1 under
; {%k1}, and %zmm1 is then moved into %zmm0.
397 define <32 x i16> @test_int_x86_avx512_mask_pshufl_w_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) nounwind {
398 ; X86-LABEL: test_int_x86_avx512_mask_pshufl_w_512:
400 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
401 ; X86-NEXT: vpshuflw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x49,0x70,0xc8,0x03]
402 ; X86-NEXT: # zmm1 {%k1} = zmm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15,19,16,16,16,20,21,22,23,27,24,24,24,28,29,30,31]
403 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
404 ; X86-NEXT: retl # encoding: [0xc3]
406 ; X64-LABEL: test_int_x86_avx512_mask_pshufl_w_512:
408 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
409 ; X64-NEXT: vpshuflw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x49,0x70,0xc8,0x03]
410 ; X64-NEXT: # zmm1 {%k1} = zmm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15,19,16,16,16,20,21,22,23,27,24,24,24,28,29,30,31]
411 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
412 ; X64-NEXT: retq # encoding: [0xc3]
413 %res = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3)
; Calls @llvm.x86.avx512.mask.pshufl.w.512 with the constant immediate 3, a
; zeroinitializer passthrough and the variable mask %x3.
; Expected code: the mask is placed in %k1 and a single vpshuflw $3 writes
; %zmm0 with {%k1} {z}.
417 define <32 x i16> @test_int_x86_avx512_maskz_pshufl_w_512(<32 x i16> %x0, i32 %x3) nounwind {
418 ; X86-LABEL: test_int_x86_avx512_maskz_pshufl_w_512:
420 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
421 ; X86-NEXT: vpshuflw $3, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xc9,0x70,0xc0,0x03]
422 ; X86-NEXT: # zmm0 {%k1} {z} = zmm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15,19,16,16,16,20,21,22,23,27,24,24,24,28,29,30,31]
423 ; X86-NEXT: retl # encoding: [0xc3]
425 ; X64-LABEL: test_int_x86_avx512_maskz_pshufl_w_512:
427 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
428 ; X64-NEXT: vpshuflw $3, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xc9,0x70,0xc0,0x03]
429 ; X64-NEXT: # zmm0 {%k1} {z} = zmm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15,19,16,16,16,20,21,22,23,27,24,24,24,28,29,30,31]
430 ; X64-NEXT: retq # encoding: [0xc3]
431 %res = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i32 3, <32 x i16> zeroinitializer, i32 %x3)
; Calls @llvm.x86.avx512.mask.pcmpeq.b.512 on %a and %b with an all-ones mask
; (-1).
; Expected code: vpcmpeqb into %k0. The x86_64 checks move %k0 to %rax with one
; kmovq; the i686 checks split it with kshiftrq $32 and two kmovd moves into
; %eax and %edx.
435 define i64 @test_pcmpeq_b(<64 x i8> %a, <64 x i8> %b) nounwind {
436 ; X86-LABEL: test_pcmpeq_b:
438 ; X86-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xc1]
439 ; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
440 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
441 ; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
442 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
443 ; X86-NEXT: retl # encoding: [0xc3]
445 ; X64-LABEL: test_pcmpeq_b:
447 ; X64-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xc1]
448 ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
449 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
450 ; X64-NEXT: retq # encoding: [0xc3]
451 %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
; Calls @llvm.x86.avx512.mask.pcmpeq.b.512 on %a and %b with the variable mask
; %mask.
; Expected code: an unmasked vpcmpeqb into %k0, with the mask applied
; afterwards in general-purpose registers -- one andq with %rdi on x86_64, two
; andl with stack operands (on %eax and %edx) on i686.
455 define i64 @test_mask_pcmpeq_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) nounwind {
456 ; X86-LABEL: test_mask_pcmpeq_b:
458 ; X86-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xc1]
459 ; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
460 ; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
461 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
462 ; X86-NEXT: andl {{[0-9]+}}(%esp), %eax # encoding: [0x23,0x44,0x24,0x04]
463 ; X86-NEXT: andl {{[0-9]+}}(%esp), %edx # encoding: [0x23,0x54,0x24,0x08]
464 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
465 ; X86-NEXT: retl # encoding: [0xc3]
467 ; X64-LABEL: test_mask_pcmpeq_b:
469 ; X64-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xc1]
470 ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
471 ; X64-NEXT: andq %rdi, %rax # encoding: [0x48,0x21,0xf8]
472 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
473 ; X64-NEXT: retq # encoding: [0xc3]
474 %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
478 declare i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8>, <64 x i8>, i64)
; Calls @llvm.x86.avx512.mask.pcmpeq.w.512 on %a and %b with an all-ones mask
; (-1).
; Both targets share one set of checks: vpcmpeqw into %k0, kmovd of %k0 into
; %eax, then vzeroupper.
480 define i32 @test_pcmpeq_w(<32 x i16> %a, <32 x i16> %b) nounwind {
481 ; CHECK-LABEL: test_pcmpeq_w:
483 ; CHECK-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x75,0xc1]
484 ; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
485 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
486 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
487 %res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1)
; Calls @llvm.x86.avx512.mask.pcmpeq.w.512 on %a and %b with the variable mask
; %mask.
; Expected code: an unmasked vpcmpeqw into %k0, kmovd into %eax, then the mask
; applied with andl -- from %edi on x86_64, from the stack on i686.
491 define i32 @test_mask_pcmpeq_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) nounwind {
492 ; X86-LABEL: test_mask_pcmpeq_w:
494 ; X86-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x75,0xc1]
495 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
496 ; X86-NEXT: andl {{[0-9]+}}(%esp), %eax # encoding: [0x23,0x44,0x24,0x04]
497 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
498 ; X86-NEXT: retl # encoding: [0xc3]
500 ; X64-LABEL: test_mask_pcmpeq_w:
502 ; X64-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x75,0xc1]
503 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
504 ; X64-NEXT: andl %edi, %eax # encoding: [0x21,0xf8]
505 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
506 ; X64-NEXT: retq # encoding: [0xc3]
507 %res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask)
511 declare i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16>, <32 x i16>, i32)
; Calls @llvm.x86.avx512.mask.pcmpgt.b.512 on %a and %b with an all-ones mask
; (-1).
; Expected code: vpcmpgtb into %k0. The x86_64 checks move %k0 to %rax with one
; kmovq; the i686 checks split it with kshiftrq $32 and two kmovd moves into
; %eax and %edx.
513 define i64 @test_pcmpgt_b(<64 x i8> %a, <64 x i8> %b) nounwind {
514 ; X86-LABEL: test_pcmpgt_b:
516 ; X86-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x64,0xc1]
517 ; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
518 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
519 ; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
520 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
521 ; X86-NEXT: retl # encoding: [0xc3]
523 ; X64-LABEL: test_pcmpgt_b:
525 ; X64-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x64,0xc1]
526 ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
527 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
528 ; X64-NEXT: retq # encoding: [0xc3]
529 %res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
; Calls @llvm.x86.avx512.mask.pcmpgt.b.512 on %a and %b with the variable mask
; %mask.
; Expected code: an unmasked vpcmpgtb into %k0, with the mask applied
; afterwards in general-purpose registers -- one andq with %rdi on x86_64, two
; andl with stack operands (on %eax and %edx) on i686.
533 define i64 @test_mask_pcmpgt_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) nounwind {
534 ; X86-LABEL: test_mask_pcmpgt_b:
536 ; X86-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x64,0xc1]
537 ; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
538 ; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
539 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
540 ; X86-NEXT: andl {{[0-9]+}}(%esp), %eax # encoding: [0x23,0x44,0x24,0x04]
541 ; X86-NEXT: andl {{[0-9]+}}(%esp), %edx # encoding: [0x23,0x54,0x24,0x08]
542 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
543 ; X86-NEXT: retl # encoding: [0xc3]
545 ; X64-LABEL: test_mask_pcmpgt_b:
547 ; X64-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x64,0xc1]
548 ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
549 ; X64-NEXT: andq %rdi, %rax # encoding: [0x48,0x21,0xf8]
550 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
551 ; X64-NEXT: retq # encoding: [0xc3]
552 %res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
556 declare i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8>, <64 x i8>, i64)
; Calls @llvm.x86.avx512.mask.pcmpgt.w.512 on two <32 x i16> operands with a
; constant all-ones mask (i32 -1). Both runs share one expectation: vpcmpgtw
; into %k0 followed by kmovd of the 32-bit mask into %eax.
558 define i32 @test_pcmpgt_w(<32 x i16> %a, <32 x i16> %b) nounwind {
559 ; CHECK-LABEL: test_pcmpgt_w:
561 ; CHECK-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x65,0xc1]
562 ; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
563 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
564 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
565 %res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1)
; Calls @llvm.x86.avx512.mask.pcmpgt.w.512 with a variable i32 mask %mask.
; Expected code performs the unmasked vpcmpgtw and then ANDs the result in %eax
; with the mask (a stack slot on the 32-bit run, %edi on the 64-bit run).
569 define i32 @test_mask_pcmpgt_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) nounwind {
570 ; X86-LABEL: test_mask_pcmpgt_w:
572 ; X86-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x65,0xc1]
573 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
574 ; X86-NEXT: andl {{[0-9]+}}(%esp), %eax # encoding: [0x23,0x44,0x24,0x04]
575 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
576 ; X86-NEXT: retl # encoding: [0xc3]
578 ; X64-LABEL: test_mask_pcmpgt_w:
580 ; X64-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x65,0xc1]
581 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
582 ; X64-NEXT: andl %edi, %eax # encoding: [0x21,0xf8]
583 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
584 ; X64-NEXT: retq # encoding: [0xc3]
585 %res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask)
589 declare i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16>, <32 x i16>, i32)
591 declare <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
; Calls @llvm.x86.avx512.mask.punpckhb.w.512 with a constant all-ones mask
; (i64 -1). Both runs share one expectation: a single unmasked vpunpckhbw of
; %zmm0/%zmm1 into %zmm0; %x2 does not appear in the expected output.
593 define <64 x i8> @test_int_x86_avx512_punpckhb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2) nounwind {
594 ; CHECK-LABEL: test_int_x86_avx512_punpckhb_w_512:
596 ; CHECK-NEXT: vpunpckhbw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x68,0xc1]
597 ; CHECK-NEXT: # zmm0 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
598 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
599 %res = call <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
; Calls @llvm.x86.avx512.mask.punpckhb.w.512 with a variable i64 mask %x3.
; Expected code loads the mask into %k1 (kmovq from the stack on the 32-bit run,
; from %rdi on the 64-bit run), issues vpunpckhbw masked by %k1 into %zmm2, and
; then copies %zmm2 to %zmm0.
603 define <64 x i8> @test_int_x86_avx512_mask_punpckhb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) nounwind {
604 ; X86-LABEL: test_int_x86_avx512_mask_punpckhb_w_512:
606 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
607 ; X86-NEXT: vpunpckhbw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x68,0xd1]
608 ; X86-NEXT: # zmm2 {%k1} = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
609 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
610 ; X86-NEXT: retl # encoding: [0xc3]
612 ; X64-LABEL: test_int_x86_avx512_mask_punpckhb_w_512:
614 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
615 ; X64-NEXT: vpunpckhbw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x68,0xd1]
616 ; X64-NEXT: # zmm2 {%k1} = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
617 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
618 ; X64-NEXT: retq # encoding: [0xc3]
619 %res = call <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
623 declare <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
; Calls @llvm.x86.avx512.mask.punpcklb.w.512 with a constant all-ones mask
; (i64 -1). Both runs share one expectation: a single unmasked vpunpcklbw of
; %zmm0/%zmm1 into %zmm0; %x2 does not appear in the expected output.
625 define <64 x i8> @test_int_x86_avx512_punpcklb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2) nounwind {
626 ; CHECK-LABEL: test_int_x86_avx512_punpcklb_w_512:
628 ; CHECK-NEXT: vpunpcklbw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x60,0xc1]
629 ; CHECK-NEXT: # zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
630 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
631 %res = call <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
; Calls @llvm.x86.avx512.mask.punpcklb.w.512 with a variable i64 mask %x3.
; Expected code loads the mask into %k1 (kmovq from the stack on the 32-bit run,
; from %rdi on the 64-bit run), issues vpunpcklbw masked by %k1 into %zmm2, and
; then copies %zmm2 to %zmm0.
635 define <64 x i8> @test_int_x86_avx512_mask_punpcklb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) nounwind {
636 ; X86-LABEL: test_int_x86_avx512_mask_punpcklb_w_512:
638 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
639 ; X86-NEXT: vpunpcklbw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x60,0xd1]
640 ; X86-NEXT: # zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
641 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
642 ; X86-NEXT: retl # encoding: [0xc3]
644 ; X64-LABEL: test_int_x86_avx512_mask_punpcklb_w_512:
646 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
647 ; X64-NEXT: vpunpcklbw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x60,0xd1]
648 ; X64-NEXT: # zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
649 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
650 ; X64-NEXT: retq # encoding: [0xc3]
651 %res = call <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
655 declare <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
; Calls @llvm.x86.avx512.mask.punpckhw.d.512 with a constant all-ones mask
; (i32 -1). Both runs share one expectation: a single unmasked vpunpckhwd of
; %zmm0/%zmm1 into %zmm0; %x2 does not appear in the expected output.
657 define <32 x i16> @test_int_x86_avx512_punpckhw_d_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) nounwind {
658 ; CHECK-LABEL: test_int_x86_avx512_punpckhw_d_512:
660 ; CHECK-NEXT: vpunpckhwd %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x69,0xc1]
661 ; CHECK-NEXT: # zmm0 = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
662 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
663 %res = call <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
; Calls @llvm.x86.avx512.mask.punpckhw.d.512 with a variable i32 mask %x3.
; Expected code loads the mask into %k1 (kmovd from the stack on the 32-bit run,
; from %edi on the 64-bit run), issues vpunpckhwd masked by %k1 into %zmm2, and
; then copies %zmm2 to %zmm0.
667 define <32 x i16> @test_int_x86_avx512_mask_punpckhw_d_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) nounwind {
668 ; X86-LABEL: test_int_x86_avx512_mask_punpckhw_d_512:
670 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
671 ; X86-NEXT: vpunpckhwd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x69,0xd1]
672 ; X86-NEXT: # zmm2 {%k1} = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
673 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
674 ; X86-NEXT: retl # encoding: [0xc3]
676 ; X64-LABEL: test_int_x86_avx512_mask_punpckhw_d_512:
678 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
679 ; X64-NEXT: vpunpckhwd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x69,0xd1]
680 ; X64-NEXT: # zmm2 {%k1} = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
681 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
682 ; X64-NEXT: retq # encoding: [0xc3]
683 %res = call <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
687 declare <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
; Calls @llvm.x86.avx512.mask.punpcklw.d.512 with a constant all-ones mask
; (i32 -1). Both runs share one expectation: a single unmasked vpunpcklwd of
; %zmm0/%zmm1 into %zmm0; %x2 does not appear in the expected output.
689 define <32 x i16> @test_int_x86_avx512_punpcklw_d_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) nounwind {
690 ; CHECK-LABEL: test_int_x86_avx512_punpcklw_d_512:
692 ; CHECK-NEXT: vpunpcklwd %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x61,0xc1]
693 ; CHECK-NEXT: # zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
694 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
695 %res = call <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
; Calls @llvm.x86.avx512.mask.punpcklw.d.512 with a variable i32 mask %x3.
; Expected code loads the mask into %k1 (kmovd from the stack on the 32-bit run,
; from %edi on the 64-bit run), issues vpunpcklwd masked by %k1 into %zmm2, and
; then copies %zmm2 to %zmm0.
699 define <32 x i16> @test_int_x86_avx512_mask_punpcklw_d_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) nounwind {
700 ; X86-LABEL: test_int_x86_avx512_mask_punpcklw_d_512:
702 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
703 ; X86-NEXT: vpunpcklwd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x61,0xd1]
704 ; X86-NEXT: # zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
705 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
706 ; X86-NEXT: retl # encoding: [0xc3]
708 ; X64-LABEL: test_int_x86_avx512_mask_punpcklw_d_512:
710 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
711 ; X64-NEXT: vpunpcklwd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x61,0xd1]
712 ; X64-NEXT: # zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
713 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
714 ; X64-NEXT: retq # encoding: [0xc3]
715 %res = call <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
719 declare <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
; Calls @llvm.x86.avx512.mask.pmaxs.b.512 with a constant all-ones mask
; (i64 -1). Both runs share one expectation: a single unmasked vpmaxsb of
; %zmm0/%zmm1 into %zmm0.
721 define <64 x i8> @test_int_x86_avx512_pmaxs_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2) nounwind {
722 ; CHECK-LABEL: test_int_x86_avx512_pmaxs_b_512:
724 ; CHECK-NEXT: vpmaxsb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x3c,0xc1]
725 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
726 %res = call <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
; Calls @llvm.x86.avx512.mask.pmaxs.b.512 with a variable i64 mask %x3.
; Expected code loads the mask into %k1 (kmovq from the stack on the 32-bit run,
; from %rdi on the 64-bit run), issues vpmaxsb masked by %k1 into %zmm2, and
; then copies %zmm2 to %zmm0.
730 define <64 x i8> @test_int_x86_avx512_mask_pmaxs_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) nounwind {
731 ; X86-LABEL: test_int_x86_avx512_mask_pmaxs_b_512:
733 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
734 ; X86-NEXT: vpmaxsb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x3c,0xd1]
735 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
736 ; X86-NEXT: retl # encoding: [0xc3]
738 ; X64-LABEL: test_int_x86_avx512_mask_pmaxs_b_512:
740 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
741 ; X64-NEXT: vpmaxsb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x3c,0xd1]
742 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
743 ; X64-NEXT: retq # encoding: [0xc3]
744 %res = call <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
748 declare <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
; Calls @llvm.x86.avx512.mask.pmaxs.w.512 with a constant all-ones mask
; (i32 -1). Both runs share one expectation: a single unmasked vpmaxsw of
; %zmm0/%zmm1 into %zmm0.
750 define <32 x i16> @test_int_x86_avx512_pmaxs_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) nounwind {
751 ; CHECK-LABEL: test_int_x86_avx512_pmaxs_w_512:
753 ; CHECK-NEXT: vpmaxsw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xee,0xc1]
754 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
755 %res = call <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
; Calls @llvm.x86.avx512.mask.pmaxs.w.512 with a variable i32 mask %x3.
; Expected code loads the mask into %k1 (kmovd from the stack on the 32-bit run,
; from %edi on the 64-bit run), issues vpmaxsw masked by %k1 into %zmm2, and
; then copies %zmm2 to %zmm0.
759 define <32 x i16> @test_int_x86_avx512_mask_pmaxs_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) nounwind {
760 ; X86-LABEL: test_int_x86_avx512_mask_pmaxs_w_512:
762 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
763 ; X86-NEXT: vpmaxsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xee,0xd1]
764 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
765 ; X86-NEXT: retl # encoding: [0xc3]
767 ; X64-LABEL: test_int_x86_avx512_mask_pmaxs_w_512:
769 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
770 ; X64-NEXT: vpmaxsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xee,0xd1]
771 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
772 ; X64-NEXT: retq # encoding: [0xc3]
773 %res = call <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
777 declare <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
; Calls @llvm.x86.avx512.mask.pmaxu.b.512 with a constant all-ones mask
; (i64 -1). Both runs share one expectation: a single unmasked vpmaxub of
; %zmm0/%zmm1 into %zmm0.
779 define <64 x i8> @test_int_x86_avx512_pmaxu_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2) nounwind {
780 ; CHECK-LABEL: test_int_x86_avx512_pmaxu_b_512:
782 ; CHECK-NEXT: vpmaxub %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xde,0xc1]
783 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
784 %res = call <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
; Calls @llvm.x86.avx512.mask.pmaxu.b.512 with a variable i64 mask %x3.
; Expected code loads the mask into %k1 (kmovq from the stack on the 32-bit run,
; from %rdi on the 64-bit run), issues vpmaxub masked by %k1 into %zmm2, and
; then copies %zmm2 to %zmm0.
788 define <64 x i8> @test_int_x86_avx512_mask_pmaxu_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) nounwind {
789 ; X86-LABEL: test_int_x86_avx512_mask_pmaxu_b_512:
791 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
792 ; X86-NEXT: vpmaxub %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xde,0xd1]
793 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
794 ; X86-NEXT: retl # encoding: [0xc3]
796 ; X64-LABEL: test_int_x86_avx512_mask_pmaxu_b_512:
798 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
799 ; X64-NEXT: vpmaxub %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xde,0xd1]
800 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
801 ; X64-NEXT: retq # encoding: [0xc3]
802 %res = call <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
806 declare <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
; Calls @llvm.x86.avx512.mask.pmaxu.w.512 with a constant all-ones mask
; (i32 -1). Both runs share one expectation: a single unmasked vpmaxuw of
; %zmm0/%zmm1 into %zmm0.
808 define <32 x i16> @test_int_x86_avx512_pmaxu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) nounwind {
809 ; CHECK-LABEL: test_int_x86_avx512_pmaxu_w_512:
811 ; CHECK-NEXT: vpmaxuw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x3e,0xc1]
812 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
813 %res = call <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
; Calls @llvm.x86.avx512.mask.pmaxu.w.512 with a variable i32 mask %x3.
; Expected code loads the mask into %k1 (kmovd from the stack on the 32-bit run,
; from %edi on the 64-bit run), issues vpmaxuw masked by %k1 into %zmm2, and
; then copies %zmm2 to %zmm0.
817 define <32 x i16> @test_int_x86_avx512_mask_pmaxu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) nounwind {
818 ; X86-LABEL: test_int_x86_avx512_mask_pmaxu_w_512:
820 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
821 ; X86-NEXT: vpmaxuw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x3e,0xd1]
822 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
823 ; X86-NEXT: retl # encoding: [0xc3]
825 ; X64-LABEL: test_int_x86_avx512_mask_pmaxu_w_512:
827 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
828 ; X64-NEXT: vpmaxuw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x3e,0xd1]
829 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
830 ; X64-NEXT: retq # encoding: [0xc3]
831 %res = call <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
835 declare <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
; Calls @llvm.x86.avx512.mask.pmins.b.512 with a constant all-ones mask
; (i64 -1). Both runs share one expectation: a single unmasked vpminsb of
; %zmm0/%zmm1 into %zmm0.
837 define <64 x i8> @test_int_x86_avx512_pmins_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2) nounwind {
838 ; CHECK-LABEL: test_int_x86_avx512_pmins_b_512:
840 ; CHECK-NEXT: vpminsb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x38,0xc1]
841 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
842 %res = call <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
; Calls @llvm.x86.avx512.mask.pmins.b.512 with a variable i64 mask %x3.
; Expected code loads the mask into %k1 (kmovq from the stack on the 32-bit run,
; from %rdi on the 64-bit run), issues vpminsb masked by %k1 into %zmm2, and
; then copies %zmm2 to %zmm0.
846 define <64 x i8> @test_int_x86_avx512_mask_pmins_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) nounwind {
847 ; X86-LABEL: test_int_x86_avx512_mask_pmins_b_512:
849 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
850 ; X86-NEXT: vpminsb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x38,0xd1]
851 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
852 ; X86-NEXT: retl # encoding: [0xc3]
854 ; X64-LABEL: test_int_x86_avx512_mask_pmins_b_512:
856 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
857 ; X64-NEXT: vpminsb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x38,0xd1]
858 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
859 ; X64-NEXT: retq # encoding: [0xc3]
860 %res = call <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
864 declare <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
; Calls @llvm.x86.avx512.mask.pmins.w.512 with a constant all-ones mask
; (i32 -1). Both runs share one expectation: a single unmasked vpminsw of
; %zmm0/%zmm1 into %zmm0.
866 define <32 x i16> @test_int_x86_avx512_pmins_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) nounwind {
867 ; CHECK-LABEL: test_int_x86_avx512_pmins_w_512:
869 ; CHECK-NEXT: vpminsw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xea,0xc1]
870 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
871 %res = call <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
; Calls @llvm.x86.avx512.mask.pmins.w.512 with a variable i32 mask %x3.
; Expected code loads the mask into %k1 (kmovd from the stack on the 32-bit run,
; from %edi on the 64-bit run), issues vpminsw masked by %k1 into %zmm2, and
; then copies %zmm2 to %zmm0.
875 define <32 x i16> @test_int_x86_avx512_mask_pmins_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) nounwind {
876 ; X86-LABEL: test_int_x86_avx512_mask_pmins_w_512:
878 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
879 ; X86-NEXT: vpminsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xea,0xd1]
880 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
881 ; X86-NEXT: retl # encoding: [0xc3]
883 ; X64-LABEL: test_int_x86_avx512_mask_pmins_w_512:
885 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
886 ; X64-NEXT: vpminsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xea,0xd1]
887 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
888 ; X64-NEXT: retq # encoding: [0xc3]
889 %res = call <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
893 declare <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
; Calls @llvm.x86.avx512.mask.pminu.b.512 with a constant all-ones mask
; (i64 -1). Both runs share one expectation: a single unmasked vpminub of
; %zmm0/%zmm1 into %zmm0.
895 define <64 x i8> @test_int_x86_avx512_pminu_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2) nounwind {
896 ; CHECK-LABEL: test_int_x86_avx512_pminu_b_512:
898 ; CHECK-NEXT: vpminub %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xda,0xc1]
899 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
900 %res = call <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
; Calls @llvm.x86.avx512.mask.pminu.b.512 with a variable i64 mask %x3.
; Expected code loads the mask into %k1 (kmovq from the stack on the 32-bit run,
; from %rdi on the 64-bit run), issues vpminub masked by %k1 into %zmm2, and
; then copies %zmm2 to %zmm0.
904 define <64 x i8> @test_int_x86_avx512_mask_pminu_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) nounwind {
905 ; X86-LABEL: test_int_x86_avx512_mask_pminu_b_512:
907 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
908 ; X86-NEXT: vpminub %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xda,0xd1]
909 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
910 ; X86-NEXT: retl # encoding: [0xc3]
912 ; X64-LABEL: test_int_x86_avx512_mask_pminu_b_512:
914 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
915 ; X64-NEXT: vpminub %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xda,0xd1]
916 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
917 ; X64-NEXT: retq # encoding: [0xc3]
918 %res = call <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
922 declare <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
; Calls @llvm.x86.avx512.mask.pminu.w.512 with a constant all-ones mask
; (i32 -1). Both runs share one expectation: a single unmasked vpminuw of
; %zmm0/%zmm1 into %zmm0.
924 define <32 x i16> @test_int_x86_avx512_pminu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) nounwind {
925 ; CHECK-LABEL: test_int_x86_avx512_pminu_w_512:
927 ; CHECK-NEXT: vpminuw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x3a,0xc1]
928 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
929 %res = call <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
; Calls @llvm.x86.avx512.mask.pminu.w.512 with a variable i32 mask %x3.
; Expected code loads the mask into %k1 (kmovd from the stack on the 32-bit run,
; from %edi on the 64-bit run), issues vpminuw masked by %k1 into %zmm2, and
; then copies %zmm2 to %zmm0.
933 define <32 x i16> @test_int_x86_avx512_mask_pminu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) nounwind {
934 ; X86-LABEL: test_int_x86_avx512_mask_pminu_w_512:
936 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
937 ; X86-NEXT: vpminuw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x3a,0xd1]
938 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
939 ; X86-NEXT: retl # encoding: [0xc3]
941 ; X64-LABEL: test_int_x86_avx512_mask_pminu_w_512:
943 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
944 ; X64-NEXT: vpminuw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x3a,0xd1]
945 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
946 ; X64-NEXT: retq # encoding: [0xc3]
947 %res = call <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
951 declare <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8>, <32 x i16>, i32)
; Calls @llvm.x86.avx512.mask.pmovzxb.w.512 on a <32 x i8> source with a
; constant all-ones mask (i32 -1). Both runs share one expectation: a single
; unmasked vpmovzxbw from %ymm0 into %zmm0; %x1 does not appear in the expected
; output.
953 define <32 x i16> @test_int_x86_avx512_pmovzxb_w_512(<32 x i8> %x0, <32 x i16> %x1) nounwind {
954 ; CHECK-LABEL: test_int_x86_avx512_pmovzxb_w_512:
956 ; CHECK-NEXT: vpmovzxbw %ymm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x30,0xc0]
957 ; CHECK-NEXT: # zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
958 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
959 %res = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 -1)
; Calls @llvm.x86.avx512.mask.pmovzxb.w.512 with a variable i32 mask %x2.
; Expected code loads the mask into %k1 (kmovd from the stack on the 32-bit run,
; from %edi on the 64-bit run), issues vpmovzxbw masked by %k1 into %zmm1, and
; then copies %zmm1 to %zmm0.
963 define <32 x i16> @test_int_x86_avx512_mask_pmovzxb_w_512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2) nounwind {
964 ; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_w_512:
966 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
967 ; X86-NEXT: vpmovzxbw %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x30,0xc8]
968 ; X86-NEXT: # zmm1 {%k1} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
969 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
970 ; X86-NEXT: retl # encoding: [0xc3]
972 ; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_w_512:
974 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
975 ; X64-NEXT: vpmovzxbw %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x30,0xc8]
976 ; X64-NEXT: # zmm1 {%k1} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
977 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
978 ; X64-NEXT: retq # encoding: [0xc3]
979 %res = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2)
; Calls @llvm.x86.avx512.mask.pmovzxb.w.512 with zeroinitializer as the second
; operand and a variable i32 mask %x2. Expected code loads the mask into %k1 and
; issues vpmovzxbw in its {%k1} {z} form directly into %zmm0, with no trailing
; vmovdqa64 copy.
983 define <32 x i16> @test_int_x86_avx512_maskz_pmovzxb_w_512(<32 x i8> %x0, i32 %x2) nounwind {
984 ; X86-LABEL: test_int_x86_avx512_maskz_pmovzxb_w_512:
986 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
987 ; X86-NEXT: vpmovzxbw %ymm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x30,0xc0]
988 ; X86-NEXT: # zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
989 ; X86-NEXT: retl # encoding: [0xc3]
991 ; X64-LABEL: test_int_x86_avx512_maskz_pmovzxb_w_512:
993 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
994 ; X64-NEXT: vpmovzxbw %ymm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x30,0xc0]
995 ; X64-NEXT: # zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
996 ; X64-NEXT: retq # encoding: [0xc3]
997 %res = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> zeroinitializer, i32 %x2)
1001 declare <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8>, <32 x i16>, i32)
; Calls @llvm.x86.avx512.mask.pmovsxb.w.512 on a <32 x i8> source with a
; constant all-ones mask (i32 -1). Both runs share one expectation: a single
; unmasked vpmovsxbw from %ymm0 into %zmm0.
1003 define <32 x i16> @test_int_x86_avx512_pmovsxb_w_512(<32 x i8> %x0, <32 x i16> %x1) nounwind {
1004 ; CHECK-LABEL: test_int_x86_avx512_pmovsxb_w_512:
1006 ; CHECK-NEXT: vpmovsxbw %ymm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x20,0xc0]
1007 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1008 %res = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 -1)
; Calls @llvm.x86.avx512.mask.pmovsxb.w.512 with a variable i32 mask %x2.
; Expected code loads the mask into %k1 (kmovd from the stack on the 32-bit run,
; from %edi on the 64-bit run), issues vpmovsxbw masked by %k1 into %zmm1, and
; then copies %zmm1 to %zmm0.
1012 define <32 x i16> @test_int_x86_avx512_mask_pmovsxb_w_512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2) nounwind {
1013 ; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_w_512:
1015 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1016 ; X86-NEXT: vpmovsxbw %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x20,0xc8]
1017 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1018 ; X86-NEXT: retl # encoding: [0xc3]
1020 ; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_w_512:
1022 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1023 ; X64-NEXT: vpmovsxbw %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x20,0xc8]
1024 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1025 ; X64-NEXT: retq # encoding: [0xc3]
1026 %res = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2)
; Calls @llvm.x86.avx512.mask.pmovsxb.w.512 with zeroinitializer as the second
; operand and a variable i32 mask %x2. Expected code loads the mask into %k1 and
; issues vpmovsxbw in its {%k1} {z} form directly into %zmm0, with no trailing
; vmovdqa64 copy.
1030 define <32 x i16> @test_int_x86_avx512_maskz_pmovsxb_w_512(<32 x i8> %x0, i32 %x2) nounwind {
1031 ; X86-LABEL: test_int_x86_avx512_maskz_pmovsxb_w_512:
1033 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1034 ; X86-NEXT: vpmovsxbw %ymm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x20,0xc0]
1035 ; X86-NEXT: retl # encoding: [0xc3]
1037 ; X64-LABEL: test_int_x86_avx512_maskz_pmovsxb_w_512:
1039 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1040 ; X64-NEXT: vpmovsxbw %ymm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x20,0xc0]
1041 ; X64-NEXT: retq # encoding: [0xc3]
1042 %res = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> zeroinitializer, i32 %x2)
1046 declare <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16>, <8 x i16>, <32 x i16>, i32)
; Calls @llvm.x86.avx512.mask.psrl.w.512 with a <32 x i16> value, an <8 x i16>
; second operand and a constant all-ones mask (i32 -1). Both runs share one
; expectation: a single unmasked vpsrlw taking %xmm1 and %zmm0 into %zmm0.
1048 define <32 x i16> @test_int_x86_avx512_psrl_w_512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2) nounwind {
1049 ; CHECK-LABEL: test_int_x86_avx512_psrl_w_512:
1051 ; CHECK-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd1,0xc1]
1052 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1053 %res = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1)
; Merge-masked logical right shift of words: %x2 is the passthrough and %x3
; the mask. The assertions expect a {%k1}-masked vpsrlw into zmm2 followed by
; a copy of zmm2 into zmm0.
1057 define <32 x i16> @test_int_x86_avx512_mask_psrl_w_512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) nounwind {
1058 ; X86-LABEL: test_int_x86_avx512_mask_psrl_w_512:
1060 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1061 ; X86-NEXT: vpsrlw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd1,0xd1]
1062 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1063 ; X86-NEXT: retl # encoding: [0xc3]
1065 ; X64-LABEL: test_int_x86_avx512_mask_psrl_w_512:
1067 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1068 ; X64-NEXT: vpsrlw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd1,0xd1]
1069 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1070 ; X64-NEXT: retq # encoding: [0xc3]
1071 %res = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3)
; Zero-masked logical right shift of words: the passthrough operand is
; zeroinitializer and %x3 is the mask. The assertions expect vpsrlw with
; {%k1} {z} writing zmm0 directly.
1075 define <32 x i16> @test_int_x86_avx512_maskz_psrl_w_512(<32 x i16> %x0, <8 x i16> %x1, i32 %x3) nounwind {
1076 ; X86-LABEL: test_int_x86_avx512_maskz_psrl_w_512:
1078 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1079 ; X86-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd1,0xc1]
1080 ; X86-NEXT: retl # encoding: [0xc3]
1082 ; X64-LABEL: test_int_x86_avx512_maskz_psrl_w_512:
1084 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1085 ; X64-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd1,0xc1]
1086 ; X64-NEXT: retq # encoding: [0xc3]
1087 %res = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
1091 declare <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16>, i32, <32 x i16>, i32)
; Immediate-count logical right shift of words, exercised three ways and
; returned together as a struct:
;   field 0: count 3, merge-masked (passthrough %x2, mask %x3)
;   field 1: count 4, unmasked (mask i32 -1)
;   field 2: count 5, zero-masked (zeroinitializer passthrough, mask %x3)
; The shift counts are literal constants; the %x1 parameter is not referenced
; by the calls below, but it still occupies an argument slot (the mask is read
; from the second integer argument location in the expected assembly).
1093 define { <32 x i16>, <32 x i16>, <32 x i16> } @test_int_x86_avx512_mask_psrl_wi_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) nounwind {
1094 ; X86-LABEL: test_int_x86_avx512_mask_psrl_wi_512:
1096 ; X86-NEXT: vmovdqa64 %zmm1, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9]
1097 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
1098 ; X86-NEXT: vpsrlw $3, %zmm0, %zmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x49,0x71,0xd0,0x03]
1099 ; X86-NEXT: vpsrlw $4, %zmm0, %zmm1 # encoding: [0x62,0xf1,0x75,0x48,0x71,0xd0,0x04]
1100 ; X86-NEXT: vpsrlw $5, %zmm0, %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xc9,0x71,0xd0,0x05]
1101 ; X86-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3]
1102 ; X86-NEXT: retl # encoding: [0xc3]
1104 ; X64-LABEL: test_int_x86_avx512_mask_psrl_wi_512:
1106 ; X64-NEXT: vmovdqa64 %zmm1, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9]
1107 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1108 ; X64-NEXT: vpsrlw $3, %zmm0, %zmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x49,0x71,0xd0,0x03]
1109 ; X64-NEXT: vpsrlw $4, %zmm0, %zmm1 # encoding: [0x62,0xf1,0x75,0x48,0x71,0xd0,0x04]
1110 ; X64-NEXT: vpsrlw $5, %zmm0, %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xc9,0x71,0xd0,0x05]
1111 ; X64-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3]
1112 ; X64-NEXT: retq # encoding: [0xc3]
1113 %res = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3)
1114 %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i32 4, <32 x i16> %x2, i32 -1)
1115 %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i32 5, <32 x i16> zeroinitializer, i32 %x3)
1116 %res3 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } poison, <32 x i16> %res, 0
1117 %res4 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res3, <32 x i16> %res1, 1
1118 %res5 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res4, <32 x i16> %res2, 2
1119 ret { <32 x i16>, <32 x i16>, <32 x i16> } %res5
1122 declare <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16>, <8 x i16>, <32 x i16>, i32)
; Unmasked arithmetic right shift of words by the count in %x1: the mask
; operand is the constant i32 -1, so the assertions expect a plain vpsraw.
; One assertion set is shared by both run configurations.
1124 define <32 x i16> @test_int_x86_avx512_psra_w_512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2) nounwind {
1125 ; CHECK-LABEL: test_int_x86_avx512_psra_w_512:
1127 ; CHECK-NEXT: vpsraw %xmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe1,0xc1]
1128 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1129 %res = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1)
; Merge-masked arithmetic right shift of words: %x2 is the passthrough and
; %x3 the mask. The assertions expect a {%k1}-masked vpsraw into zmm2
; followed by a copy of zmm2 into zmm0.
1133 define <32 x i16> @test_int_x86_avx512_mask_psra_w_512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) nounwind {
1134 ; X86-LABEL: test_int_x86_avx512_mask_psra_w_512:
1136 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1137 ; X86-NEXT: vpsraw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe1,0xd1]
1138 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1139 ; X86-NEXT: retl # encoding: [0xc3]
1141 ; X64-LABEL: test_int_x86_avx512_mask_psra_w_512:
1143 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1144 ; X64-NEXT: vpsraw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe1,0xd1]
1145 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1146 ; X64-NEXT: retq # encoding: [0xc3]
1147 %res = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3)
; Zero-masked arithmetic right shift of words: the passthrough operand is
; zeroinitializer and %x3 is the mask. The assertions expect vpsraw with
; {%k1} {z} writing zmm0 directly.
1151 define <32 x i16> @test_int_x86_avx512_maskz_psra_w_512(<32 x i16> %x0, <8 x i16> %x1, i32 %x3) nounwind {
1152 ; X86-LABEL: test_int_x86_avx512_maskz_psra_w_512:
1154 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1155 ; X86-NEXT: vpsraw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe1,0xc1]
1156 ; X86-NEXT: retl # encoding: [0xc3]
1158 ; X64-LABEL: test_int_x86_avx512_maskz_psra_w_512:
1160 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1161 ; X64-NEXT: vpsraw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe1,0xc1]
1162 ; X64-NEXT: retq # encoding: [0xc3]
1163 %res = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
1167 declare <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16>, i32, <32 x i16>, i32)
; Immediate-count arithmetic right shift of words, exercised three ways and
; returned together as a struct:
;   field 0: count 3, merge-masked (passthrough %x2, mask %x3)
;   field 1: count 4, zero-masked (zeroinitializer passthrough, mask %x3)
;   field 2: count 5, unmasked (mask i32 -1)
; The shift counts are literal constants; the %x1 parameter is not referenced
; by the calls below, but it still occupies an argument slot (the mask is read
; from the second integer argument location in the expected assembly).
1169 define { <32 x i16>, <32 x i16>, <32 x i16> } @test_int_x86_avx512_mask_psra_wi_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) nounwind {
1170 ; X86-LABEL: test_int_x86_avx512_mask_psra_wi_512:
1172 ; X86-NEXT: vmovdqa64 %zmm1, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9]
1173 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
1174 ; X86-NEXT: vpsraw $3, %zmm0, %zmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x49,0x71,0xe0,0x03]
1175 ; X86-NEXT: vpsraw $4, %zmm0, %zmm1 {%k1} {z} # encoding: [0x62,0xf1,0x75,0xc9,0x71,0xe0,0x04]
1176 ; X86-NEXT: vpsraw $5, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x6d,0x48,0x71,0xe0,0x05]
1177 ; X86-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3]
1178 ; X86-NEXT: retl # encoding: [0xc3]
1180 ; X64-LABEL: test_int_x86_avx512_mask_psra_wi_512:
1182 ; X64-NEXT: vmovdqa64 %zmm1, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9]
1183 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1184 ; X64-NEXT: vpsraw $3, %zmm0, %zmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x49,0x71,0xe0,0x03]
1185 ; X64-NEXT: vpsraw $4, %zmm0, %zmm1 {%k1} {z} # encoding: [0x62,0xf1,0x75,0xc9,0x71,0xe0,0x04]
1186 ; X64-NEXT: vpsraw $5, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x6d,0x48,0x71,0xe0,0x05]
1187 ; X64-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3]
1188 ; X64-NEXT: retq # encoding: [0xc3]
1189 %res = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3)
1190 %res1 = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i32 4, <32 x i16> zeroinitializer, i32 %x3)
1191 %res2 = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i32 5, <32 x i16> %x2, i32 -1)
1192 %res3 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } poison, <32 x i16> %res, 0
1193 %res4 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res3, <32 x i16> %res1, 1
1194 %res5 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res4, <32 x i16> %res2, 2
1195 ret { <32 x i16>, <32 x i16>, <32 x i16> } %res5
1198 declare <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16>, <8 x i16>, <32 x i16>, i32)
; Unmasked left shift of words by the count in %x1: the mask operand is the
; constant i32 -1, so the assertions expect a plain vpsllw. One assertion set
; is shared by both run configurations.
1200 define <32 x i16> @test_int_x86_avx512_psll_w_512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2) nounwind {
1201 ; CHECK-LABEL: test_int_x86_avx512_psll_w_512:
1203 ; CHECK-NEXT: vpsllw %xmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xf1,0xc1]
1204 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1205 %res = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1)
; Merge-masked left shift of words: %x2 is the passthrough and %x3 the mask.
; The assertions expect a {%k1}-masked vpsllw into zmm2 followed by a copy of
; zmm2 into zmm0.
1209 define <32 x i16> @test_int_x86_avx512_mask_psll_w_512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) nounwind {
1210 ; X86-LABEL: test_int_x86_avx512_mask_psll_w_512:
1212 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1213 ; X86-NEXT: vpsllw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf1,0xd1]
1214 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1215 ; X86-NEXT: retl # encoding: [0xc3]
1217 ; X64-LABEL: test_int_x86_avx512_mask_psll_w_512:
1219 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1220 ; X64-NEXT: vpsllw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf1,0xd1]
1221 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1222 ; X64-NEXT: retq # encoding: [0xc3]
1223 %res = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3)
; Zero-masked left shift of words: the passthrough operand is zeroinitializer
; and %x3 is the mask. The assertions expect vpsllw with {%k1} {z} writing
; zmm0 directly.
1227 define <32 x i16> @test_int_x86_avx512_maskz_psll_w_512(<32 x i16> %x0, <8 x i16> %x1, i32 %x3) nounwind {
1228 ; X86-LABEL: test_int_x86_avx512_maskz_psll_w_512:
1230 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1231 ; X86-NEXT: vpsllw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf1,0xc1]
1232 ; X86-NEXT: retl # encoding: [0xc3]
1234 ; X64-LABEL: test_int_x86_avx512_maskz_psll_w_512:
1236 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1237 ; X64-NEXT: vpsllw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf1,0xc1]
1238 ; X64-NEXT: retq # encoding: [0xc3]
1239 %res = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
1243 declare <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16>, i32, <32 x i16>, i32)
; Immediate-count left shift of words, exercised three ways and returned
; together as a struct:
;   field 0: count 3, merge-masked (passthrough %x2, mask %x3)
;   field 1: count 4, zero-masked (zeroinitializer passthrough, mask %x3)
;   field 2: count 5, unmasked (mask i32 -1)
; The shift counts are literal constants; the %x1 parameter is not referenced
; by the calls below, but it still occupies an argument slot (the mask is read
; from the second integer argument location in the expected assembly).
1245 define { <32 x i16>, <32 x i16>, <32 x i16> } @test_int_x86_avx512_mask_psll_wi_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) nounwind {
1246 ; X86-LABEL: test_int_x86_avx512_mask_psll_wi_512:
1248 ; X86-NEXT: vmovdqa64 %zmm1, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9]
1249 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
1250 ; X86-NEXT: vpsllw $3, %zmm0, %zmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x49,0x71,0xf0,0x03]
1251 ; X86-NEXT: vpsllw $4, %zmm0, %zmm1 {%k1} {z} # encoding: [0x62,0xf1,0x75,0xc9,0x71,0xf0,0x04]
1252 ; X86-NEXT: vpsllw $5, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x6d,0x48,0x71,0xf0,0x05]
1253 ; X86-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3]
1254 ; X86-NEXT: retl # encoding: [0xc3]
1256 ; X64-LABEL: test_int_x86_avx512_mask_psll_wi_512:
1258 ; X64-NEXT: vmovdqa64 %zmm1, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9]
1259 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1260 ; X64-NEXT: vpsllw $3, %zmm0, %zmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x49,0x71,0xf0,0x03]
1261 ; X64-NEXT: vpsllw $4, %zmm0, %zmm1 {%k1} {z} # encoding: [0x62,0xf1,0x75,0xc9,0x71,0xf0,0x04]
1262 ; X64-NEXT: vpsllw $5, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x6d,0x48,0x71,0xf0,0x05]
1263 ; X64-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3]
1264 ; X64-NEXT: retq # encoding: [0xc3]
1265 %res = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3)
1266 %res1 = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i32 4, <32 x i16> zeroinitializer, i32 %x3)
1267 %res2 = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i32 5, <32 x i16> %x2, i32 -1)
1268 %res3 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } poison, <32 x i16> %res, 0
1269 %res4 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res3, <32 x i16> %res1, 1
1270 %res5 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res4, <32 x i16> %res2, 2
1271 ret { <32 x i16>, <32 x i16>, <32 x i16> } %res5
1274 declare <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
; Unmasked byte shuffle: the mask operand is the constant i64 -1, so the
; assertions expect a plain vpshufb with no mask register. One assertion set
; is shared by both run configurations.
1276 define <64 x i8> @test_int_x86_avx512_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2) nounwind {
1277 ; CHECK-LABEL: test_int_x86_avx512_pshuf_b_512:
1279 ; CHECK-NEXT: vpshufb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x00,0xc1]
1280 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1281 %res = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
; Merge-masked byte shuffle with a 64-bit mask (%x3) and passthrough %x2.
; The assertions expect the mask to be moved into k1 with kmovq (from the
; stack on the 32-bit target, from %rdi on the 64-bit target), a
; {%k1}-masked vpshufb into zmm2, and a copy of zmm2 into zmm0.
1285 define <64 x i8> @test_int_x86_avx512_mask_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) nounwind {
1286 ; X86-LABEL: test_int_x86_avx512_mask_pshuf_b_512:
1288 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
1289 ; X86-NEXT: vpshufb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x00,0xd1]
1290 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1291 ; X86-NEXT: retl # encoding: [0xc3]
1293 ; X64-LABEL: test_int_x86_avx512_mask_pshuf_b_512:
1295 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
1296 ; X64-NEXT: vpshufb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x00,0xd1]
1297 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1298 ; X64-NEXT: retq # encoding: [0xc3]
1299 %res = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
1304 declare <64 x i8> @llvm.x86.avx512.cvtmask2b.512(i64)
; Passes the i64 argument %x0 to @llvm.x86.avx512.cvtmask2b.512 to produce a
; <64 x i8> vector. The assertions expect the value to be moved into k0 with
; kmovq and then converted with vpmovm2b into zmm0.
1306 define <64 x i8> @test_int_x86_avx512_cvtmask2b_512(i64 %x0) nounwind {
1307 ; X86-LABEL: test_int_x86_avx512_cvtmask2b_512:
1309 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf8,0x90,0x44,0x24,0x04]
1310 ; X86-NEXT: vpmovm2b %k0, %zmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x28,0xc0]
1311 ; X86-NEXT: retl # encoding: [0xc3]
1313 ; X64-LABEL: test_int_x86_avx512_cvtmask2b_512:
1315 ; X64-NEXT: kmovq %rdi, %k0 # encoding: [0xc4,0xe1,0xfb,0x92,0xc7]
1316 ; X64-NEXT: vpmovm2b %k0, %zmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x28,0xc0]
1317 ; X64-NEXT: retq # encoding: [0xc3]
1318 %res = call <64 x i8> @llvm.x86.avx512.cvtmask2b.512(i64 %x0)
1322 declare <32 x i16> @llvm.x86.avx512.cvtmask2w.512(i32)
; Passes the i32 argument %x0 to @llvm.x86.avx512.cvtmask2w.512 to produce a
; <32 x i16> vector. The assertions expect the value to be moved into k0 with
; kmovd and then converted with vpmovm2w into zmm0.
1324 define <32 x i16> @test_int_x86_avx512_cvtmask2w_512(i32 %x0) nounwind {
1325 ; X86-LABEL: test_int_x86_avx512_cvtmask2w_512:
1327 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf9,0x90,0x44,0x24,0x04]
1328 ; X86-NEXT: vpmovm2w %k0, %zmm0 # encoding: [0x62,0xf2,0xfe,0x48,0x28,0xc0]
1329 ; X86-NEXT: retl # encoding: [0xc3]
1331 ; X64-LABEL: test_int_x86_avx512_cvtmask2w_512:
1333 ; X64-NEXT: kmovd %edi, %k0 # encoding: [0xc5,0xfb,0x92,0xc7]
1334 ; X64-NEXT: vpmovm2w %k0, %zmm0 # encoding: [0x62,0xf2,0xfe,0x48,0x28,0xc0]
1335 ; X64-NEXT: retq # encoding: [0xc3]
1336 %res = call <32 x i16> @llvm.x86.avx512.cvtmask2w.512(i32 %x0)
; Register-register pack of two <16 x i32> inputs into <32 x i16> via the
; packssdw intrinsic with an all-ones mask (i32 -1). The assertions expect a
; plain vpackssdw with no mask register, shared by both run configurations.
1339 define <32 x i16> @test_mask_packs_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) nounwind {
1340 ; CHECK-LABEL: test_mask_packs_epi32_rr_512:
1342 ; CHECK-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x6b,0xc1]
1343 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1344 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
; Register-register packssdw with merge masking: %passThru is the passthrough
; and %mask the mask. The assertions expect a {%k1}-masked vpackssdw into
; zmm2 followed by a copy of zmm2 into zmm0.
1348 define <32 x i16> @test_mask_packs_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) nounwind {
1349 ; X86-LABEL: test_mask_packs_epi32_rrk_512:
1351 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1352 ; X86-NEXT: vpackssdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0xd1]
1353 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1354 ; X86-NEXT: retl # encoding: [0xc3]
1356 ; X64-LABEL: test_mask_packs_epi32_rrk_512:
1358 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1359 ; X64-NEXT: vpackssdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0xd1]
1360 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1361 ; X64-NEXT: retq # encoding: [0xc3]
1362 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
; Register-register packssdw with zero masking: the passthrough operand is
; zeroinitializer and %mask is the mask. The assertions expect vpackssdw with
; {%k1} {z} writing zmm0 directly.
1366 define <32 x i16> @test_mask_packs_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) nounwind {
1367 ; X86-LABEL: test_mask_packs_epi32_rrkz_512:
1369 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1370 ; X86-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0xc1]
1371 ; X86-NEXT: retl # encoding: [0xc3]
1373 ; X64-LABEL: test_mask_packs_epi32_rrkz_512:
1375 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1376 ; X64-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0xc1]
1377 ; X64-NEXT: retq # encoding: [0xc3]
1378 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
; Register-memory packssdw, unmasked (mask i32 -1): the second source is a
; full <16 x i32> load from %ptr_b. The assertions expect that load to appear
; as the memory operand of vpackssdw rather than as a separate instruction.
1382 define <32 x i16> @test_mask_packs_epi32_rm_512(<16 x i32> %a, ptr %ptr_b) nounwind {
1383 ; X86-LABEL: test_mask_packs_epi32_rm_512:
1385 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1386 ; X86-NEXT: vpackssdw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x6b,0x00]
1387 ; X86-NEXT: retl # encoding: [0xc3]
1389 ; X64-LABEL: test_mask_packs_epi32_rm_512:
1391 ; X64-NEXT: vpackssdw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x6b,0x07]
1392 ; X64-NEXT: retq # encoding: [0xc3]
1393 %b = load <16 x i32>, ptr %ptr_b
1394 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
; Register-memory packssdw with merge masking: the second source is a full
; <16 x i32> load from %ptr_b, %passThru is the passthrough and %mask the
; mask. The assertions expect a {%k1}-masked vpackssdw with a memory operand
; into zmm1 followed by a copy of zmm1 into zmm0.
1398 define <32 x i16> @test_mask_packs_epi32_rmk_512(<16 x i32> %a, ptr %ptr_b, <32 x i16> %passThru, i32 %mask) nounwind {
1399 ; X86-LABEL: test_mask_packs_epi32_rmk_512:
1401 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1402 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
1403 ; X86-NEXT: vpackssdw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0x08]
1404 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1405 ; X86-NEXT: retl # encoding: [0xc3]
1407 ; X64-LABEL: test_mask_packs_epi32_rmk_512:
1409 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1410 ; X64-NEXT: vpackssdw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0x0f]
1411 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1412 ; X64-NEXT: retq # encoding: [0xc3]
1413 %b = load <16 x i32>, ptr %ptr_b
1414 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
; Register-memory packssdw with zero masking: the second source is a full
; <16 x i32> load from %ptr_b, the passthrough is zeroinitializer and %mask
; is the mask. The assertions expect vpackssdw with a memory operand and
; {%k1} {z} writing zmm0 directly.
1418 define <32 x i16> @test_mask_packs_epi32_rmkz_512(<16 x i32> %a, ptr %ptr_b, i32 %mask) nounwind {
1419 ; X86-LABEL: test_mask_packs_epi32_rmkz_512:
1421 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1422 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
1423 ; X86-NEXT: vpackssdw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0x00]
1424 ; X86-NEXT: retl # encoding: [0xc3]
1426 ; X64-LABEL: test_mask_packs_epi32_rmkz_512:
1428 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1429 ; X64-NEXT: vpackssdw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0x07]
1430 ; X64-NEXT: retq # encoding: [0xc3]
1431 %b = load <16 x i32>, ptr %ptr_b
1432 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
; Register-broadcast packssdw, unmasked (mask i32 -1): a scalar i32 is loaded
; from %ptr_b and splatted to all 16 lanes (insertelement into lane 0, then a
; shufflevector with an all-zero index mask). The assertions expect the splat
; to appear as a {1to16} embedded-broadcast memory operand of vpackssdw.
; The don't-care placeholders of the splat idiom use poison rather than the
; deprecated undef, matching the insertvalue uses elsewhere in this file.
1436 define <32 x i16> @test_mask_packs_epi32_rmb_512(<16 x i32> %a, ptr %ptr_b) nounwind {
1437 ; X86-LABEL: test_mask_packs_epi32_rmb_512:
1439 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1440 ; X86-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x58,0x6b,0x00]
1441 ; X86-NEXT: retl # encoding: [0xc3]
1443 ; X64-LABEL: test_mask_packs_epi32_rmb_512:
1445 ; X64-NEXT: vpackssdw (%rdi){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x58,0x6b,0x07]
1446 ; X64-NEXT: retq # encoding: [0xc3]
1447 %q = load i32, ptr %ptr_b
1448 %vecinit.i = insertelement <16 x i32> poison, i32 %q, i32 0
1449 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> poison, <16 x i32> zeroinitializer
1450 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
; Register-broadcast packssdw with merge masking: a scalar i32 is loaded from
; %ptr_b and splatted to all 16 lanes; %passThru is the passthrough and %mask
; the mask. The assertions expect a {%k1}-masked vpackssdw with a {1to16}
; embedded-broadcast memory operand into zmm1, then a copy of zmm1 into zmm0.
; The don't-care placeholders of the splat idiom use poison rather than the
; deprecated undef, matching the insertvalue uses elsewhere in this file.
1454 define <32 x i16> @test_mask_packs_epi32_rmbk_512(<16 x i32> %a, ptr %ptr_b, <32 x i16> %passThru, i32 %mask) nounwind {
1455 ; X86-LABEL: test_mask_packs_epi32_rmbk_512:
1457 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1458 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
1459 ; X86-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x59,0x6b,0x08]
1460 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1461 ; X86-NEXT: retl # encoding: [0xc3]
1463 ; X64-LABEL: test_mask_packs_epi32_rmbk_512:
1465 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1466 ; X64-NEXT: vpackssdw (%rdi){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x59,0x6b,0x0f]
1467 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1468 ; X64-NEXT: retq # encoding: [0xc3]
1469 %q = load i32, ptr %ptr_b
1470 %vecinit.i = insertelement <16 x i32> poison, i32 %q, i32 0
1471 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> poison, <16 x i32> zeroinitializer
1472 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
; Register-broadcast packssdw with zero masking: a scalar i32 is loaded from
; %ptr_b and splatted to all 16 lanes; the passthrough is zeroinitializer and
; %mask is the mask. The assertions expect vpackssdw with a {1to16}
; embedded-broadcast memory operand and {%k1} {z} writing zmm0 directly.
; The don't-care placeholders of the splat idiom use poison rather than the
; deprecated undef, matching the insertvalue uses elsewhere in this file.
1476 define <32 x i16> @test_mask_packs_epi32_rmbkz_512(<16 x i32> %a, ptr %ptr_b, i32 %mask) nounwind {
1477 ; X86-LABEL: test_mask_packs_epi32_rmbkz_512:
1479 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1480 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
1481 ; X86-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xd9,0x6b,0x00]
1482 ; X86-NEXT: retl # encoding: [0xc3]
1484 ; X64-LABEL: test_mask_packs_epi32_rmbkz_512:
1486 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1487 ; X64-NEXT: vpackssdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xd9,0x6b,0x07]
1488 ; X64-NEXT: retq # encoding: [0xc3]
1489 %q = load i32, ptr %ptr_b
1490 %vecinit.i = insertelement <16 x i32> poison, i32 %q, i32 0
1491 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> poison, <16 x i32> zeroinitializer
1492 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
1496 declare <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32)
; Register-register pack of two <32 x i16> inputs into <64 x i8> via the
; packsswb intrinsic with an all-ones mask (i64 -1). The assertions expect a
; plain vpacksswb with no mask register, shared by both run configurations.
1498 define <64 x i8> @test_mask_packs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) nounwind {
1499 ; CHECK-LABEL: test_mask_packs_epi16_rr_512:
1501 ; CHECK-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x63,0xc1]
1502 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1503 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
; Register-register packsswb with merge masking and a 64-bit mask: %passThru
; is the passthrough and %mask the mask. The assertions expect the mask to be
; moved into k1 with kmovq, a {%k1}-masked vpacksswb into zmm2, and a copy of
; zmm2 into zmm0.
1507 define <64 x i8> @test_mask_packs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) nounwind {
1508 ; X86-LABEL: test_mask_packs_epi16_rrk_512:
1510 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
1511 ; X86-NEXT: vpacksswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0xd1]
1512 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1513 ; X86-NEXT: retl # encoding: [0xc3]
1515 ; X64-LABEL: test_mask_packs_epi16_rrk_512:
1517 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
1518 ; X64-NEXT: vpacksswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0xd1]
1519 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1520 ; X64-NEXT: retq # encoding: [0xc3]
1521 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
; Register-register packsswb with zero masking and a 64-bit mask: the
; passthrough operand is zeroinitializer and %mask is the mask. The
; assertions expect vpacksswb with {%k1} {z} writing zmm0 directly.
1525 define <64 x i8> @test_mask_packs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) nounwind {
1526 ; X86-LABEL: test_mask_packs_epi16_rrkz_512:
1528 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
1529 ; X86-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0xc1]
1530 ; X86-NEXT: retl # encoding: [0xc3]
1532 ; X64-LABEL: test_mask_packs_epi16_rrkz_512:
1534 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
1535 ; X64-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0xc1]
1536 ; X64-NEXT: retq # encoding: [0xc3]
1537 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
; Register-memory packsswb, unmasked (mask i64 -1): the second source is a
; full <32 x i16> load from %ptr_b. The assertions expect that load to appear
; as the memory operand of vpacksswb rather than as a separate instruction.
1541 define <64 x i8> @test_mask_packs_epi16_rm_512(<32 x i16> %a, ptr %ptr_b) nounwind {
1542 ; X86-LABEL: test_mask_packs_epi16_rm_512:
1544 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1545 ; X86-NEXT: vpacksswb (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x63,0x00]
1546 ; X86-NEXT: retl # encoding: [0xc3]
1548 ; X64-LABEL: test_mask_packs_epi16_rm_512:
1550 ; X64-NEXT: vpacksswb (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x63,0x07]
1551 ; X64-NEXT: retq # encoding: [0xc3]
1552 %b = load <32 x i16>, ptr %ptr_b
1553 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
; Register-memory packsswb with merge masking and a 64-bit mask: the second
; source is a full <32 x i16> load from %ptr_b, %passThru is the passthrough
; and %mask the mask. The assertions expect a {%k1}-masked vpacksswb with a
; memory operand into zmm1 followed by a copy of zmm1 into zmm0.
1557 define <64 x i8> @test_mask_packs_epi16_rmk_512(<32 x i16> %a, ptr %ptr_b, <64 x i8> %passThru, i64 %mask) nounwind {
1558 ; X86-LABEL: test_mask_packs_epi16_rmk_512:
1560 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1561 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
1562 ; X86-NEXT: vpacksswb (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0x08]
1563 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1564 ; X86-NEXT: retl # encoding: [0xc3]
1566 ; X64-LABEL: test_mask_packs_epi16_rmk_512:
1568 ; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
1569 ; X64-NEXT: vpacksswb (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0x0f]
1570 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1571 ; X64-NEXT: retq # encoding: [0xc3]
1572 %b = load <32 x i16>, ptr %ptr_b
1573 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
; Register-memory packsswb with zero masking and a 64-bit mask: the second
; source is a full <32 x i16> load from %ptr_b, the passthrough is
; zeroinitializer and %mask is the mask. The assertions expect vpacksswb with
; a memory operand and {%k1} {z} writing zmm0 directly.
1577 define <64 x i8> @test_mask_packs_epi16_rmkz_512(<32 x i16> %a, ptr %ptr_b, i64 %mask) nounwind {
1578 ; X86-LABEL: test_mask_packs_epi16_rmkz_512:
1580 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1581 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
1582 ; X86-NEXT: vpacksswb (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0x00]
1583 ; X86-NEXT: retl # encoding: [0xc3]
1585 ; X64-LABEL: test_mask_packs_epi16_rmkz_512:
1587 ; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
1588 ; X64-NEXT: vpacksswb (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0x07]
1589 ; X64-NEXT: retq # encoding: [0xc3]
1590 %b = load <32 x i16>, ptr %ptr_b
1591 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
1595 declare <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64)
; Register-register pack of two <16 x i32> inputs into <32 x i16> via the
; packusdw intrinsic with an all-ones mask (i32 -1). The assertions expect a
; plain vpackusdw with no mask register, shared by both run configurations.
1598 define <32 x i16> @test_mask_packus_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) nounwind {
1599 ; CHECK-LABEL: test_mask_packus_epi32_rr_512:
1601 ; CHECK-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x2b,0xc1]
1602 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1603 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
; Register-register packusdw with merge masking: %passThru is the passthrough
; and %mask the mask. The assertions expect a {%k1}-masked vpackusdw into
; zmm2 followed by a copy of zmm2 into zmm0.
1607 define <32 x i16> @test_mask_packus_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) nounwind {
1608 ; X86-LABEL: test_mask_packus_epi32_rrk_512:
1610 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1611 ; X86-NEXT: vpackusdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0xd1]
1612 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1613 ; X86-NEXT: retl # encoding: [0xc3]
1615 ; X64-LABEL: test_mask_packus_epi32_rrk_512:
1617 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1618 ; X64-NEXT: vpackusdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0xd1]
1619 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1620 ; X64-NEXT: retq # encoding: [0xc3]
1621 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
; Register-register packusdw with zero masking: the passthrough operand is
; zeroinitializer and %mask is the mask. The assertions expect vpackusdw with
; {%k1} {z} writing zmm0 directly.
1625 define <32 x i16> @test_mask_packus_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) nounwind {
1626 ; X86-LABEL: test_mask_packus_epi32_rrkz_512:
1628 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1629 ; X86-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0xc1]
1630 ; X86-NEXT: retl # encoding: [0xc3]
1632 ; X64-LABEL: test_mask_packus_epi32_rrkz_512:
1634 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1635 ; X64-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0xc1]
1636 ; X64-NEXT: retq # encoding: [0xc3]
1637 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
; Register-memory packusdw, unmasked (mask i32 -1): the second source is a
; full <16 x i32> load from %ptr_b. The assertions expect that load to appear
; as the memory operand of vpackusdw rather than as a separate instruction.
1641 define <32 x i16> @test_mask_packus_epi32_rm_512(<16 x i32> %a, ptr %ptr_b) nounwind {
1642 ; X86-LABEL: test_mask_packus_epi32_rm_512:
1644 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1645 ; X86-NEXT: vpackusdw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x2b,0x00]
1646 ; X86-NEXT: retl # encoding: [0xc3]
1648 ; X64-LABEL: test_mask_packus_epi32_rm_512:
1650 ; X64-NEXT: vpackusdw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x2b,0x07]
1651 ; X64-NEXT: retq # encoding: [0xc3]
1652 %b = load <16 x i32>, ptr %ptr_b
1653 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
; Register-memory packusdw with merge masking: the second source is a full
; <16 x i32> load from %ptr_b, %passThru is the passthrough and %mask the
; mask. The assertions expect a {%k1}-masked vpackusdw with a memory operand
; into zmm1 followed by a copy of zmm1 into zmm0.
1657 define <32 x i16> @test_mask_packus_epi32_rmk_512(<16 x i32> %a, ptr %ptr_b, <32 x i16> %passThru, i32 %mask) nounwind {
1658 ; X86-LABEL: test_mask_packus_epi32_rmk_512:
1660 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1661 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
1662 ; X86-NEXT: vpackusdw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0x08]
1663 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1664 ; X86-NEXT: retl # encoding: [0xc3]
1666 ; X64-LABEL: test_mask_packus_epi32_rmk_512:
1668 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1669 ; X64-NEXT: vpackusdw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0x0f]
1670 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1671 ; X64-NEXT: retq # encoding: [0xc3]
1672 %b = load <16 x i32>, ptr %ptr_b
1673 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
; Register-memory, zero-masked form: %b is loaded from %ptr_b and the call
; passes a zeroinitializer pass-through with a variable i32 %mask.
; The expected code is a single memory-operand vpackusdw with {%k1} {z}
; after the mask has been moved into %k1.
1677 define <32 x i16> @test_mask_packus_epi32_rmkz_512(<16 x i32> %a, ptr %ptr_b, i32 %mask) nounwind {
1678 ; X86-LABEL: test_mask_packus_epi32_rmkz_512:
1680 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1681 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
1682 ; X86-NEXT: vpackusdw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0x00]
1683 ; X86-NEXT: retl # encoding: [0xc3]
1685 ; X64-LABEL: test_mask_packus_epi32_rmkz_512:
1687 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1688 ; X64-NEXT: vpackusdw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0x07]
1689 ; X64-NEXT: retq # encoding: [0xc3]
1690 %b = load <16 x i32>, ptr %ptr_b
1691 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
; Broadcast-memory, unmasked form: a single i32 is loaded from %ptr_b,
; inserted into lane 0 and splatted to all 16 lanes with a shufflevector whose
; shuffle mask is zeroinitializer. The intrinsic is called with mask -1 and a
; zeroinitializer pass-through.
; The expected code uses the {1to16} broadcast memory operand on vpackusdw
; instead of a separate broadcast instruction.
1695 define <32 x i16> @test_mask_packus_epi32_rmb_512(<16 x i32> %a, ptr %ptr_b) nounwind {
1696 ; X86-LABEL: test_mask_packus_epi32_rmb_512:
1698 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1699 ; X86-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x58,0x2b,0x00]
1700 ; X86-NEXT: retl # encoding: [0xc3]
1702 ; X64-LABEL: test_mask_packus_epi32_rmb_512:
1704 ; X64-NEXT: vpackusdw (%rdi){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x58,0x2b,0x07]
1705 ; X64-NEXT: retq # encoding: [0xc3]
1706 %q = load i32, ptr %ptr_b
1707 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1708 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1709 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
; Broadcast-memory, merge-masked form: a scalar i32 loaded from %ptr_b is
; splatted to all 16 lanes (insertelement + zeroinitializer shufflevector) and
; passed to the intrinsic together with %passThru and a variable i32 %mask.
; The expected code is a {1to16} vpackusdw masked by %k1 that writes %zmm1,
; followed by a vmovdqa64 copy into %zmm0.
1713 define <32 x i16> @test_mask_packus_epi32_rmbk_512(<16 x i32> %a, ptr %ptr_b, <32 x i16> %passThru, i32 %mask) nounwind {
1714 ; X86-LABEL: test_mask_packus_epi32_rmbk_512:
1716 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1717 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
1718 ; X86-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x59,0x2b,0x08]
1719 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1720 ; X86-NEXT: retl # encoding: [0xc3]
1722 ; X64-LABEL: test_mask_packus_epi32_rmbk_512:
1724 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1725 ; X64-NEXT: vpackusdw (%rdi){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x59,0x2b,0x0f]
1726 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1727 ; X64-NEXT: retq # encoding: [0xc3]
1728 %q = load i32, ptr %ptr_b
1729 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1730 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1731 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
; Broadcast-memory, zero-masked form: a scalar i32 loaded from %ptr_b is
; splatted to all 16 lanes and passed to the intrinsic with a zeroinitializer
; pass-through and a variable i32 %mask.
; The expected code is a single {1to16} vpackusdw with {%k1} {z}.
1735 define <32 x i16> @test_mask_packus_epi32_rmbkz_512(<16 x i32> %a, ptr %ptr_b, i32 %mask) nounwind {
1736 ; X86-LABEL: test_mask_packus_epi32_rmbkz_512:
1738 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1739 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
1740 ; X86-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xd9,0x2b,0x00]
1741 ; X86-NEXT: retl # encoding: [0xc3]
1743 ; X64-LABEL: test_mask_packus_epi32_rmbkz_512:
1745 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1746 ; X64-NEXT: vpackusdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xd9,0x2b,0x07]
1747 ; X64-NEXT: retq # encoding: [0xc3]
1748 %q = load i32, ptr %ptr_b
1749 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1750 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1751 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
; Masked packusdw intrinsic used by the test_mask_packus_epi32_* functions:
; two <16 x i32> sources, a <32 x i16> pass-through operand and an i32 mask.
1755 declare <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32)
; Register-register, unmasked form of the masked packuswb.512 intrinsic:
; mask is the constant i64 -1 and the pass-through is zeroinitializer.
; Both targets share one set of expected lines; the return instruction is
; matched with a regex so that either retl or retq is accepted.
1757 define <64 x i8> @test_mask_packus_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) nounwind {
1758 ; CHECK-LABEL: test_mask_packus_epi16_rr_512:
1760 ; CHECK-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x67,0xc1]
1761 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1762 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
; Register-register, merge-masked form: packuswb.512 on %a and %b with
; %passThru and a variable i64 %mask.
; The expected code loads the 64-bit mask into %k1 with kmovq (from the stack
; on the 32-bit target, from %rdi on the 64-bit target), writes the masked
; vpackuswb result into %zmm2 and copies it to %zmm0 with vmovdqa64.
1766 define <64 x i8> @test_mask_packus_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) nounwind {
1767 ; X86-LABEL: test_mask_packus_epi16_rrk_512:
1769 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
1770 ; X86-NEXT: vpackuswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0xd1]
1771 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1772 ; X86-NEXT: retl # encoding: [0xc3]
1774 ; X64-LABEL: test_mask_packus_epi16_rrk_512:
1776 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
1777 ; X64-NEXT: vpackuswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0xd1]
1778 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1779 ; X64-NEXT: retq # encoding: [0xc3]
1780 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
; Register-register, zero-masked form: packuswb.512 on %a and %b with a
; zeroinitializer pass-through and a variable i64 %mask.
; The expected code moves the mask into %k1 with kmovq and emits a single
; vpackuswb carrying {%k1} {z}.
1784 define <64 x i8> @test_mask_packus_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) nounwind {
1785 ; X86-LABEL: test_mask_packus_epi16_rrkz_512:
1787 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
1788 ; X86-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0xc1]
1789 ; X86-NEXT: retl # encoding: [0xc3]
1791 ; X64-LABEL: test_mask_packus_epi16_rrkz_512:
1793 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
1794 ; X64-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0xc1]
1795 ; X64-NEXT: retq # encoding: [0xc3]
1796 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
; Register-memory, unmasked form: %b is a full <32 x i16> vector loaded from
; %ptr_b; the mask is the constant i64 -1 and the pass-through is
; zeroinitializer.
; The expected code uses the pointer directly as the memory operand of
; vpackuswb with no mask register decoration.
1800 define <64 x i8> @test_mask_packus_epi16_rm_512(<32 x i16> %a, ptr %ptr_b) nounwind {
1801 ; X86-LABEL: test_mask_packus_epi16_rm_512:
1803 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1804 ; X86-NEXT: vpackuswb (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x67,0x00]
1805 ; X86-NEXT: retl # encoding: [0xc3]
1807 ; X64-LABEL: test_mask_packus_epi16_rm_512:
1809 ; X64-NEXT: vpackuswb (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x67,0x07]
1810 ; X64-NEXT: retq # encoding: [0xc3]
1811 %b = load <32 x i16>, ptr %ptr_b
1812 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
; Register-memory, merge-masked form: %b is loaded from %ptr_b and the call
; passes %passThru together with a variable i64 %mask.
; The expected code is a memory-operand vpackuswb masked by %k1 that writes
; %zmm1, followed by a vmovdqa64 copy into %zmm0.
1816 define <64 x i8> @test_mask_packus_epi16_rmk_512(<32 x i16> %a, ptr %ptr_b, <64 x i8> %passThru, i64 %mask) nounwind {
1817 ; X86-LABEL: test_mask_packus_epi16_rmk_512:
1819 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1820 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
1821 ; X86-NEXT: vpackuswb (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0x08]
1822 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1823 ; X86-NEXT: retl # encoding: [0xc3]
1825 ; X64-LABEL: test_mask_packus_epi16_rmk_512:
1827 ; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
1828 ; X64-NEXT: vpackuswb (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0x0f]
1829 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1830 ; X64-NEXT: retq # encoding: [0xc3]
1831 %b = load <32 x i16>, ptr %ptr_b
1832 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
; Register-memory, zero-masked form: %b is loaded from %ptr_b and the call
; passes a zeroinitializer pass-through with a variable i64 %mask.
; The expected code is a single memory-operand vpackuswb with {%k1} {z}.
1836 define <64 x i8> @test_mask_packus_epi16_rmkz_512(<32 x i16> %a, ptr %ptr_b, i64 %mask) nounwind {
1837 ; X86-LABEL: test_mask_packus_epi16_rmkz_512:
1839 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1840 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
1841 ; X86-NEXT: vpackuswb (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0x00]
1842 ; X86-NEXT: retl # encoding: [0xc3]
1844 ; X64-LABEL: test_mask_packus_epi16_rmkz_512:
1846 ; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
1847 ; X64-NEXT: vpackuswb (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0x07]
1848 ; X64-NEXT: retq # encoding: [0xc3]
1849 %b = load <32 x i16>, ptr %ptr_b
1850 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
; Masked packuswb intrinsic used by the test_mask_packus_epi16_* functions:
; two <32 x i16> sources, a <64 x i8> pass-through operand and an i64 mask.
1854 declare <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64)
; Calls llvm.x86.avx512.mask.cmp.b.512 on %a0/%a1 once for every predicate
; immediate 0 through 7, each time with an all-ones (i64 -1) mask, and
; accumulates the eight i64 results into %ret7.
; The expected code contains six compare instructions (vpcmpeqb, vpcmpgtb with
; swapped operands, vpcmpleb, vpcmpneqb, vpcmpnltb, vpcmpgtb) plus a final
; addition of the constant -1; presumably predicates 3 and 7 fold to the
; constants 0 and -1 respectively -- TODO confirm against the lowering.
; On the 32-bit target each 64-bit mask result is read as two halves
; (kshiftrq $32 followed by kmovd) and summed with addl/adcl pairs, which is
; why %esi and %edi are saved and restored around the body.
1856 define i64 @test_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1) nounwind {
1857 ; X86-LABEL: test_cmp_b_512:
1859 ; X86-NEXT: pushl %edi # encoding: [0x57]
1860 ; X86-NEXT: pushl %esi # encoding: [0x56]
1861 ; X86-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xc1]
1862 ; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
1863 ; X86-NEXT: kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
1864 ; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
1865 ; X86-NEXT: vpcmpgtb %zmm0, %zmm1, %k0 # encoding: [0x62,0xf1,0x75,0x48,0x64,0xc0]
1866 ; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
1867 ; X86-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
1868 ; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
1869 ; X86-NEXT: addl %edx, %esi # encoding: [0x01,0xd6]
1870 ; X86-NEXT: adcl %ecx, %eax # encoding: [0x11,0xc8]
1871 ; X86-NEXT: vpcmpleb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x02]
1872 ; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
1873 ; X86-NEXT: kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
1874 ; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
1875 ; X86-NEXT: addl %esi, %edx # encoding: [0x01,0xf2]
1876 ; X86-NEXT: adcl %eax, %ecx # encoding: [0x11,0xc1]
1877 ; X86-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x04]
1878 ; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
1879 ; X86-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
1880 ; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
1881 ; X86-NEXT: addl %edx, %esi # encoding: [0x01,0xd6]
1882 ; X86-NEXT: adcl %ecx, %eax # encoding: [0x11,0xc8]
1883 ; X86-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x05]
1884 ; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
1885 ; X86-NEXT: kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
1886 ; X86-NEXT: kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8]
1887 ; X86-NEXT: addl %esi, %edi # encoding: [0x01,0xf7]
1888 ; X86-NEXT: adcl %eax, %ecx # encoding: [0x11,0xc1]
1889 ; X86-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x64,0xc1]
1890 ; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
1891 ; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
1892 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
1893 ; X86-NEXT: addl %edi, %eax # encoding: [0x01,0xf8]
1894 ; X86-NEXT: adcl %ecx, %edx # encoding: [0x11,0xca]
1895 ; X86-NEXT: addl $-1, %eax # encoding: [0x83,0xc0,0xff]
1896 ; X86-NEXT: adcl $-1, %edx # encoding: [0x83,0xd2,0xff]
1897 ; X86-NEXT: popl %esi # encoding: [0x5e]
1898 ; X86-NEXT: popl %edi # encoding: [0x5f]
1899 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1900 ; X86-NEXT: retl # encoding: [0xc3]
1902 ; X64-LABEL: test_cmp_b_512:
1904 ; X64-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xc1]
1905 ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
1906 ; X64-NEXT: vpcmpgtb %zmm0, %zmm1, %k0 # encoding: [0x62,0xf1,0x75,0x48,0x64,0xc0]
1907 ; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8]
1908 ; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1]
1909 ; X64-NEXT: vpcmpleb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x02]
1910 ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
1911 ; X64-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x04]
1912 ; X64-NEXT: kmovq %k0, %rdx # encoding: [0xc4,0xe1,0xfb,0x93,0xd0]
1913 ; X64-NEXT: addq %rax, %rdx # encoding: [0x48,0x01,0xc2]
1914 ; X64-NEXT: addq %rcx, %rdx # encoding: [0x48,0x01,0xca]
1915 ; X64-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x05]
1916 ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
1917 ; X64-NEXT: addq %rdx, %rax # encoding: [0x48,0x01,0xd0]
1918 ; X64-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x64,0xc1]
1919 ; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8]
1920 ; X64-NEXT: leaq -1(%rcx,%rax), %rax # encoding: [0x48,0x8d,0x44,0x01,0xff]
1921 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1922 ; X64-NEXT: retq # encoding: [0xc3]
1923 %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
1924 %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1)
1925 %ret1 = add i64 %res0, %res1
1926 %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 -1)
1927 %ret2 = add i64 %ret1, %res2
1928 %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 -1)
1929 %ret3 = add i64 %ret2, %res3
1930 %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 -1)
1931 %ret4 = add i64 %ret3, %res4
1932 %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 -1)
1933 %ret5 = add i64 %ret4, %res5
1934 %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 -1)
1935 %ret6 = add i64 %ret5, %res6
1936 %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 -1)
1937 %ret7 = add i64 %ret6, %res7
; Same eight-predicate sweep as the unmasked variant, but every call to
; llvm.x86.avx512.mask.cmp.b.512 receives the variable i64 %mask, and the
; results are accumulated into %ret7.
; The expected code loads the mask into %k1 and applies {%k1} to each of the
; six compare instructions. The last accumulation step adds the incoming mask
; value itself (addq %rdi on the 64-bit target, addl/adcl from the stack on
; the 32-bit target) instead of a constant; presumably this is the folded
; predicate-7 result and predicate 3 folds to zero -- TODO confirm.
; On the 32-bit target %k2 is used for the shifted upper half of each result
; until the final compare, where %k1 is reused.
1941 define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) nounwind {
1942 ; X86-LABEL: test_mask_cmp_b_512:
1944 ; X86-NEXT: pushl %edi # encoding: [0x57]
1945 ; X86-NEXT: pushl %esi # encoding: [0x56]
1946 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x0c]
1947 ; X86-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x74,0xc1]
1948 ; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20]
1949 ; X86-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
1950 ; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
1951 ; X86-NEXT: vpcmpgtb %zmm0, %zmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x64,0xc0]
1952 ; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20]
1953 ; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
1954 ; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
1955 ; X86-NEXT: addl %edx, %esi # encoding: [0x01,0xd6]
1956 ; X86-NEXT: adcl %eax, %ecx # encoding: [0x11,0xc1]
1957 ; X86-NEXT: vpcmpleb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x02]
1958 ; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20]
1959 ; X86-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
1960 ; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
1961 ; X86-NEXT: addl %esi, %edx # encoding: [0x01,0xf2]
1962 ; X86-NEXT: adcl %ecx, %eax # encoding: [0x11,0xc8]
1963 ; X86-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x04]
1964 ; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20]
1965 ; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
1966 ; X86-NEXT: addl %edx, %esi # encoding: [0x01,0xd6]
1967 ; X86-NEXT: kmovd %k2, %edx # encoding: [0xc5,0xfb,0x93,0xd2]
1968 ; X86-NEXT: adcl %eax, %edx # encoding: [0x11,0xc2]
1969 ; X86-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x05]
1970 ; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20]
1971 ; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
1972 ; X86-NEXT: kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8]
1973 ; X86-NEXT: addl %esi, %edi # encoding: [0x01,0xf7]
1974 ; X86-NEXT: adcl %edx, %ecx # encoding: [0x11,0xd1]
1975 ; X86-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x64,0xc1]
1976 ; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
1977 ; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
1978 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
1979 ; X86-NEXT: addl %edi, %eax # encoding: [0x01,0xf8]
1980 ; X86-NEXT: adcl %ecx, %edx # encoding: [0x11,0xca]
1981 ; X86-NEXT: addl {{[0-9]+}}(%esp), %eax # encoding: [0x03,0x44,0x24,0x0c]
1982 ; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx # encoding: [0x13,0x54,0x24,0x10]
1983 ; X86-NEXT: popl %esi # encoding: [0x5e]
1984 ; X86-NEXT: popl %edi # encoding: [0x5f]
1985 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1986 ; X86-NEXT: retl # encoding: [0xc3]
1988 ; X64-LABEL: test_mask_cmp_b_512:
1990 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
1991 ; X64-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x74,0xc1]
1992 ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
1993 ; X64-NEXT: vpcmpgtb %zmm0, %zmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x64,0xc0]
1994 ; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8]
1995 ; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1]
1996 ; X64-NEXT: vpcmpleb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x02]
1997 ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
1998 ; X64-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x04]
1999 ; X64-NEXT: kmovq %k0, %rdx # encoding: [0xc4,0xe1,0xfb,0x93,0xd0]
2000 ; X64-NEXT: addq %rax, %rdx # encoding: [0x48,0x01,0xc2]
2001 ; X64-NEXT: addq %rcx, %rdx # encoding: [0x48,0x01,0xca]
2002 ; X64-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x05]
2003 ; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8]
2004 ; X64-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x64,0xc1]
2005 ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
2006 ; X64-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8]
2007 ; X64-NEXT: addq %rdi, %rax # encoding: [0x48,0x01,0xf8]
2008 ; X64-NEXT: addq %rdx, %rax # encoding: [0x48,0x01,0xd0]
2009 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2010 ; X64-NEXT: retq # encoding: [0xc3]
2011 %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
2012 %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask)
2013 %ret1 = add i64 %res0, %res1
2014 %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 %mask)
2015 %ret2 = add i64 %ret1, %res2
2016 %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 %mask)
2017 %ret3 = add i64 %ret2, %res3
2018 %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 %mask)
2019 %ret4 = add i64 %ret3, %res4
2020 %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 %mask)
2021 %ret5 = add i64 %ret4, %res5
2022 %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 %mask)
2023 %ret6 = add i64 %ret5, %res6
2024 %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 %mask)
2025 %ret7 = add i64 %ret6, %res7
; Masked byte-compare intrinsic used by test_cmp_b_512 and
; test_mask_cmp_b_512: two <64 x i8> operands, an i32 predicate immediate and
; an i64 mask, returning the comparison result as an i64.
2029 declare i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8>, <64 x i8>, i32, i64) nounwind readnone
; Calls llvm.x86.avx512.mask.ucmp.b.512 on %a0/%a1 once for every predicate
; immediate 0 through 7, each time with an all-ones (i64 -1) mask, and
; accumulates the eight i64 results into %ret7.
; The expected code contains six compare instructions (vpcmpeqb, vpcmpltub,
; vpcmpleub, vpcmpneqb, vpcmpnltub, vpcmpnleub) plus a final addition of the
; constant -1; presumably predicates 3 and 7 fold to the constants 0 and -1
; respectively -- TODO confirm against the lowering.
; On the 32-bit target each 64-bit mask result is read as two halves
; (kshiftrq $32 followed by kmovd) and summed with addl/adcl pairs.
2031 define i64 @test_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1) nounwind {
2032 ; X86-LABEL: test_ucmp_b_512:
2034 ; X86-NEXT: pushl %edi # encoding: [0x57]
2035 ; X86-NEXT: pushl %esi # encoding: [0x56]
2036 ; X86-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xc1]
2037 ; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
2038 ; X86-NEXT: kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
2039 ; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
2040 ; X86-NEXT: vpcmpltub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x01]
2041 ; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
2042 ; X86-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
2043 ; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
2044 ; X86-NEXT: addl %edx, %esi # encoding: [0x01,0xd6]
2045 ; X86-NEXT: adcl %ecx, %eax # encoding: [0x11,0xc8]
2046 ; X86-NEXT: vpcmpleub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x02]
2047 ; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
2048 ; X86-NEXT: kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
2049 ; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
2050 ; X86-NEXT: addl %esi, %edx # encoding: [0x01,0xf2]
2051 ; X86-NEXT: adcl %eax, %ecx # encoding: [0x11,0xc1]
2052 ; X86-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x04]
2053 ; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
2054 ; X86-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
2055 ; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
2056 ; X86-NEXT: addl %edx, %esi # encoding: [0x01,0xd6]
2057 ; X86-NEXT: adcl %ecx, %eax # encoding: [0x11,0xc8]
2058 ; X86-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x05]
2059 ; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
2060 ; X86-NEXT: kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
2061 ; X86-NEXT: kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8]
2062 ; X86-NEXT: addl %esi, %edi # encoding: [0x01,0xf7]
2063 ; X86-NEXT: adcl %eax, %ecx # encoding: [0x11,0xc1]
2064 ; X86-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x06]
2065 ; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
2066 ; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
2067 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2068 ; X86-NEXT: addl %edi, %eax # encoding: [0x01,0xf8]
2069 ; X86-NEXT: adcl %ecx, %edx # encoding: [0x11,0xca]
2070 ; X86-NEXT: addl $-1, %eax # encoding: [0x83,0xc0,0xff]
2071 ; X86-NEXT: adcl $-1, %edx # encoding: [0x83,0xd2,0xff]
2072 ; X86-NEXT: popl %esi # encoding: [0x5e]
2073 ; X86-NEXT: popl %edi # encoding: [0x5f]
2074 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2075 ; X86-NEXT: retl # encoding: [0xc3]
2077 ; X64-LABEL: test_ucmp_b_512:
2079 ; X64-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xc1]
2080 ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
2081 ; X64-NEXT: vpcmpltub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x01]
2082 ; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8]
2083 ; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1]
2084 ; X64-NEXT: vpcmpleub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x02]
2085 ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
2086 ; X64-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x04]
2087 ; X64-NEXT: kmovq %k0, %rdx # encoding: [0xc4,0xe1,0xfb,0x93,0xd0]
2088 ; X64-NEXT: addq %rax, %rdx # encoding: [0x48,0x01,0xc2]
2089 ; X64-NEXT: addq %rcx, %rdx # encoding: [0x48,0x01,0xca]
2090 ; X64-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x05]
2091 ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
2092 ; X64-NEXT: addq %rdx, %rax # encoding: [0x48,0x01,0xd0]
2093 ; X64-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x06]
2094 ; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8]
2095 ; X64-NEXT: leaq -1(%rcx,%rax), %rax # encoding: [0x48,0x8d,0x44,0x01,0xff]
2096 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2097 ; X64-NEXT: retq # encoding: [0xc3]
2098 %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
2099 %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1)
2100 %ret1 = add i64 %res0, %res1
2101 %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 -1)
2102 %ret2 = add i64 %ret1, %res2
2103 %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 -1)
2104 %ret3 = add i64 %ret2, %res3
2105 %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 -1)
2106 %ret4 = add i64 %ret3, %res4
2107 %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 -1)
2108 %ret5 = add i64 %ret4, %res5
2109 %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 -1)
2110 %ret6 = add i64 %ret5, %res6
2111 %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 -1)
2112 %ret7 = add i64 %ret6, %res7
; Same eight-predicate sweep as test_ucmp_b_512, but every call to
; llvm.x86.avx512.mask.ucmp.b.512 receives the variable i64 %mask, and the
; results are accumulated into %ret7.
; The expected code loads the mask into %k1 and applies {%k1} to each of the
; six compare instructions (vpcmpeqb, vpcmpltub, vpcmpleub, vpcmpneqb,
; vpcmpnltub, vpcmpnleub). The last accumulation step adds the incoming mask
; value itself (addq %rdi on the 64-bit target, addl/adcl from the stack on
; the 32-bit target) instead of a constant; presumably this is the folded
; predicate-7 result and predicate 3 folds to zero -- TODO confirm.
2116 define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) nounwind {
2117 ; X86-LABEL: test_mask_x86_avx512_ucmp_b_512:
2119 ; X86-NEXT: pushl %edi # encoding: [0x57]
2120 ; X86-NEXT: pushl %esi # encoding: [0x56]
2121 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x0c]
2122 ; X86-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x74,0xc1]
2123 ; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20]
2124 ; X86-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
2125 ; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
2126 ; X86-NEXT: vpcmpltub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x01]
2127 ; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20]
2128 ; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
2129 ; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
2130 ; X86-NEXT: addl %edx, %esi # encoding: [0x01,0xd6]
2131 ; X86-NEXT: adcl %eax, %ecx # encoding: [0x11,0xc1]
2132 ; X86-NEXT: vpcmpleub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x02]
2133 ; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20]
2134 ; X86-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
2135 ; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
2136 ; X86-NEXT: addl %esi, %edx # encoding: [0x01,0xf2]
2137 ; X86-NEXT: adcl %ecx, %eax # encoding: [0x11,0xc8]
2138 ; X86-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x04]
2139 ; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20]
2140 ; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
2141 ; X86-NEXT: addl %edx, %esi # encoding: [0x01,0xd6]
2142 ; X86-NEXT: kmovd %k2, %edx # encoding: [0xc5,0xfb,0x93,0xd2]
2143 ; X86-NEXT: adcl %eax, %edx # encoding: [0x11,0xc2]
2144 ; X86-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x05]
2145 ; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20]
2146 ; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
2147 ; X86-NEXT: kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8]
2148 ; X86-NEXT: addl %esi, %edi # encoding: [0x01,0xf7]
2149 ; X86-NEXT: adcl %edx, %ecx # encoding: [0x11,0xd1]
2150 ; X86-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x06]
2151 ; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
2152 ; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
2153 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2154 ; X86-NEXT: addl %edi, %eax # encoding: [0x01,0xf8]
2155 ; X86-NEXT: adcl %ecx, %edx # encoding: [0x11,0xca]
2156 ; X86-NEXT: addl {{[0-9]+}}(%esp), %eax # encoding: [0x03,0x44,0x24,0x0c]
2157 ; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx # encoding: [0x13,0x54,0x24,0x10]
2158 ; X86-NEXT: popl %esi # encoding: [0x5e]
2159 ; X86-NEXT: popl %edi # encoding: [0x5f]
2160 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2161 ; X86-NEXT: retl # encoding: [0xc3]
2163 ; X64-LABEL: test_mask_x86_avx512_ucmp_b_512:
2165 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
2166 ; X64-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x74,0xc1]
2167 ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
2168 ; X64-NEXT: vpcmpltub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x01]
2169 ; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8]
2170 ; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1]
2171 ; X64-NEXT: vpcmpleub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x02]
2172 ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
2173 ; X64-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x04]
2174 ; X64-NEXT: kmovq %k0, %rdx # encoding: [0xc4,0xe1,0xfb,0x93,0xd0]
2175 ; X64-NEXT: addq %rax, %rdx # encoding: [0x48,0x01,0xc2]
2176 ; X64-NEXT: addq %rcx, %rdx # encoding: [0x48,0x01,0xca]
2177 ; X64-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x05]
2178 ; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8]
2179 ; X64-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x06]
2180 ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
2181 ; X64-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8]
2182 ; X64-NEXT: addq %rdi, %rax # encoding: [0x48,0x01,0xf8]
2183 ; X64-NEXT: addq %rdx, %rax # encoding: [0x48,0x01,0xd0]
2184 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2185 ; X64-NEXT: retq # encoding: [0xc3]
2186 %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
2187 %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask)
2188 %ret1 = add i64 %res0, %res1
2189 %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 %mask)
2190 %ret2 = add i64 %ret1, %res2
2191 %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 %mask)
2192 %ret3 = add i64 %ret2, %res3
2193 %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 %mask)
2194 %ret4 = add i64 %ret3, %res4
2195 %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 %mask)
2196 %ret5 = add i64 %ret4, %res5
2197 %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 %mask)
2198 %ret6 = add i64 %ret5, %res6
2199 %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 %mask)
2200 %ret7 = add i64 %ret6, %res7
2204 declare i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8>, <64 x i8>, i32, i64) nounwind readnone
; Calls @llvm.x86.avx512.mask.cmp.w.512 on %a0/%a1 once for each predicate
; immediate 0 through 7, always with an all-ones mask (i32 -1), and chains the
; eight results together with i32 adds.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script instead of editing them by hand.
; NOTE(review): the checks contain six compare instructions for the eight calls
; and the final leal carries a -1 displacement; presumably the calls with
; immediates 3 and 7 fold to constants - confirm against the intrinsic docs.
2206 define i32 @test_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1) nounwind {
2207 ; X86-LABEL: test_cmp_w_512:
2209 ; X86-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x75,0xc1]
2210 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2211 ; X86-NEXT: vpcmpgtw %zmm0, %zmm1, %k0 # encoding: [0x62,0xf1,0x75,0x48,0x65,0xc0]
2212 ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
2213 ; X86-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1]
2214 ; X86-NEXT: vpcmplew %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x02]
2215 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2216 ; X86-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x04]
2217 ; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
2218 ; X86-NEXT: addl %eax, %edx # encoding: [0x01,0xc2]
2219 ; X86-NEXT: addl %ecx, %edx # encoding: [0x01,0xca]
2220 ; X86-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x05]
2221 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2222 ; X86-NEXT: addl %edx, %eax # encoding: [0x01,0xd0]
2223 ; X86-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x65,0xc1]
2224 ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
2225 ; X86-NEXT: leal -1(%ecx,%eax), %eax # encoding: [0x8d,0x44,0x01,0xff]
2226 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2227 ; X86-NEXT: retl # encoding: [0xc3]
2229 ; X64-LABEL: test_cmp_w_512:
2231 ; X64-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x75,0xc1]
2232 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2233 ; X64-NEXT: vpcmpgtw %zmm0, %zmm1, %k0 # encoding: [0x62,0xf1,0x75,0x48,0x65,0xc0]
2234 ; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
2235 ; X64-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1]
2236 ; X64-NEXT: vpcmplew %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x02]
2237 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2238 ; X64-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x04]
2239 ; X64-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
2240 ; X64-NEXT: addl %eax, %edx # encoding: [0x01,0xc2]
2241 ; X64-NEXT: addl %ecx, %edx # encoding: [0x01,0xca]
2242 ; X64-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x05]
2243 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2244 ; X64-NEXT: addl %edx, %eax # encoding: [0x01,0xd0]
2245 ; X64-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x65,0xc1]
2246 ; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
2247 ; X64-NEXT: leal -1(%rcx,%rax), %eax # encoding: [0x8d,0x44,0x01,0xff]
2248 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2249 ; X64-NEXT: retq # encoding: [0xc3]
; One intrinsic call per predicate immediate; each result feeds the running sum.
2250 %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1)
2251 %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 -1)
2252 %ret1 = add i32 %res0, %res1
2253 %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 -1)
2254 %ret2 = add i32 %ret1, %res2
2255 %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 -1)
2256 %ret3 = add i32 %ret2, %res3
2257 %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 -1)
2258 %ret4 = add i32 %ret3, %res4
2259 %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 -1)
2260 %ret5 = add i32 %ret4, %res5
2261 %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 -1)
2262 %ret6 = add i32 %ret5, %res6
2263 %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 -1)
2264 %ret7 = add i32 %ret6, %res7
; Masked variant of the signed word compare test: calls
; @llvm.x86.avx512.mask.cmp.w.512 on %a0/%a1 once for each predicate immediate
; 0 through 7, passing the caller-supplied %mask, and sums the eight results.
; The checks expect the mask to be moved into %k1 and every emitted compare to
; be predicated with {%k1}.
; NOTE(review): the checks add the raw mask register (%ecx on X86, %edi on X64)
; into the sum and show no compare for immediates 3 and 7; presumably those two
; calls fold to 0 and to %mask respectively - confirm.
; The check lines are autogenerated; regenerate them with the update script.
2268 define i32 @test_mask_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) nounwind {
2269 ; X86-LABEL: test_mask_cmp_w_512:
2271 ; X86-NEXT: pushl %esi # encoding: [0x56]
2272 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x08]
2273 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
2274 ; X86-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x75,0xc1]
2275 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2276 ; X86-NEXT: vpcmpgtw %zmm0, %zmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x65,0xc0]
2277 ; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
2278 ; X86-NEXT: addl %eax, %edx # encoding: [0x01,0xc2]
2279 ; X86-NEXT: vpcmplew %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x02]
2280 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2281 ; X86-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x04]
2282 ; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
2283 ; X86-NEXT: addl %eax, %esi # encoding: [0x01,0xc6]
2284 ; X86-NEXT: addl %edx, %esi # encoding: [0x01,0xd6]
2285 ; X86-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x05]
2286 ; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
2287 ; X86-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x65,0xc1]
2288 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2289 ; X86-NEXT: addl %edx, %eax # encoding: [0x01,0xd0]
2290 ; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
2291 ; X86-NEXT: addl %esi, %eax # encoding: [0x01,0xf0]
2292 ; X86-NEXT: popl %esi # encoding: [0x5e]
2293 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2294 ; X86-NEXT: retl # encoding: [0xc3]
2296 ; X64-LABEL: test_mask_cmp_w_512:
2298 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2299 ; X64-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x75,0xc1]
2300 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2301 ; X64-NEXT: vpcmpgtw %zmm0, %zmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x65,0xc0]
2302 ; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
2303 ; X64-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1]
2304 ; X64-NEXT: vpcmplew %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x02]
2305 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2306 ; X64-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x04]
2307 ; X64-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
2308 ; X64-NEXT: addl %eax, %edx # encoding: [0x01,0xc2]
2309 ; X64-NEXT: addl %ecx, %edx # encoding: [0x01,0xca]
2310 ; X64-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x05]
2311 ; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
2312 ; X64-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x65,0xc1]
2313 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2314 ; X64-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
2315 ; X64-NEXT: addl %edi, %eax # encoding: [0x01,0xf8]
2316 ; X64-NEXT: addl %edx, %eax # encoding: [0x01,0xd0]
2317 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2318 ; X64-NEXT: retq # encoding: [0xc3]
; One intrinsic call per predicate immediate; each result feeds the running sum.
2319 %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask)
2320 %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 %mask)
2321 %ret1 = add i32 %res0, %res1
2322 %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 %mask)
2323 %ret2 = add i32 %ret1, %res2
2324 %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 %mask)
2325 %ret3 = add i32 %ret2, %res3
2326 %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 %mask)
2327 %ret4 = add i32 %ret3, %res4
2328 %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 %mask)
2329 %ret5 = add i32 %ret4, %res5
2330 %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 %mask)
2331 %ret6 = add i32 %ret5, %res6
2332 %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 %mask)
2333 %ret7 = add i32 %ret6, %res7
2337 declare i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16>, <32 x i16>, i32, i32) nounwind readnone
; Calls @llvm.x86.avx512.mask.ucmp.w.512 on %a0/%a1 once for each predicate
; immediate 0 through 7, always with an all-ones mask (i32 -1), and chains the
; eight results together with i32 adds.
; The checks expect vpcmpeqw / vpcmpneqw plus the unsigned forms vpcmpltuw,
; vpcmpleuw, vpcmpnltuw and vpcmpnleuw, none of them predicated.
; NOTE(review): no compare appears for immediates 3 and 7 and the final leal
; carries a -1 displacement; presumably those calls fold to constants - confirm.
; The check lines are autogenerated; regenerate them with the update script.
2339 define i32 @test_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1) nounwind {
2340 ; X86-LABEL: test_ucmp_w_512:
2342 ; X86-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x75,0xc1]
2343 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2344 ; X86-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x01]
2345 ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
2346 ; X86-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1]
2347 ; X86-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x02]
2348 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2349 ; X86-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x04]
2350 ; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
2351 ; X86-NEXT: addl %eax, %edx # encoding: [0x01,0xc2]
2352 ; X86-NEXT: addl %ecx, %edx # encoding: [0x01,0xca]
2353 ; X86-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x05]
2354 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2355 ; X86-NEXT: addl %edx, %eax # encoding: [0x01,0xd0]
2356 ; X86-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x06]
2357 ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
2358 ; X86-NEXT: leal -1(%ecx,%eax), %eax # encoding: [0x8d,0x44,0x01,0xff]
2359 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2360 ; X86-NEXT: retl # encoding: [0xc3]
2362 ; X64-LABEL: test_ucmp_w_512:
2364 ; X64-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x75,0xc1]
2365 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2366 ; X64-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x01]
2367 ; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
2368 ; X64-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1]
2369 ; X64-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x02]
2370 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2371 ; X64-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x04]
2372 ; X64-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
2373 ; X64-NEXT: addl %eax, %edx # encoding: [0x01,0xc2]
2374 ; X64-NEXT: addl %ecx, %edx # encoding: [0x01,0xca]
2375 ; X64-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x05]
2376 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2377 ; X64-NEXT: addl %edx, %eax # encoding: [0x01,0xd0]
2378 ; X64-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x06]
2379 ; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
2380 ; X64-NEXT: leal -1(%rcx,%rax), %eax # encoding: [0x8d,0x44,0x01,0xff]
2381 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2382 ; X64-NEXT: retq # encoding: [0xc3]
; One intrinsic call per predicate immediate; each result feeds the running sum.
2383 %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1)
2384 %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 -1)
2385 %ret1 = add i32 %res0, %res1
2386 %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 -1)
2387 %ret2 = add i32 %ret1, %res2
2388 %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 -1)
2389 %ret3 = add i32 %ret2, %res3
2390 %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 -1)
2391 %ret4 = add i32 %ret3, %res4
2392 %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 -1)
2393 %ret5 = add i32 %ret4, %res5
2394 %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 -1)
2395 %ret6 = add i32 %ret5, %res6
2396 %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 -1)
2397 %ret7 = add i32 %ret6, %res7
; Masked variant of the unsigned word compare test: calls
; @llvm.x86.avx512.mask.ucmp.w.512 on %a0/%a1 once for each predicate immediate
; 0 through 7, passing the caller-supplied %mask, and sums the eight results.
; The checks expect the mask to be moved into %k1 and every emitted compare to
; be predicated with {%k1}.
; NOTE(review): the checks add the raw mask register (%ecx on X86, %edi on X64)
; into the sum and show no compare for immediates 3 and 7; presumably those two
; calls fold to 0 and to %mask respectively - confirm.
; The check lines are autogenerated; regenerate them with the update script.
2401 define i32 @test_mask_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) nounwind {
2402 ; X86-LABEL: test_mask_ucmp_w_512:
2404 ; X86-NEXT: pushl %esi # encoding: [0x56]
2405 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x08]
2406 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
2407 ; X86-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x75,0xc1]
2408 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2409 ; X86-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x01]
2410 ; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
2411 ; X86-NEXT: addl %eax, %edx # encoding: [0x01,0xc2]
2412 ; X86-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x02]
2413 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2414 ; X86-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x04]
2415 ; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
2416 ; X86-NEXT: addl %eax, %esi # encoding: [0x01,0xc6]
2417 ; X86-NEXT: addl %edx, %esi # encoding: [0x01,0xd6]
2418 ; X86-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x05]
2419 ; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
2420 ; X86-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x06]
2421 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2422 ; X86-NEXT: addl %edx, %eax # encoding: [0x01,0xd0]
2423 ; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
2424 ; X86-NEXT: addl %esi, %eax # encoding: [0x01,0xf0]
2425 ; X86-NEXT: popl %esi # encoding: [0x5e]
2426 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2427 ; X86-NEXT: retl # encoding: [0xc3]
2429 ; X64-LABEL: test_mask_ucmp_w_512:
2431 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2432 ; X64-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x75,0xc1]
2433 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2434 ; X64-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x01]
2435 ; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
2436 ; X64-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1]
2437 ; X64-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x02]
2438 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2439 ; X64-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x04]
2440 ; X64-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
2441 ; X64-NEXT: addl %eax, %edx # encoding: [0x01,0xc2]
2442 ; X64-NEXT: addl %ecx, %edx # encoding: [0x01,0xca]
2443 ; X64-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x05]
2444 ; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
2445 ; X64-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x06]
2446 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2447 ; X64-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
2448 ; X64-NEXT: addl %edi, %eax # encoding: [0x01,0xf8]
2449 ; X64-NEXT: addl %edx, %eax # encoding: [0x01,0xd0]
2450 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2451 ; X64-NEXT: retq # encoding: [0xc3]
; One intrinsic call per predicate immediate; each result feeds the running sum.
2452 %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask)
2453 %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 %mask)
2454 %ret1 = add i32 %res0, %res1
2455 %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 %mask)
2456 %ret2 = add i32 %ret1, %res2
2457 %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 %mask)
2458 %ret3 = add i32 %ret2, %res3
2459 %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 %mask)
2460 %ret4 = add i32 %ret3, %res4
2461 %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 %mask)
2462 %ret5 = add i32 %ret4, %res5
2463 %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 %mask)
2464 %ret6 = add i32 %ret5, %res6
2465 %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 %mask)
2466 %ret7 = add i32 %ret6, %res7
2470 declare i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16>, <32 x i16>, i32, i32) nounwind readnone
2473 declare <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
; Calls @llvm.x86.avx512.mask.pavg.b.512 on %x0/%x1 with %x2 as the third
; operand and an all-ones mask (i64 -1); %x3 is not used by the call.
; Both targets are expected to emit a single unpredicated vpavgb.
2475 define <64 x i8> @mm512_avg_epu8(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) nounwind {
2476 ; CHECK-LABEL: mm512_avg_epu8:
2478 ; CHECK-NEXT: vpavgb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe0,0xc1]
2479 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2480 %res = call <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
; Calls @llvm.x86.avx512.mask.pavg.b.512 on %x0/%x1 with %x2 as the third
; operand and %x3 as the i64 mask operand.
; The checks expect the mask in %k1 (loaded from the stack on X86, moved from
; %rdi on X64), a vpavgb predicated with {%k1} writing %zmm2, and a copy of
; %zmm2 into %zmm0.
2484 define <64 x i8> @mm512_mask_avg_epu8(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) nounwind {
2485 ; X86-LABEL: mm512_mask_avg_epu8:
2487 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
2488 ; X86-NEXT: vpavgb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe0,0xd1]
2489 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2490 ; X86-NEXT: retl # encoding: [0xc3]
2492 ; X64-LABEL: mm512_mask_avg_epu8:
2494 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
2495 ; X64-NEXT: vpavgb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe0,0xd1]
2496 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2497 ; X64-NEXT: retq # encoding: [0xc3]
2498 %res = call <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
2502 declare <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
; Calls @llvm.x86.avx512.mask.pavg.w.512 on %x0/%x1 with %x2 as the third
; operand and an all-ones mask (i32 -1); %x3 is not used by the call.
; Both targets are expected to emit a single unpredicated vpavgw.
2504 define <32 x i16> @mm512_avg_epu16(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) nounwind {
2505 ; CHECK-LABEL: mm512_avg_epu16:
2507 ; CHECK-NEXT: vpavgw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe3,0xc1]
2508 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2509 %res = call <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
; Calls @llvm.x86.avx512.mask.pavg.w.512 on %x0/%x1 with %x2 as the third
; operand and %x3 as the i32 mask operand.
; The checks expect the mask in %k1 (loaded from the stack on X86, moved from
; %edi on X64), a vpavgw predicated with {%k1} writing %zmm2, and a copy of
; %zmm2 into %zmm0.
2513 define <32 x i16> @mm512_mask_avg_epu16(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) nounwind {
2514 ; X86-LABEL: mm512_mask_avg_epu16:
2516 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
2517 ; X86-NEXT: vpavgw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe3,0xd1]
2518 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2519 ; X86-NEXT: retl # encoding: [0xc3]
2521 ; X64-LABEL: mm512_mask_avg_epu16:
2523 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2524 ; X64-NEXT: vpavgw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe3,0xd1]
2525 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2526 ; X64-NEXT: retq # encoding: [0xc3]
2527 %res = call <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
2531 declare <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16>, <32 x i16>, i32)
; Calls @llvm.x86.avx512.mask.pabs.w.512 on %x0 with %x1 as the second operand
; and an all-ones mask (i32 -1).
; Both targets are expected to emit a single unpredicated vpabsw on %zmm0.
2533 define <32 x i16> @test_int_x86_avx512_pabs_w_512(<32 x i16> %x0, <32 x i16> %x1) nounwind {
2534 ; CHECK-LABEL: test_int_x86_avx512_pabs_w_512:
2536 ; CHECK-NEXT: vpabsw %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x1d,0xc0]
2537 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2538 %res = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 -1)
; Calls @llvm.x86.avx512.mask.pabs.w.512 on %x0 with %x1 as the second operand
; and %x2 as the i32 mask operand.
; The checks expect the mask in %k1, a vpabsw predicated with {%k1} writing
; %zmm1, and a copy of %zmm1 into %zmm0.
2542 define <32 x i16> @test_int_x86_avx512_mask_pabs_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) nounwind {
2543 ; X86-LABEL: test_int_x86_avx512_mask_pabs_w_512:
2545 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
2546 ; X86-NEXT: vpabsw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x1d,0xc8]
2547 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2548 ; X86-NEXT: retl # encoding: [0xc3]
2550 ; X64-LABEL: test_int_x86_avx512_mask_pabs_w_512:
2552 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2553 ; X64-NEXT: vpabsw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x1d,0xc8]
2554 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2555 ; X64-NEXT: retq # encoding: [0xc3]
2556 %res = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2)
2560 declare <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8>, <64 x i8>, i64)
; Calls @llvm.x86.avx512.mask.pabs.b.512 on %x0 with %x1 as the second operand
; and an all-ones mask (i64 -1).
; Both targets are expected to emit a single unpredicated vpabsb on %zmm0.
2562 define <64 x i8> @test_int_x86_avx512_pabs_b_512(<64 x i8> %x0, <64 x i8> %x1) nounwind {
2563 ; CHECK-LABEL: test_int_x86_avx512_pabs_b_512:
2565 ; CHECK-NEXT: vpabsb %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x1c,0xc0]
2566 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2567 %res = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 -1)
; Calls @llvm.x86.avx512.mask.pabs.b.512 on %x0 with %x1 as the second operand
; and %x2 as the i64 mask operand.
; The checks expect the mask in %k1, a vpabsb predicated with {%k1} writing
; %zmm1, and a copy of %zmm1 into %zmm0.
2571 define <64 x i8> @test_int_x86_avx512_mask_pabs_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) nounwind {
2572 ; X86-LABEL: test_int_x86_avx512_mask_pabs_b_512:
2574 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
2575 ; X86-NEXT: vpabsb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x1c,0xc8]
2576 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2577 ; X86-NEXT: retl # encoding: [0xc3]
2579 ; X64-LABEL: test_int_x86_avx512_mask_pabs_b_512:
2581 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
2582 ; X64-NEXT: vpabsb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x1c,0xc8]
2583 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2584 ; X64-NEXT: retq # encoding: [0xc3]
2585 %res = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2)
2589 declare i64 @llvm.x86.avx512.ptestm.b.512(<64 x i8>, <64 x i8>, i64)
; Calls @llvm.x86.avx512.ptestm.b.512 on %x0/%x1 twice - once with the mask
; %x2 and once with an all-ones mask (i64 -1) - and adds the two results.
; The checks expect a single vptestmb; its result is ANDed with the mask and
; then added to the unmasked result (as two 32-bit halves with add/adc on X86).
; The check lines are autogenerated; regenerate them with the update script.
2591 define i64 @test_int_x86_avx512_ptestm_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) nounwind {
2592 ; X86-LABEL: test_int_x86_avx512_ptestm_b_512:
2594 ; X86-NEXT: pushl %esi # encoding: [0x56]
2595 ; X86-NEXT: vptestmb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x48,0x26,0xc1]
2596 ; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
2597 ; X86-NEXT: kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
2598 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c]
2599 ; X86-NEXT: andl %ecx, %edx # encoding: [0x21,0xca]
2600 ; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
2601 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
2602 ; X86-NEXT: andl %esi, %eax # encoding: [0x21,0xf0]
2603 ; X86-NEXT: addl %esi, %eax # encoding: [0x01,0xf0]
2604 ; X86-NEXT: adcl %ecx, %edx # encoding: [0x11,0xca]
2605 ; X86-NEXT: popl %esi # encoding: [0x5e]
2606 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2607 ; X86-NEXT: retl # encoding: [0xc3]
2609 ; X64-LABEL: test_int_x86_avx512_ptestm_b_512:
2611 ; X64-NEXT: vptestmb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x48,0x26,0xc1]
2612 ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
2613 ; X64-NEXT: andq %rax, %rdi # encoding: [0x48,0x21,0xc7]
2614 ; X64-NEXT: addq %rdi, %rax # encoding: [0x48,0x01,0xf8]
2615 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2616 ; X64-NEXT: retq # encoding: [0xc3]
2617 %res = call i64 @llvm.x86.avx512.ptestm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2)
2618 %res1 = call i64 @llvm.x86.avx512.ptestm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 -1)
2619 %res2 = add i64 %res, %res1
2623 declare i32 @llvm.x86.avx512.ptestm.w.512(<32 x i16>, <32 x i16>, i32)
; Calls @llvm.x86.avx512.ptestm.w.512 on %x0/%x1 twice - once with the mask
; %x2 and once with an all-ones mask (i32 -1) - and adds the two results.
; The checks expect a single vptestmw; its result is ANDed with the mask and
; then added to the unmasked result.
; The check lines are autogenerated; regenerate them with the update script.
2625 define i32 @test_int_x86_avx512_ptestm_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) nounwind {
2626 ; X86-LABEL: test_int_x86_avx512_ptestm_w_512:
2628 ; X86-NEXT: vptestmw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x26,0xc1]
2629 ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
2630 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2631 ; X86-NEXT: andl %ecx, %eax # encoding: [0x21,0xc8]
2632 ; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
2633 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2634 ; X86-NEXT: retl # encoding: [0xc3]
2636 ; X64-LABEL: test_int_x86_avx512_ptestm_w_512:
2638 ; X64-NEXT: vptestmw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x26,0xc1]
2639 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2640 ; X64-NEXT: andl %eax, %edi # encoding: [0x21,0xc7]
2641 ; X64-NEXT: addl %edi, %eax # encoding: [0x01,0xf8]
2642 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2643 ; X64-NEXT: retq # encoding: [0xc3]
2644 %res = call i32 @llvm.x86.avx512.ptestm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2)
2645 %res1 = call i32 @llvm.x86.avx512.ptestm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 -1)
2646 %res2 = add i32 %res, %res1
; Intrinsic declaration: two <64 x i8> operands and an i64 mask, returning i64.
2650 declare i64 @llvm.x86.avx512.ptestnm.b.512(<64 x i8>, <64 x i8>, i64)
; Calls @llvm.x86.avx512.ptestnm.b.512 on %x0/%x1 twice - once with the mask
; %x2 and once with an all-ones mask (i64 -1) - and adds the two results.
; The checks expect a single vptestnmb; its result is ANDed with the mask and
; then added to the unmasked result (as two 32-bit halves with add/adc on X86).
; The check lines are autogenerated; regenerate them with the update script.
2652 define i64 @test_int_x86_avx512_ptestnm_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) nounwind {
2653 ; X86-LABEL: test_int_x86_avx512_ptestnm_b_512:
2655 ; X86-NEXT: pushl %esi # encoding: [0x56]
2656 ; X86-NEXT: vptestnmb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x48,0x26,0xc1]
2657 ; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
2658 ; X86-NEXT: kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
2659 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c]
2660 ; X86-NEXT: andl %ecx, %edx # encoding: [0x21,0xca]
2661 ; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
2662 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
2663 ; X86-NEXT: andl %esi, %eax # encoding: [0x21,0xf0]
2664 ; X86-NEXT: addl %esi, %eax # encoding: [0x01,0xf0]
2665 ; X86-NEXT: adcl %ecx, %edx # encoding: [0x11,0xca]
2666 ; X86-NEXT: popl %esi # encoding: [0x5e]
2667 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2668 ; X86-NEXT: retl # encoding: [0xc3]
2670 ; X64-LABEL: test_int_x86_avx512_ptestnm_b_512:
2672 ; X64-NEXT: vptestnmb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x48,0x26,0xc1]
2673 ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
2674 ; X64-NEXT: andq %rax, %rdi # encoding: [0x48,0x21,0xc7]
2675 ; X64-NEXT: addq %rdi, %rax # encoding: [0x48,0x01,0xf8]
2676 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2677 ; X64-NEXT: retq # encoding: [0xc3]
2678 %res = call i64 @llvm.x86.avx512.ptestnm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2)
2679 %res1 = call i64 @llvm.x86.avx512.ptestnm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 -1)
2680 %res2 = add i64 %res, %res1
; Intrinsic declaration: two <32 x i16> operands and an i32 mask, returning i32.
2684 declare i32 @llvm.x86.avx512.ptestnm.w.512(<32 x i16>, <32 x i16>, i32)
; Calls @llvm.x86.avx512.ptestnm.w.512 on %x0/%x1 twice - once with the mask
; %x2 and once with an all-ones mask (i32 -1) - and adds the two results.
; The checks expect a single vptestnmw; its result is ANDed with the mask and
; then added to the unmasked result.
; The check lines are autogenerated; regenerate them with the update script.
2686 define i32 @test_int_x86_avx512_ptestnm_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) nounwind {
2687 ; X86-LABEL: test_int_x86_avx512_ptestnm_w_512:
2689 ; X86-NEXT: vptestnmw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x48,0x26,0xc1]
2690 ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
2691 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2692 ; X86-NEXT: andl %ecx, %eax # encoding: [0x21,0xc8]
2693 ; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
2694 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2695 ; X86-NEXT: retl # encoding: [0xc3]
2697 ; X64-LABEL: test_int_x86_avx512_ptestnm_w_512:
2699 ; X64-NEXT: vptestnmw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x48,0x26,0xc1]
2700 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2701 ; X64-NEXT: andl %eax, %edi # encoding: [0x21,0xc7]
2702 ; X64-NEXT: addl %edi, %eax # encoding: [0x01,0xf8]
2703 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2704 ; X64-NEXT: retq # encoding: [0xc3]
2705 %res = call i32 @llvm.x86.avx512.ptestnm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2)
2706 %res1 = call i32 @llvm.x86.avx512.ptestnm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 -1)
2707 %res2 = add i32 %res, %res1
2711 declare i64 @llvm.x86.avx512.cvtb2mask.512(<64 x i8>)
; Calls @llvm.x86.avx512.cvtb2mask.512 on %x0, producing an i64.
; The checks expect vpmovb2m into %k0. On X64 the mask is moved to %rax with a
; single kmovq; on X86 it is split with kshiftrq $32 and moved into %eax and
; %edx with two kmovd instructions.
2713 define i64 @test_int_x86_avx512_cvtb2mask_512(<64 x i8> %x0) nounwind {
2714 ; X86-LABEL: test_int_x86_avx512_cvtb2mask_512:
2716 ; X86-NEXT: vpmovb2m %zmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x48,0x29,0xc0]
2717 ; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
2718 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2719 ; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
2720 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2721 ; X86-NEXT: retl # encoding: [0xc3]
2723 ; X64-LABEL: test_int_x86_avx512_cvtb2mask_512:
2725 ; X64-NEXT: vpmovb2m %zmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x48,0x29,0xc0]
2726 ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
2727 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2728 ; X64-NEXT: retq # encoding: [0xc3]
2729 %res = call i64 @llvm.x86.avx512.cvtb2mask.512(<64 x i8> %x0)
2733 declare i32 @llvm.x86.avx512.cvtw2mask.512(<32 x i16>)
; Calls @llvm.x86.avx512.cvtw2mask.512 on %x0, producing an i32.
; Both targets are expected to emit vpmovw2m into %k0 followed by a kmovd of
; the mask into %eax.
2735 define i32 @test_int_x86_avx512_cvtw2mask_512(<32 x i16> %x0) nounwind {
2736 ; CHECK-LABEL: test_int_x86_avx512_cvtw2mask_512:
2738 ; CHECK-NEXT: vpmovw2m %zmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x48,0x29,0xc0]
2739 ; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2740 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2741 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2742 %res = call i32 @llvm.x86.avx512.cvtw2mask.512(<32 x i16> %x0)
2746 declare <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
; Calls @llvm.x86.avx512.mask.pmulhu.w.512 on %x0/%x1 with %x2 as the third
; operand and an all-ones mask (i32 -1).
; Both targets are expected to emit a single unpredicated vpmulhuw.
2748 define <32 x i16> @test_int_x86_avx512_pmulhu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) nounwind {
2749 ; CHECK-LABEL: test_int_x86_avx512_pmulhu_w_512:
2751 ; CHECK-NEXT: vpmulhuw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe4,0xc1]
2752 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2753 %res = call <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
; Calls @llvm.x86.avx512.mask.pmulhu.w.512 on %x0/%x1 with %x2 as the third
; operand and %x3 as the i32 mask operand.
; The checks expect the mask in %k1, a vpmulhuw predicated with {%k1} writing
; %zmm2, and a copy of %zmm2 into %zmm0.
2757 define <32 x i16> @test_int_x86_avx512_mask_pmulhu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) nounwind {
2758 ; X86-LABEL: test_int_x86_avx512_mask_pmulhu_w_512:
2760 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
2761 ; X86-NEXT: vpmulhuw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe4,0xd1]
2762 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2763 ; X86-NEXT: retl # encoding: [0xc3]
2765 ; X64-LABEL: test_int_x86_avx512_mask_pmulhu_w_512:
2767 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2768 ; X64-NEXT: vpmulhuw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe4,0xd1]
2769 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2770 ; X64-NEXT: retq # encoding: [0xc3]
2771 %res = call <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
2775 declare <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
; Calls llvm.x86.avx512.mask.pmulh.w.512 with a constant i32 -1 mask operand.
; The shared checks expect a plain (unmasked) vpmulhw on both targets.
2777 define <32 x i16> @test_int_x86_avx512_pmulh_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) nounwind {
2778 ; CHECK-LABEL: test_int_x86_avx512_pmulh_w_512:
2780 ; CHECK-NEXT: vpmulhw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe5,0xc1]
2781 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2782 %res = call <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
; Calls llvm.x86.avx512.mask.pmulh.w.512 with %x2 as third operand and the
; variable i32 %x3 as mask. The checks expect the mask in %k1, a {%k1}-masked
; vpmulhw into %zmm2, and a copy of %zmm2 into %zmm0.
2786 define <32 x i16> @test_int_x86_avx512_mask_pmulh_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) nounwind {
2787 ; X86-LABEL: test_int_x86_avx512_mask_pmulh_w_512:
2789 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
2790 ; X86-NEXT: vpmulhw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe5,0xd1]
2791 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2792 ; X86-NEXT: retl # encoding: [0xc3]
2794 ; X64-LABEL: test_int_x86_avx512_mask_pmulh_w_512:
2796 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2797 ; X64-NEXT: vpmulhw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe5,0xd1]
2798 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2799 ; X64-NEXT: retq # encoding: [0xc3]
2800 %res = call <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
2804 declare <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
; Calls llvm.x86.avx512.mask.pmul.hr.sw.512 with a constant i32 -1 mask operand.
; The shared checks expect a plain (unmasked) vpmulhrsw on both targets.
2806 define <32 x i16> @test_int_x86_avx512_pmulhr_sw_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) nounwind {
2807 ; CHECK-LABEL: test_int_x86_avx512_pmulhr_sw_512:
2809 ; CHECK-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x0b,0xc1]
2810 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2811 %res = call <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
; Calls llvm.x86.avx512.mask.pmul.hr.sw.512 with %x2 as third operand and the
; variable i32 %x3 as mask. The checks expect the mask in %k1, a {%k1}-masked
; vpmulhrsw into %zmm2, and a copy of %zmm2 into %zmm0.
2815 define <32 x i16> @test_int_x86_avx512_mask_pmulhr_sw_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) nounwind {
2816 ; X86-LABEL: test_int_x86_avx512_mask_pmulhr_sw_512:
2818 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
2819 ; X86-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x0b,0xd1]
2820 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2821 ; X86-NEXT: retl # encoding: [0xc3]
2823 ; X64-LABEL: test_int_x86_avx512_mask_pmulhr_sw_512:
2825 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2826 ; X64-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x0b,0xd1]
2827 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2828 ; X64-NEXT: retq # encoding: [0xc3]
2829 %res = call <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
2833 declare <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8>, <64 x i8>, <32 x i16>, i32)
; Calls llvm.x86.avx512.mask.pmaddubs.w.512 on two <64 x i8> inputs with a
; constant i32 -1 mask operand. The shared checks expect an unmasked vpmaddubsw.
2835 define <32 x i16> @test_int_x86_avx512_pmaddubs_w_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2) nounwind {
2836 ; CHECK-LABEL: test_int_x86_avx512_pmaddubs_w_512:
2838 ; CHECK-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x04,0xc1]
2839 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2840 %res = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 -1)
; Calls llvm.x86.avx512.mask.pmaddubs.w.512 with %x2 as third operand and the
; variable i32 %x3 as mask. The checks expect the mask in %k1, a {%k1}-masked
; vpmaddubsw into %zmm2, and a copy of %zmm2 into %zmm0.
2844 define <32 x i16> @test_int_x86_avx512_mask_pmaddubs_w_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3) nounwind {
2845 ; X86-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512:
2847 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
2848 ; X86-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x04,0xd1]
2849 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2850 ; X86-NEXT: retl # encoding: [0xc3]
2852 ; X64-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512:
2854 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2855 ; X64-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x04,0xd1]
2856 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2857 ; X64-NEXT: retq # encoding: [0xc3]
2858 %res = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3)
2862 declare <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16>, <32 x i16>, <16 x i32>, i16)
; Calls llvm.x86.avx512.mask.pmaddw.d.512 (<32 x i16> inputs, <16 x i32> result)
; with a constant i16 -1 mask operand. The shared checks expect an unmasked vpmaddwd.
2864 define <16 x i32> @test_int_x86_avx512_pmaddw_d_512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2) nounwind {
2865 ; CHECK-LABEL: test_int_x86_avx512_pmaddw_d_512:
2867 ; CHECK-NEXT: vpmaddwd %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xf5,0xc1]
2868 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2869 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 -1)
; Calls llvm.x86.avx512.mask.pmaddw.d.512 with %x2 as third operand and the
; variable i16 %x3 as mask. The checks expect the mask loaded into %k1 (kmovw
; from the stack on i686, kmovd from %edi on x86_64), a {%k1}-masked vpmaddwd
; into %zmm2, and a copy of %zmm2 into %zmm0.
2873 define <16 x i32> @test_int_x86_avx512_mask_pmaddw_d_512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3) nounwind {
2874 ; X86-LABEL: test_int_x86_avx512_mask_pmaddw_d_512:
2876 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2877 ; X86-NEXT: vpmaddwd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf5,0xd1]
2878 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2879 ; X86-NEXT: retl # encoding: [0xc3]
2881 ; X64-LABEL: test_int_x86_avx512_mask_pmaddw_d_512:
2883 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2884 ; X64-NEXT: vpmaddwd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf5,0xd1]
2885 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2886 ; X64-NEXT: retq # encoding: [0xc3]
2887 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3)
2891 declare <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
; Calls llvm.x86.avx512.mask.permvar.hi.512 with a constant i32 -1 mask operand.
; The shared checks expect an unmasked vpermw with %zmm0 and %zmm1 as sources.
2893 define <32 x i16> @test_int_x86_avx512_permvar_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) nounwind {
2894 ; CHECK-LABEL: test_int_x86_avx512_permvar_hi_512:
2896 ; CHECK-NEXT: vpermw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0x8d,0xc0]
2897 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2898 %res = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
; Calls llvm.x86.avx512.mask.permvar.hi.512 with %x2 as third operand and the
; variable i32 %x3 as mask. The checks expect the mask in %k1, a {%k1}-masked
; vpermw into %zmm2, and a copy of %zmm2 into %zmm0.
2902 define <32 x i16> @test_int_x86_avx512_mask_permvar_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) nounwind {
2903 ; X86-LABEL: test_int_x86_avx512_mask_permvar_hi_512:
2905 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
2906 ; X86-NEXT: vpermw %zmm0, %zmm1, %zmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x8d,0xd0]
2907 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2908 ; X86-NEXT: retl # encoding: [0xc3]
2910 ; X64-LABEL: test_int_x86_avx512_mask_permvar_hi_512:
2912 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2913 ; X64-NEXT: vpermw %zmm0, %zmm1, %zmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x8d,0xd0]
2914 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2915 ; X64-NEXT: retq # encoding: [0xc3]
2916 %res = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
; Calls llvm.x86.avx512.mask.permvar.hi.512 with zeroinitializer as third operand
; and the variable i32 %x3 as mask. The checks expect a zero-masking
; ({%k1} {z}) vpermw written directly to %zmm0.
2920 define <32 x i16> @test_int_x86_avx512_maskz_permvar_hi_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x3) nounwind {
2921 ; X86-LABEL: test_int_x86_avx512_maskz_permvar_hi_512:
2923 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
2924 ; X86-NEXT: vpermw %zmm0, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x8d,0xc0]
2925 ; X86-NEXT: retl # encoding: [0xc3]
2927 ; X64-LABEL: test_int_x86_avx512_maskz_permvar_hi_512:
2929 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2930 ; X64-NEXT: vpermw %zmm0, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x8d,0xc0]
2931 ; X64-NEXT: retq # encoding: [0xc3]
2932 %res = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
2936 declare <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
; Calls llvm.x86.avx512.mask.vpermt2var.hi.512 with a constant i32 -1 mask operand.
; Note the shared checks expect the unmasked call to be emitted as vpermi2w
; (destination %zmm0), not vpermt2w.
2938 define <32 x i16> @test_int_x86_avx512_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) nounwind {
2939 ; CHECK-LABEL: test_int_x86_avx512_vpermt2var_hi_512:
2941 ; CHECK-NEXT: vpermi2w %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0x75,0xc2]
2942 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2943 %res = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
; Calls llvm.x86.avx512.mask.vpermt2var.hi.512 with the variable i32 %x3 as mask.
; The checks expect the mask in %k1, a {%k1}-masked vpermt2w whose destination
; is %zmm1, and a copy of %zmm1 into %zmm0.
2947 define <32 x i16> @test_int_x86_avx512_mask_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) nounwind {
2948 ; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512:
2950 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
2951 ; X86-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x7d,0xca]
2952 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2953 ; X86-NEXT: retl # encoding: [0xc3]
2955 ; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512:
2957 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2958 ; X64-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x7d,0xca]
2959 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2960 ; X64-NEXT: retq # encoding: [0xc3]
2961 %res = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
2965 declare <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
; Calls llvm.x86.avx512.maskz.vpermt2var.hi.512 with the variable i32 %x3 as mask.
; The checks expect the mask in %k1 and a zero-masking ({%k1} {z}) vpermi2w
; written directly to %zmm0.
2967 define <32 x i16> @test_int_x86_avx512_maskz_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) nounwind {
2968 ; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512:
2970 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
2971 ; X86-NEXT: vpermi2w %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x75,0xc2]
2972 ; X86-NEXT: retl # encoding: [0xc3]
2974 ; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512:
2976 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2977 ; X64-NEXT: vpermi2w %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x75,0xc2]
2978 ; X64-NEXT: retq # encoding: [0xc3]
2979 %res = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
2983 declare <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
; Calls llvm.x86.avx512.mask.vpermi2var.hi.512 with a constant i32 -1 mask operand.
; Note the shared checks expect the unmasked call to be emitted as vpermt2w
; (destination %zmm0), not vpermi2w.
2985 define <32 x i16> @test_int_x86_avx512_vpermi2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) nounwind {
2986 ; CHECK-LABEL: test_int_x86_avx512_vpermi2var_hi_512:
2988 ; CHECK-NEXT: vpermt2w %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0x7d,0xc2]
2989 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2990 %res = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
; Calls llvm.x86.avx512.mask.vpermi2var.hi.512 with the variable i32 %x3 as mask.
; The checks expect the mask in %k1, a {%k1}-masked vpermi2w whose destination
; is %zmm1, and a copy of %zmm1 into %zmm0.
2994 define <32 x i16> @test_int_x86_avx512_mask_vpermi2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) nounwind {
2995 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512:
2997 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
2998 ; X86-NEXT: vpermi2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x75,0xca]
2999 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3000 ; X86-NEXT: retl # encoding: [0xc3]
3002 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512:
3004 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3005 ; X64-NEXT: vpermi2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x75,0xca]
3006 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3007 ; X64-NEXT: retq # encoding: [0xc3]
3008 %res = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
3012 declare <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8>, <64 x i8>, i32, <32 x i16>, i32)
; Issues three calls to llvm.x86.avx512.mask.dbpsadbw.512 on the same inputs and
; returns all three results in a struct:
;   - immediate 2, fourth operand %x3, mask %x4  -> checks expect {%k1} masking
;   - immediate 3, fourth operand zeroinitializer, mask %x4 -> {%k1} {z} masking
;   - immediate 4, fourth operand %x3, mask -1   -> unmasked vdbpsadbw
; The checks also expect %zmm2 to be copied to %zmm4 before the masked form
; overwrites it, and the results to be moved into %zmm0/%zmm1 afterwards.
3014 define { <32 x i16>, <32 x i16>, <32 x i16> } @test_int_x86_avx512_mask_dbpsadbw_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x3, i32 %x4) nounwind {
3015 ; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_512:
3017 ; X86-NEXT: vmovdqa64 %zmm2, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe2]
3018 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
3019 ; X86-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x42,0xe1,0x02]
3020 ; X86-NEXT: vdbpsadbw $3, %zmm1, %zmm0, %zmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x42,0xd9,0x03]
3021 ; X86-NEXT: vdbpsadbw $4, %zmm1, %zmm0, %zmm2 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xd1,0x04]
3022 ; X86-NEXT: vmovdqa64 %zmm4, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc4]
3023 ; X86-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb]
3024 ; X86-NEXT: retl # encoding: [0xc3]
3026 ; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_512:
3028 ; X64-NEXT: vmovdqa64 %zmm2, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe2]
3029 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3030 ; X64-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x42,0xe1,0x02]
3031 ; X64-NEXT: vdbpsadbw $3, %zmm1, %zmm0, %zmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x42,0xd9,0x03]
3032 ; X64-NEXT: vdbpsadbw $4, %zmm1, %zmm0, %zmm2 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xd1,0x04]
3033 ; X64-NEXT: vmovdqa64 %zmm4, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc4]
3034 ; X64-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb]
3035 ; X64-NEXT: retq # encoding: [0xc3]
3036 %res = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> %x3, i32 %x4)
3037 %res1 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 3, <32 x i16> zeroinitializer, i32 %x4)
3038 %res2 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 4, <32 x i16> %x3, i32 -1)
3039 %res3 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } poison, <32 x i16> %res, 0
3040 %res4 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res3, <32 x i16> %res1, 1
3041 %res5 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res4, <32 x i16> %res2, 2
3042 ret { <32 x i16>, <32 x i16>, <32 x i16> } %res5
; Register-register form: calls llvm.x86.avx512.mask.paddus.w.512 with a
; zeroinitializer third operand and constant i32 -1 mask. The shared checks
; expect an unmasked vpaddusw.
3045 define <32 x i16> @test_mask_adds_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) nounwind {
3046 ; CHECK-LABEL: test_mask_adds_epu16_rr_512:
3048 ; CHECK-NEXT: vpaddusw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xdd,0xc1]
3049 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3050 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
; Register-register form with %passThru as third operand and variable i32 %mask.
; The checks expect the mask in %k1, a {%k1}-masked vpaddusw into %zmm2, and a
; copy of %zmm2 into %zmm0.
3054 define <32 x i16> @test_mask_adds_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) nounwind {
3055 ; X86-LABEL: test_mask_adds_epu16_rrk_512:
3057 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
3058 ; X86-NEXT: vpaddusw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xdd,0xd1]
3059 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3060 ; X86-NEXT: retl # encoding: [0xc3]
3062 ; X64-LABEL: test_mask_adds_epu16_rrk_512:
3064 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3065 ; X64-NEXT: vpaddusw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xdd,0xd1]
3066 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3067 ; X64-NEXT: retq # encoding: [0xc3]
3068 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
; Register-register form with a zeroinitializer third operand and variable i32
; %mask. The checks expect a zero-masking ({%k1} {z}) vpaddusw into %zmm0.
3072 define <32 x i16> @test_mask_adds_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) nounwind {
3073 ; X86-LABEL: test_mask_adds_epu16_rrkz_512:
3075 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
3076 ; X86-NEXT: vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xdd,0xc1]
3077 ; X86-NEXT: retl # encoding: [0xc3]
3079 ; X64-LABEL: test_mask_adds_epu16_rrkz_512:
3081 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3082 ; X64-NEXT: vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xdd,0xc1]
3083 ; X64-NEXT: retq # encoding: [0xc3]
3084 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
; Register-memory form: the second operand is loaded from %ptr_b and the mask is
; constant -1. The checks expect the load to appear as a memory operand of an
; unmasked vpaddusw ((%eax) on i686 after loading the pointer, (%rdi) on x86_64).
3088 define <32 x i16> @test_mask_adds_epu16_rm_512(<32 x i16> %a, ptr %ptr_b) nounwind {
3089 ; X86-LABEL: test_mask_adds_epu16_rm_512:
3091 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3092 ; X86-NEXT: vpaddusw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xdd,0x00]
3093 ; X86-NEXT: retl # encoding: [0xc3]
3095 ; X64-LABEL: test_mask_adds_epu16_rm_512:
3097 ; X64-NEXT: vpaddusw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xdd,0x07]
3098 ; X64-NEXT: retq # encoding: [0xc3]
3099 %b = load <32 x i16>, ptr %ptr_b
3100 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
; Register-memory form with %passThru as third operand and variable i32 %mask.
; The checks expect a {%k1}-masked vpaddusw with a memory source into %zmm1,
; then a copy of %zmm1 into %zmm0.
3104 define <32 x i16> @test_mask_adds_epu16_rmk_512(<32 x i16> %a, ptr %ptr_b, <32 x i16> %passThru, i32 %mask) nounwind {
3105 ; X86-LABEL: test_mask_adds_epu16_rmk_512:
3107 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3108 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
3109 ; X86-NEXT: vpaddusw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xdd,0x08]
3110 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3111 ; X86-NEXT: retl # encoding: [0xc3]
3113 ; X64-LABEL: test_mask_adds_epu16_rmk_512:
3115 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3116 ; X64-NEXT: vpaddusw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xdd,0x0f]
3117 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3118 ; X64-NEXT: retq # encoding: [0xc3]
3119 %b = load <32 x i16>, ptr %ptr_b
3120 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
; Register-memory form with a zeroinitializer third operand and variable i32
; %mask. The checks expect a zero-masking ({%k1} {z}) vpaddusw with a memory
; source written directly to %zmm0.
3124 define <32 x i16> @test_mask_adds_epu16_rmkz_512(<32 x i16> %a, ptr %ptr_b, i32 %mask) nounwind {
3125 ; X86-LABEL: test_mask_adds_epu16_rmkz_512:
3127 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3128 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
3129 ; X86-NEXT: vpaddusw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xdd,0x00]
3130 ; X86-NEXT: retl # encoding: [0xc3]
3132 ; X64-LABEL: test_mask_adds_epu16_rmkz_512:
3134 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3135 ; X64-NEXT: vpaddusw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xdd,0x07]
3136 ; X64-NEXT: retq # encoding: [0xc3]
3137 %b = load <32 x i16>, ptr %ptr_b
3138 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
3142 declare <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
; Register-register form: calls llvm.x86.avx512.mask.psubus.w.512 with a
; zeroinitializer third operand and constant i32 -1 mask. The shared checks
; expect an unmasked vpsubusw.
3144 define <32 x i16> @test_mask_subs_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) nounwind {
3145 ; CHECK-LABEL: test_mask_subs_epu16_rr_512:
3147 ; CHECK-NEXT: vpsubusw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd9,0xc1]
3148 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3149 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
; Register-register form with %passThru as third operand and variable i32 %mask.
; The checks expect the mask in %k1, a {%k1}-masked vpsubusw into %zmm2, and a
; copy of %zmm2 into %zmm0.
3153 define <32 x i16> @test_mask_subs_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) nounwind {
3154 ; X86-LABEL: test_mask_subs_epu16_rrk_512:
3156 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
3157 ; X86-NEXT: vpsubusw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd9,0xd1]
3158 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3159 ; X86-NEXT: retl # encoding: [0xc3]
3161 ; X64-LABEL: test_mask_subs_epu16_rrk_512:
3163 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3164 ; X64-NEXT: vpsubusw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd9,0xd1]
3165 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3166 ; X64-NEXT: retq # encoding: [0xc3]
3167 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
; Register-register form with a zeroinitializer third operand and variable i32
; %mask. The checks expect a zero-masking ({%k1} {z}) vpsubusw into %zmm0.
3171 define <32 x i16> @test_mask_subs_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) nounwind {
3172 ; X86-LABEL: test_mask_subs_epu16_rrkz_512:
3174 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
3175 ; X86-NEXT: vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd9,0xc1]
3176 ; X86-NEXT: retl # encoding: [0xc3]
3178 ; X64-LABEL: test_mask_subs_epu16_rrkz_512:
3180 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3181 ; X64-NEXT: vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd9,0xc1]
3182 ; X64-NEXT: retq # encoding: [0xc3]
3183 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
; Register-memory form: the second operand is loaded from %ptr_b and the mask is
; constant -1. The checks expect the load to appear as a memory operand of an
; unmasked vpsubusw.
3187 define <32 x i16> @test_mask_subs_epu16_rm_512(<32 x i16> %a, ptr %ptr_b) nounwind {
3188 ; X86-LABEL: test_mask_subs_epu16_rm_512:
3190 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3191 ; X86-NEXT: vpsubusw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd9,0x00]
3192 ; X86-NEXT: retl # encoding: [0xc3]
3194 ; X64-LABEL: test_mask_subs_epu16_rm_512:
3196 ; X64-NEXT: vpsubusw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd9,0x07]
3197 ; X64-NEXT: retq # encoding: [0xc3]
3198 %b = load <32 x i16>, ptr %ptr_b
3199 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
; Register-memory form with %passThru as third operand and variable i32 %mask.
; The checks expect a {%k1}-masked vpsubusw with a memory source into %zmm1,
; then a copy of %zmm1 into %zmm0.
3203 define <32 x i16> @test_mask_subs_epu16_rmk_512(<32 x i16> %a, ptr %ptr_b, <32 x i16> %passThru, i32 %mask) nounwind {
3204 ; X86-LABEL: test_mask_subs_epu16_rmk_512:
3206 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3207 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
3208 ; X86-NEXT: vpsubusw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd9,0x08]
3209 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3210 ; X86-NEXT: retl # encoding: [0xc3]
3212 ; X64-LABEL: test_mask_subs_epu16_rmk_512:
3214 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3215 ; X64-NEXT: vpsubusw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd9,0x0f]
3216 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3217 ; X64-NEXT: retq # encoding: [0xc3]
3218 %b = load <32 x i16>, ptr %ptr_b
3219 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
; Register-memory form with a zeroinitializer third operand and variable i32
; %mask. The checks expect a zero-masking ({%k1} {z}) vpsubusw with a memory
; source written directly to %zmm0.
3223 define <32 x i16> @test_mask_subs_epu16_rmkz_512(<32 x i16> %a, ptr %ptr_b, i32 %mask) nounwind {
3224 ; X86-LABEL: test_mask_subs_epu16_rmkz_512:
3226 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3227 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
3228 ; X86-NEXT: vpsubusw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd9,0x00]
3229 ; X86-NEXT: retl # encoding: [0xc3]
3231 ; X64-LABEL: test_mask_subs_epu16_rmkz_512:
3233 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3234 ; X64-NEXT: vpsubusw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd9,0x07]
3235 ; X64-NEXT: retq # encoding: [0xc3]
3236 %b = load <32 x i16>, ptr %ptr_b
3237 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
3241 declare <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
; Register-register form: calls llvm.x86.avx512.mask.paddus.b.512 with a
; zeroinitializer third operand and constant i64 -1 mask. The shared checks
; expect an unmasked vpaddusb.
3243 define <64 x i8> @test_mask_adds_epu8_rr_512(<64 x i8> %a, <64 x i8> %b) nounwind {
3244 ; CHECK-LABEL: test_mask_adds_epu8_rr_512:
3246 ; CHECK-NEXT: vpaddusb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xdc,0xc1]
3247 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3248 %res = call <64 x i8> @llvm.x86.avx512.mask.paddus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 -1)
; Register-register form with %passThru as third operand and variable i64 %mask.
; The checks expect the 64-bit mask loaded into %k1 with kmovq (from the stack
; on i686, from %rdi on x86_64), a {%k1}-masked vpaddusb into %zmm2, and a copy
; of %zmm2 into %zmm0.
3252 define <64 x i8> @test_mask_adds_epu8_rrk_512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask) nounwind {
3253 ; X86-LABEL: test_mask_adds_epu8_rrk_512:
3255 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
3256 ; X86-NEXT: vpaddusb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xdc,0xd1]
3257 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3258 ; X86-NEXT: retl # encoding: [0xc3]
3260 ; X64-LABEL: test_mask_adds_epu8_rrk_512:
3262 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
3263 ; X64-NEXT: vpaddusb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xdc,0xd1]
3264 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3265 ; X64-NEXT: retq # encoding: [0xc3]
3266 %res = call <64 x i8> @llvm.x86.avx512.mask.paddus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask)
; Register-register form with a zeroinitializer third operand and variable i64
; %mask. The checks expect a kmovq into %k1 and a zero-masking ({%k1} {z})
; vpaddusb into %zmm0.
3270 define <64 x i8> @test_mask_adds_epu8_rrkz_512(<64 x i8> %a, <64 x i8> %b, i64 %mask) nounwind {
3271 ; X86-LABEL: test_mask_adds_epu8_rrkz_512:
3273 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
3274 ; X86-NEXT: vpaddusb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xdc,0xc1]
3275 ; X86-NEXT: retl # encoding: [0xc3]
3277 ; X64-LABEL: test_mask_adds_epu8_rrkz_512:
3279 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
3280 ; X64-NEXT: vpaddusb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xdc,0xc1]
3281 ; X64-NEXT: retq # encoding: [0xc3]
3282 %res = call <64 x i8> @llvm.x86.avx512.mask.paddus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 %mask)
; Register-memory form: the second operand is loaded from %ptr_b and the mask is
; constant -1. The checks expect the load to appear as a memory operand of an
; unmasked vpaddusb.
3286 define <64 x i8> @test_mask_adds_epu8_rm_512(<64 x i8> %a, ptr %ptr_b) nounwind {
3287 ; X86-LABEL: test_mask_adds_epu8_rm_512:
3289 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3290 ; X86-NEXT: vpaddusb (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xdc,0x00]
3291 ; X86-NEXT: retl # encoding: [0xc3]
3293 ; X64-LABEL: test_mask_adds_epu8_rm_512:
3295 ; X64-NEXT: vpaddusb (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xdc,0x07]
3296 ; X64-NEXT: retq # encoding: [0xc3]
3297 %b = load <64 x i8>, ptr %ptr_b
3298 %res = call <64 x i8> @llvm.x86.avx512.mask.paddus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 -1)
; Register-memory form with %passThru as third operand and variable i64 %mask.
; The checks expect a kmovq into %k1, a {%k1}-masked vpaddusb with a memory
; source into %zmm1, then a copy of %zmm1 into %zmm0.
3302 define <64 x i8> @test_mask_adds_epu8_rmk_512(<64 x i8> %a, ptr %ptr_b, <64 x i8> %passThru, i64 %mask) nounwind {
3303 ; X86-LABEL: test_mask_adds_epu8_rmk_512:
3305 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3306 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
3307 ; X86-NEXT: vpaddusb (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xdc,0x08]
3308 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3309 ; X86-NEXT: retl # encoding: [0xc3]
3311 ; X64-LABEL: test_mask_adds_epu8_rmk_512:
3313 ; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
3314 ; X64-NEXT: vpaddusb (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xdc,0x0f]
3315 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3316 ; X64-NEXT: retq # encoding: [0xc3]
3317 %b = load <64 x i8>, ptr %ptr_b
3318 %res = call <64 x i8> @llvm.x86.avx512.mask.paddus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask)
; Register-memory form with a zeroinitializer third operand and variable i64
; %mask. The checks expect a kmovq into %k1 and a zero-masking ({%k1} {z})
; vpaddusb with a memory source written directly to %zmm0.
3322 define <64 x i8> @test_mask_adds_epu8_rmkz_512(<64 x i8> %a, ptr %ptr_b, i64 %mask) nounwind {
3323 ; X86-LABEL: test_mask_adds_epu8_rmkz_512:
3325 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3326 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
3327 ; X86-NEXT: vpaddusb (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xdc,0x00]
3328 ; X86-NEXT: retl # encoding: [0xc3]
3330 ; X64-LABEL: test_mask_adds_epu8_rmkz_512:
3332 ; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
3333 ; X64-NEXT: vpaddusb (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xdc,0x07]
3334 ; X64-NEXT: retq # encoding: [0xc3]
3335 %b = load <64 x i8>, ptr %ptr_b
3336 %res = call <64 x i8> @llvm.x86.avx512.mask.paddus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 %mask)
3340 declare <64 x i8> @llvm.x86.avx512.mask.paddus.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
; Register-register form: calls llvm.x86.avx512.mask.psubus.b.512 with a
; zeroinitializer third operand and constant i64 -1 mask. The shared checks
; expect an unmasked vpsubusb.
3342 define <64 x i8> @test_mask_subs_epu8_rr_512(<64 x i8> %a, <64 x i8> %b) nounwind {
3343 ; CHECK-LABEL: test_mask_subs_epu8_rr_512:
3345 ; CHECK-NEXT: vpsubusb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd8,0xc1]
3346 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3347 %res = call <64 x i8> @llvm.x86.avx512.mask.psubus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 -1)
; Register-register form with %passThru as third operand and variable i64 %mask.
; The checks expect a kmovq into %k1, a {%k1}-masked vpsubusb into %zmm2, and a
; copy of %zmm2 into %zmm0.
3351 define <64 x i8> @test_mask_subs_epu8_rrk_512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask) nounwind {
3352 ; X86-LABEL: test_mask_subs_epu8_rrk_512:
3354 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
3355 ; X86-NEXT: vpsubusb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd8,0xd1]
3356 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3357 ; X86-NEXT: retl # encoding: [0xc3]
3359 ; X64-LABEL: test_mask_subs_epu8_rrk_512:
3361 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
3362 ; X64-NEXT: vpsubusb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd8,0xd1]
3363 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3364 ; X64-NEXT: retq # encoding: [0xc3]
3365 %res = call <64 x i8> @llvm.x86.avx512.mask.psubus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask)
; Register-register form with a zeroinitializer third operand and variable i64
; %mask. The checks expect a kmovq into %k1 and a zero-masking ({%k1} {z})
; vpsubusb into %zmm0.
3369 define <64 x i8> @test_mask_subs_epu8_rrkz_512(<64 x i8> %a, <64 x i8> %b, i64 %mask) nounwind {
3370 ; X86-LABEL: test_mask_subs_epu8_rrkz_512:
3372 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
3373 ; X86-NEXT: vpsubusb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd8,0xc1]
3374 ; X86-NEXT: retl # encoding: [0xc3]
3376 ; X64-LABEL: test_mask_subs_epu8_rrkz_512:
3378 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
3379 ; X64-NEXT: vpsubusb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd8,0xc1]
3380 ; X64-NEXT: retq # encoding: [0xc3]
3381 %res = call <64 x i8> @llvm.x86.avx512.mask.psubus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 %mask)
; Register-memory form: the second operand is loaded from %ptr_b and the mask is
; constant -1. The checks expect the load to appear as a memory operand of an
; unmasked vpsubusb.
3385 define <64 x i8> @test_mask_subs_epu8_rm_512(<64 x i8> %a, ptr %ptr_b) nounwind {
3386 ; X86-LABEL: test_mask_subs_epu8_rm_512:
3388 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3389 ; X86-NEXT: vpsubusb (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd8,0x00]
3390 ; X86-NEXT: retl # encoding: [0xc3]
3392 ; X64-LABEL: test_mask_subs_epu8_rm_512:
3394 ; X64-NEXT: vpsubusb (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd8,0x07]
3395 ; X64-NEXT: retq # encoding: [0xc3]
3396 %b = load <64 x i8>, ptr %ptr_b
3397 %res = call <64 x i8> @llvm.x86.avx512.mask.psubus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 -1)
; Merge-masked register-memory case for mask.psubus.b.512: %b is loaded from
; %ptr_b and the call passes %passThru and %mask. The expected code loads the
; mask into %k1, emits vpsubusb with a memory operand writing %zmm1 under
; {%k1}, then copies %zmm1 into %zmm0.
3401 define <64 x i8> @test_mask_subs_epu8_rmk_512(<64 x i8> %a, ptr %ptr_b, <64 x i8> %passThru, i64 %mask) nounwind {
3402 ; X86-LABEL: test_mask_subs_epu8_rmk_512:
3404 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3405 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
3406 ; X86-NEXT: vpsubusb (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd8,0x08]
3407 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3408 ; X86-NEXT: retl # encoding: [0xc3]
3410 ; X64-LABEL: test_mask_subs_epu8_rmk_512:
3412 ; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
3413 ; X64-NEXT: vpsubusb (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd8,0x0f]
3414 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3415 ; X64-NEXT: retq # encoding: [0xc3]
3416 %b = load <64 x i8>, ptr %ptr_b
3417 %res = call <64 x i8> @llvm.x86.avx512.mask.psubus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask)
; Zero-masked register-memory case for mask.psubus.b.512: %b is loaded from
; %ptr_b, the passthru is zeroinitializer and %mask is the i64 mask. The
; expected code is a single vpsubusb with a memory operand and {%k1} {z}.
3421 define <64 x i8> @test_mask_subs_epu8_rmkz_512(<64 x i8> %a, ptr %ptr_b, i64 %mask) nounwind {
3422 ; X86-LABEL: test_mask_subs_epu8_rmkz_512:
3424 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3425 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
3426 ; X86-NEXT: vpsubusb (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd8,0x00]
3427 ; X86-NEXT: retl # encoding: [0xc3]
3429 ; X64-LABEL: test_mask_subs_epu8_rmkz_512:
3431 ; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
3432 ; X64-NEXT: vpsubusb (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd8,0x07]
3433 ; X64-NEXT: retq # encoding: [0xc3]
3434 %b = load <64 x i8>, ptr %ptr_b
3435 %res = call <64 x i8> @llvm.x86.avx512.mask.psubus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 %mask)
; Intrinsic exercised by the test_mask_subs_epu8_*_512 tests above; the call
; sites pass (a, b, passthru vector, i64 mask).
3439 declare <64 x i8> @llvm.x86.avx512.mask.psubus.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
; Plain register-register case: calls padds.w.512 directly on %a and %b.
; Both runs share one set of assertions expecting a single unmasked vpaddsw.
3441 define <32 x i16> @test_adds_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) nounwind {
3442 ; CHECK-LABEL: test_adds_epi16_rr_512:
3444 ; CHECK-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xed,0xc1]
3445 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3446 %1 = call <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16> %a, <32 x i16> %b)
; Merge-masked register-register case built from generic IR: the padds.w.512
; result is blended with %passThru by a select on %mask bitcast to <32 x i1>.
; The expected code folds the select into one vpaddsw writing %zmm2 under
; {%k1}, followed by a move of %zmm2 into %zmm0.
3450 define <32 x i16> @test_adds_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) nounwind {
3451 ; X86-LABEL: test_adds_epi16_rrk_512:
3453 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
3454 ; X86-NEXT: vpaddsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0xd1]
3455 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3456 ; X86-NEXT: retl # encoding: [0xc3]
3458 ; X64-LABEL: test_adds_epi16_rrk_512:
3460 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3461 ; X64-NEXT: vpaddsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0xd1]
3462 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3463 ; X64-NEXT: retq # encoding: [0xc3]
3464 %1 = call <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16> %a, <32 x i16> %b)
3465 %2 = bitcast i32 %mask to <32 x i1>
3466 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
; Zero-masked register-register case built from generic IR: the padds.w.512
; result is selected against zeroinitializer using %mask bitcast to
; <32 x i1>. The expected code is a single vpaddsw with {%k1} {z}.
3470 define <32 x i16> @test_adds_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) nounwind {
3471 ; X86-LABEL: test_adds_epi16_rrkz_512:
3473 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
3474 ; X86-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0xc1]
3475 ; X86-NEXT: retl # encoding: [0xc3]
3477 ; X64-LABEL: test_adds_epi16_rrkz_512:
3479 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3480 ; X64-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0xc1]
3481 ; X64-NEXT: retq # encoding: [0xc3]
3482 %1 = call <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16> %a, <32 x i16> %b)
3483 %2 = bitcast i32 %mask to <32 x i1>
3484 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
; Plain register-memory case: %b is loaded from %ptr_b and passed to
; padds.w.512. The load is expected to appear as the memory operand of an
; unmasked vpaddsw; on i686 the pointer is first read from the stack.
3488 define <32 x i16> @test_adds_epi16_rm_512(<32 x i16> %a, ptr %ptr_b) nounwind {
3489 ; X86-LABEL: test_adds_epi16_rm_512:
3491 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3492 ; X86-NEXT: vpaddsw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xed,0x00]
3493 ; X86-NEXT: retl # encoding: [0xc3]
3495 ; X64-LABEL: test_adds_epi16_rm_512:
3497 ; X64-NEXT: vpaddsw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xed,0x07]
3498 ; X64-NEXT: retq # encoding: [0xc3]
3499 %b = load <32 x i16>, ptr %ptr_b
3500 %1 = call <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16> %a, <32 x i16> %b)
; Merge-masked register-memory case built from generic IR: %b is loaded from
; %ptr_b, and the padds.w.512 result is blended with %passThru by a select on
; %mask bitcast to <32 x i1>. The expected code is one vpaddsw with a memory
; operand writing %zmm1 under {%k1}, followed by a move into %zmm0.
3504 define <32 x i16> @test_adds_epi16_rmk_512(<32 x i16> %a, ptr %ptr_b, <32 x i16> %passThru, i32 %mask) nounwind {
3505 ; X86-LABEL: test_adds_epi16_rmk_512:
3507 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3508 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
3509 ; X86-NEXT: vpaddsw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0x08]
3510 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3511 ; X86-NEXT: retl # encoding: [0xc3]
3513 ; X64-LABEL: test_adds_epi16_rmk_512:
3515 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3516 ; X64-NEXT: vpaddsw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0x0f]
3517 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3518 ; X64-NEXT: retq # encoding: [0xc3]
3519 %b = load <32 x i16>, ptr %ptr_b
3520 %1 = call <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16> %a, <32 x i16> %b)
3521 %2 = bitcast i32 %mask to <32 x i1>
3522 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
; Zero-masked register-memory case built from generic IR: %b is loaded from
; %ptr_b, and the padds.w.512 result is selected against zeroinitializer using
; %mask bitcast to <32 x i1>. The expected code is a single vpaddsw with a
; memory operand and {%k1} {z}.
3526 define <32 x i16> @test_adds_epi16_rmkz_512(<32 x i16> %a, ptr %ptr_b, i32 %mask) nounwind {
3527 ; X86-LABEL: test_adds_epi16_rmkz_512:
3529 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3530 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
3531 ; X86-NEXT: vpaddsw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0x00]
3532 ; X86-NEXT: retl # encoding: [0xc3]
3534 ; X64-LABEL: test_adds_epi16_rmkz_512:
3536 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3537 ; X64-NEXT: vpaddsw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0x07]
3538 ; X64-NEXT: retq # encoding: [0xc3]
3539 %b = load <32 x i16>, ptr %ptr_b
3540 %1 = call <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16> %a, <32 x i16> %b)
3541 %2 = bitcast i32 %mask to <32 x i1>
3542 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
; Two-operand intrinsic exercised by the test_adds_epi16_*_512 tests above;
; those tests express masking with a separate bitcast + select.
3546 declare <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16>, <32 x i16>)
; Unmasked register-register case for mask.padds.w.512: the call uses an
; all-ones mask (i32 -1) with a zeroinitializer passthru. Both runs share one
; set of assertions expecting a single vpaddsw with no mask register.
3548 define <32 x i16> @test_mask_adds_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) nounwind {
3549 ; CHECK-LABEL: test_mask_adds_epi16_rr_512:
3551 ; CHECK-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xed,0xc1]
3552 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3553 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
; Merge-masked register-register case for mask.padds.w.512: the call passes
; %passThru and %mask. The expected code moves the mask into %k1, emits
; vpaddsw writing %zmm2 under {%k1}, then copies %zmm2 into %zmm0.
3557 define <32 x i16> @test_mask_adds_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) nounwind {
3558 ; X86-LABEL: test_mask_adds_epi16_rrk_512:
3560 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
3561 ; X86-NEXT: vpaddsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0xd1]
3562 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3563 ; X86-NEXT: retl # encoding: [0xc3]
3565 ; X64-LABEL: test_mask_adds_epi16_rrk_512:
3567 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3568 ; X64-NEXT: vpaddsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0xd1]
3569 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3570 ; X64-NEXT: retq # encoding: [0xc3]
3571 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
; Zero-masked register-register case for mask.padds.w.512: the passthru is
; zeroinitializer and %mask is the i32 mask. The expected code is a single
; vpaddsw with {%k1} {z}.
3575 define <32 x i16> @test_mask_adds_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) nounwind {
3576 ; X86-LABEL: test_mask_adds_epi16_rrkz_512:
3578 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
3579 ; X86-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0xc1]
3580 ; X86-NEXT: retl # encoding: [0xc3]
3582 ; X64-LABEL: test_mask_adds_epi16_rrkz_512:
3584 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3585 ; X64-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0xc1]
3586 ; X64-NEXT: retq # encoding: [0xc3]
3587 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
; Unmasked register-memory case for mask.padds.w.512: %b is loaded from
; %ptr_b and the call uses an all-ones mask (i32 -1). The load is expected to
; appear as the memory operand of an unmasked vpaddsw.
3591 define <32 x i16> @test_mask_adds_epi16_rm_512(<32 x i16> %a, ptr %ptr_b) nounwind {
3592 ; X86-LABEL: test_mask_adds_epi16_rm_512:
3594 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3595 ; X86-NEXT: vpaddsw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xed,0x00]
3596 ; X86-NEXT: retl # encoding: [0xc3]
3598 ; X64-LABEL: test_mask_adds_epi16_rm_512:
3600 ; X64-NEXT: vpaddsw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xed,0x07]
3601 ; X64-NEXT: retq # encoding: [0xc3]
3602 %b = load <32 x i16>, ptr %ptr_b
3603 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
; Merge-masked register-memory case for mask.padds.w.512: %b is loaded from
; %ptr_b and the call passes %passThru and %mask. The expected code is one
; vpaddsw with a memory operand writing %zmm1 under {%k1}, followed by a move
; of %zmm1 into %zmm0.
3607 define <32 x i16> @test_mask_adds_epi16_rmk_512(<32 x i16> %a, ptr %ptr_b, <32 x i16> %passThru, i32 %mask) nounwind {
3608 ; X86-LABEL: test_mask_adds_epi16_rmk_512:
3610 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3611 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
3612 ; X86-NEXT: vpaddsw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0x08]
3613 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3614 ; X86-NEXT: retl # encoding: [0xc3]
3616 ; X64-LABEL: test_mask_adds_epi16_rmk_512:
3618 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3619 ; X64-NEXT: vpaddsw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0x0f]
3620 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3621 ; X64-NEXT: retq # encoding: [0xc3]
3622 %b = load <32 x i16>, ptr %ptr_b
3623 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
; Zero-masked register-memory case for mask.padds.w.512: %b is loaded from
; %ptr_b, the passthru is zeroinitializer and %mask is the i32 mask. The
; expected code is a single vpaddsw with a memory operand and {%k1} {z}.
3627 define <32 x i16> @test_mask_adds_epi16_rmkz_512(<32 x i16> %a, ptr %ptr_b, i32 %mask) nounwind {
3628 ; X86-LABEL: test_mask_adds_epi16_rmkz_512:
3630 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3631 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
3632 ; X86-NEXT: vpaddsw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0x00]
3633 ; X86-NEXT: retl # encoding: [0xc3]
3635 ; X64-LABEL: test_mask_adds_epi16_rmkz_512:
3637 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3638 ; X64-NEXT: vpaddsw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0x07]
3639 ; X64-NEXT: retq # encoding: [0xc3]
3640 %b = load <32 x i16>, ptr %ptr_b
3641 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
; Intrinsic exercised by the test_mask_adds_epi16_*_512 tests above; the call
; sites pass (a, b, passthru vector, i32 mask).
3645 declare <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
; Plain register-register case: calls psubs.w.512 directly on %a and %b.
; Both runs share one set of assertions expecting a single unmasked vpsubsw.
3647 define <32 x i16> @test_subs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) nounwind {
3648 ; CHECK-LABEL: test_subs_epi16_rr_512:
3650 ; CHECK-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe9,0xc1]
3651 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3652 %1 = call <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16> %a, <32 x i16> %b)
; Merge-masked register-register case built from generic IR: the psubs.w.512
; result is blended with %passThru by a select on %mask bitcast to <32 x i1>.
; The expected code folds the select into one vpsubsw writing %zmm2 under
; {%k1}, followed by a move of %zmm2 into %zmm0.
3656 define <32 x i16> @test_subs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) nounwind {
3657 ; X86-LABEL: test_subs_epi16_rrk_512:
3659 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
3660 ; X86-NEXT: vpsubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0xd1]
3661 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3662 ; X86-NEXT: retl # encoding: [0xc3]
3664 ; X64-LABEL: test_subs_epi16_rrk_512:
3666 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3667 ; X64-NEXT: vpsubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0xd1]
3668 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3669 ; X64-NEXT: retq # encoding: [0xc3]
3670 %1 = call <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16> %a, <32 x i16> %b)
3671 %2 = bitcast i32 %mask to <32 x i1>
3672 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
; Zero-masked register-register case built from generic IR: the psubs.w.512
; result is selected against zeroinitializer using %mask bitcast to
; <32 x i1>. The expected code is a single vpsubsw with {%k1} {z}.
3676 define <32 x i16> @test_subs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) nounwind {
3677 ; X86-LABEL: test_subs_epi16_rrkz_512:
3679 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
3680 ; X86-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0xc1]
3681 ; X86-NEXT: retl # encoding: [0xc3]
3683 ; X64-LABEL: test_subs_epi16_rrkz_512:
3685 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3686 ; X64-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0xc1]
3687 ; X64-NEXT: retq # encoding: [0xc3]
3688 %1 = call <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16> %a, <32 x i16> %b)
3689 %2 = bitcast i32 %mask to <32 x i1>
3690 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
; Plain register-memory case: %b is loaded from %ptr_b and passed to
; psubs.w.512. The load is expected to appear as the memory operand of an
; unmasked vpsubsw; on i686 the pointer is first read from the stack.
3694 define <32 x i16> @test_subs_epi16_rm_512(<32 x i16> %a, ptr %ptr_b) nounwind {
3695 ; X86-LABEL: test_subs_epi16_rm_512:
3697 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3698 ; X86-NEXT: vpsubsw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe9,0x00]
3699 ; X86-NEXT: retl # encoding: [0xc3]
3701 ; X64-LABEL: test_subs_epi16_rm_512:
3703 ; X64-NEXT: vpsubsw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe9,0x07]
3704 ; X64-NEXT: retq # encoding: [0xc3]
3705 %b = load <32 x i16>, ptr %ptr_b
3706 %1 = call <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16> %a, <32 x i16> %b)
; Merge-masked register-memory case built from generic IR: %b is loaded from
; %ptr_b, and the psubs.w.512 result is blended with %passThru by a select on
; %mask bitcast to <32 x i1>. The expected code is one vpsubsw with a memory
; operand writing %zmm1 under {%k1}, followed by a move into %zmm0.
3710 define <32 x i16> @test_subs_epi16_rmk_512(<32 x i16> %a, ptr %ptr_b, <32 x i16> %passThru, i32 %mask) nounwind {
3711 ; X86-LABEL: test_subs_epi16_rmk_512:
3713 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3714 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
3715 ; X86-NEXT: vpsubsw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0x08]
3716 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3717 ; X86-NEXT: retl # encoding: [0xc3]
3719 ; X64-LABEL: test_subs_epi16_rmk_512:
3721 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3722 ; X64-NEXT: vpsubsw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0x0f]
3723 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3724 ; X64-NEXT: retq # encoding: [0xc3]
3725 %b = load <32 x i16>, ptr %ptr_b
3726 %1 = call <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16> %a, <32 x i16> %b)
3727 %2 = bitcast i32 %mask to <32 x i1>
3728 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
; Zero-masked register-memory case built from generic IR: %b is loaded from
; %ptr_b, and the psubs.w.512 result is selected against zeroinitializer using
; %mask bitcast to <32 x i1>. The expected code is a single vpsubsw with a
; memory operand and {%k1} {z}.
3732 define <32 x i16> @test_subs_epi16_rmkz_512(<32 x i16> %a, ptr %ptr_b, i32 %mask) nounwind {
3733 ; X86-LABEL: test_subs_epi16_rmkz_512:
3735 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3736 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
3737 ; X86-NEXT: vpsubsw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0x00]
3738 ; X86-NEXT: retl # encoding: [0xc3]
3740 ; X64-LABEL: test_subs_epi16_rmkz_512:
3742 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3743 ; X64-NEXT: vpsubsw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0x07]
3744 ; X64-NEXT: retq # encoding: [0xc3]
3745 %b = load <32 x i16>, ptr %ptr_b
3746 %1 = call <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16> %a, <32 x i16> %b)
3747 %2 = bitcast i32 %mask to <32 x i1>
3748 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
; Two-operand intrinsic exercised by the test_subs_epi16_*_512 tests above;
; those tests express masking with a separate bitcast + select.
3752 declare <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16>, <32 x i16>)
; Unmasked register-register case for mask.psubs.w.512: the call uses an
; all-ones mask (i32 -1) with a zeroinitializer passthru. Both runs share one
; set of assertions expecting a single vpsubsw with no mask register.
3754 define <32 x i16> @test_mask_subs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) nounwind {
3755 ; CHECK-LABEL: test_mask_subs_epi16_rr_512:
3757 ; CHECK-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe9,0xc1]
3758 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3759 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
; Merge-masked register-register case for mask.psubs.w.512: the call passes
; %passThru and %mask. The expected code moves the mask into %k1, emits
; vpsubsw writing %zmm2 under {%k1}, then copies %zmm2 into %zmm0.
3763 define <32 x i16> @test_mask_subs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) nounwind {
3764 ; X86-LABEL: test_mask_subs_epi16_rrk_512:
3766 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
3767 ; X86-NEXT: vpsubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0xd1]
3768 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3769 ; X86-NEXT: retl # encoding: [0xc3]
3771 ; X64-LABEL: test_mask_subs_epi16_rrk_512:
3773 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3774 ; X64-NEXT: vpsubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0xd1]
3775 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3776 ; X64-NEXT: retq # encoding: [0xc3]
3777 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
; Zero-masked register-register case for mask.psubs.w.512: the passthru is
; zeroinitializer and %mask is the i32 mask. The expected code is a single
; vpsubsw with {%k1} {z}.
3781 define <32 x i16> @test_mask_subs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) nounwind {
3782 ; X86-LABEL: test_mask_subs_epi16_rrkz_512:
3784 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
3785 ; X86-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0xc1]
3786 ; X86-NEXT: retl # encoding: [0xc3]
3788 ; X64-LABEL: test_mask_subs_epi16_rrkz_512:
3790 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3791 ; X64-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0xc1]
3792 ; X64-NEXT: retq # encoding: [0xc3]
3793 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
; Unmasked register-memory case for mask.psubs.w.512: %b is loaded from
; %ptr_b and the call uses an all-ones mask (i32 -1). The load is expected to
; appear as the memory operand of an unmasked vpsubsw.
3797 define <32 x i16> @test_mask_subs_epi16_rm_512(<32 x i16> %a, ptr %ptr_b) nounwind {
3798 ; X86-LABEL: test_mask_subs_epi16_rm_512:
3800 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3801 ; X86-NEXT: vpsubsw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe9,0x00]
3802 ; X86-NEXT: retl # encoding: [0xc3]
3804 ; X64-LABEL: test_mask_subs_epi16_rm_512:
3806 ; X64-NEXT: vpsubsw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe9,0x07]
3807 ; X64-NEXT: retq # encoding: [0xc3]
3808 %b = load <32 x i16>, ptr %ptr_b
3809 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
; Merge-masked register-memory case for mask.psubs.w.512: %b is loaded from
; %ptr_b and the call passes %passThru and %mask. The expected code is one
; vpsubsw with a memory operand writing %zmm1 under {%k1}, followed by a move
; of %zmm1 into %zmm0.
3813 define <32 x i16> @test_mask_subs_epi16_rmk_512(<32 x i16> %a, ptr %ptr_b, <32 x i16> %passThru, i32 %mask) nounwind {
3814 ; X86-LABEL: test_mask_subs_epi16_rmk_512:
3816 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3817 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
3818 ; X86-NEXT: vpsubsw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0x08]
3819 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3820 ; X86-NEXT: retl # encoding: [0xc3]
3822 ; X64-LABEL: test_mask_subs_epi16_rmk_512:
3824 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3825 ; X64-NEXT: vpsubsw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0x0f]
3826 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3827 ; X64-NEXT: retq # encoding: [0xc3]
3828 %b = load <32 x i16>, ptr %ptr_b
3829 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
; Zero-masked register-memory case for mask.psubs.w.512: %b is loaded from
; %ptr_b, the passthru is zeroinitializer and %mask is the i32 mask. The
; expected code is a single vpsubsw with a memory operand and {%k1} {z}.
3833 define <32 x i16> @test_mask_subs_epi16_rmkz_512(<32 x i16> %a, ptr %ptr_b, i32 %mask) nounwind {
3834 ; X86-LABEL: test_mask_subs_epi16_rmkz_512:
3836 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3837 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
3838 ; X86-NEXT: vpsubsw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0x00]
3839 ; X86-NEXT: retl # encoding: [0xc3]
3841 ; X64-LABEL: test_mask_subs_epi16_rmkz_512:
3843 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3844 ; X64-NEXT: vpsubsw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0x07]
3845 ; X64-NEXT: retq # encoding: [0xc3]
3846 %b = load <32 x i16>, ptr %ptr_b
3847 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
; Intrinsic exercised by the test_mask_subs_epi16_*_512 tests above; the call
; sites pass (a, b, passthru vector, i32 mask).
3851 declare <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
; Unmasked register-register case for mask.padds.b.512: the call uses an
; all-ones mask (i64 -1) with a zeroinitializer passthru. Both runs share one
; set of assertions expecting a single vpaddsb with no mask register.
3853 define <64 x i8> @test_mask_adds_epi8_rr_512(<64 x i8> %a, <64 x i8> %b) nounwind {
3854 ; CHECK-LABEL: test_mask_adds_epi8_rr_512:
3856 ; CHECK-NEXT: vpaddsb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xec,0xc1]
3857 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3858 %res = call <64 x i8> @llvm.x86.avx512.mask.padds.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 -1)
; Merge-masked register-register case for mask.padds.b.512: the call passes
; %passThru and the i64 %mask. The expected code moves the mask into %k1 with
; kmovq, emits vpaddsb writing %zmm2 under {%k1}, then copies %zmm2 into %zmm0.
3862 define <64 x i8> @test_mask_adds_epi8_rrk_512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask) nounwind {
3863 ; X86-LABEL: test_mask_adds_epi8_rrk_512:
3865 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
3866 ; X86-NEXT: vpaddsb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xec,0xd1]
3867 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3868 ; X86-NEXT: retl # encoding: [0xc3]
3870 ; X64-LABEL: test_mask_adds_epi8_rrk_512:
3872 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
3873 ; X64-NEXT: vpaddsb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xec,0xd1]
3874 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3875 ; X64-NEXT: retq # encoding: [0xc3]
3876 %res = call <64 x i8> @llvm.x86.avx512.mask.padds.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask)
; Zero-masked register-register case for mask.padds.b.512: the passthru is
; zeroinitializer and %mask is the i64 mask. The expected code is a single
; vpaddsb with {%k1} {z}.
3880 define <64 x i8> @test_mask_adds_epi8_rrkz_512(<64 x i8> %a, <64 x i8> %b, i64 %mask) nounwind {
3881 ; X86-LABEL: test_mask_adds_epi8_rrkz_512:
3883 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
3884 ; X86-NEXT: vpaddsb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xec,0xc1]
3885 ; X86-NEXT: retl # encoding: [0xc3]
3887 ; X64-LABEL: test_mask_adds_epi8_rrkz_512:
3889 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
3890 ; X64-NEXT: vpaddsb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xec,0xc1]
3891 ; X64-NEXT: retq # encoding: [0xc3]
3892 %res = call <64 x i8> @llvm.x86.avx512.mask.padds.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 %mask)
; Unmasked register-memory case for mask.padds.b.512: %b is loaded from
; %ptr_b and the call uses an all-ones mask (i64 -1). The load is expected to
; appear as the memory operand of an unmasked vpaddsb.
3896 define <64 x i8> @test_mask_adds_epi8_rm_512(<64 x i8> %a, ptr %ptr_b) nounwind {
3897 ; X86-LABEL: test_mask_adds_epi8_rm_512:
3899 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3900 ; X86-NEXT: vpaddsb (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xec,0x00]
3901 ; X86-NEXT: retl # encoding: [0xc3]
3903 ; X64-LABEL: test_mask_adds_epi8_rm_512:
3905 ; X64-NEXT: vpaddsb (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xec,0x07]
3906 ; X64-NEXT: retq # encoding: [0xc3]
3907 %b = load <64 x i8>, ptr %ptr_b
3908 %res = call <64 x i8> @llvm.x86.avx512.mask.padds.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 -1)
; Merge-masked register-memory case for mask.padds.b.512: %b is loaded from
; %ptr_b and the call passes %passThru and the i64 %mask. The expected code is
; one vpaddsb with a memory operand writing %zmm1 under {%k1}, followed by a
; move of %zmm1 into %zmm0.
3912 define <64 x i8> @test_mask_adds_epi8_rmk_512(<64 x i8> %a, ptr %ptr_b, <64 x i8> %passThru, i64 %mask) nounwind {
3913 ; X86-LABEL: test_mask_adds_epi8_rmk_512:
3915 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3916 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
3917 ; X86-NEXT: vpaddsb (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xec,0x08]
3918 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3919 ; X86-NEXT: retl # encoding: [0xc3]
3921 ; X64-LABEL: test_mask_adds_epi8_rmk_512:
3923 ; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
3924 ; X64-NEXT: vpaddsb (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xec,0x0f]
3925 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3926 ; X64-NEXT: retq # encoding: [0xc3]
3927 %b = load <64 x i8>, ptr %ptr_b
3928 %res = call <64 x i8> @llvm.x86.avx512.mask.padds.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask)
; Zero-masked register-memory case for mask.padds.b.512: %b is loaded from
; %ptr_b, the passthru is zeroinitializer and %mask is the i64 mask. The
; expected code is a single vpaddsb with a memory operand and {%k1} {z}.
3932 define <64 x i8> @test_mask_adds_epi8_rmkz_512(<64 x i8> %a, ptr %ptr_b, i64 %mask) nounwind {
3933 ; X86-LABEL: test_mask_adds_epi8_rmkz_512:
3935 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3936 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
3937 ; X86-NEXT: vpaddsb (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xec,0x00]
3938 ; X86-NEXT: retl # encoding: [0xc3]
3940 ; X64-LABEL: test_mask_adds_epi8_rmkz_512:
3942 ; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
3943 ; X64-NEXT: vpaddsb (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xec,0x07]
3944 ; X64-NEXT: retq # encoding: [0xc3]
3945 %b = load <64 x i8>, ptr %ptr_b
3946 %res = call <64 x i8> @llvm.x86.avx512.mask.padds.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 %mask)
; Intrinsic exercised by the test_mask_adds_epi8_*_512 tests above; the call
; sites pass (a, b, passthru vector, i64 mask).
3950 declare <64 x i8> @llvm.x86.avx512.mask.padds.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
; Unmasked register-register case for mask.psubs.b.512: the call uses an
; all-ones mask (i64 -1) with a zeroinitializer passthru. Both runs share one
; set of assertions expecting a single vpsubsb with no mask register.
3952 define <64 x i8> @test_mask_subs_epi8_rr_512(<64 x i8> %a, <64 x i8> %b) nounwind {
3953 ; CHECK-LABEL: test_mask_subs_epi8_rr_512:
3955 ; CHECK-NEXT: vpsubsb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe8,0xc1]
3956 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3957 %res = call <64 x i8> @llvm.x86.avx512.mask.psubs.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 -1)
; Merge-masked register-register case for mask.psubs.b.512: the call passes
; %passThru and the i64 %mask. The expected code moves the mask into %k1 with
; kmovq, emits vpsubsb writing %zmm2 under {%k1}, then copies %zmm2 into %zmm0.
3961 define <64 x i8> @test_mask_subs_epi8_rrk_512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask) nounwind {
3962 ; X86-LABEL: test_mask_subs_epi8_rrk_512:
3964 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
3965 ; X86-NEXT: vpsubsb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe8,0xd1]
3966 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3967 ; X86-NEXT: retl # encoding: [0xc3]
3969 ; X64-LABEL: test_mask_subs_epi8_rrk_512:
3971 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
3972 ; X64-NEXT: vpsubsb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe8,0xd1]
3973 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3974 ; X64-NEXT: retq # encoding: [0xc3]
3975 %res = call <64 x i8> @llvm.x86.avx512.mask.psubs.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask)
; Zero-masked register-register case for mask.psubs.b.512: the passthru is
; zeroinitializer and %mask is the i64 mask. The expected code is a single
; vpsubsb with {%k1} {z}.
3979 define <64 x i8> @test_mask_subs_epi8_rrkz_512(<64 x i8> %a, <64 x i8> %b, i64 %mask) nounwind {
3980 ; X86-LABEL: test_mask_subs_epi8_rrkz_512:
3982 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
3983 ; X86-NEXT: vpsubsb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe8,0xc1]
3984 ; X86-NEXT: retl # encoding: [0xc3]
3986 ; X64-LABEL: test_mask_subs_epi8_rrkz_512:
3988 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
3989 ; X64-NEXT: vpsubsb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe8,0xc1]
3990 ; X64-NEXT: retq # encoding: [0xc3]
3991 %res = call <64 x i8> @llvm.x86.avx512.mask.psubs.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 %mask)
; Unmasked register-memory case for mask.psubs.b.512: %b is loaded from
; %ptr_b and the call uses an all-ones mask (i64 -1). The load is expected to
; appear as the memory operand of an unmasked vpsubsb.
3995 define <64 x i8> @test_mask_subs_epi8_rm_512(<64 x i8> %a, ptr %ptr_b) nounwind {
3996 ; X86-LABEL: test_mask_subs_epi8_rm_512:
3998 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3999 ; X86-NEXT: vpsubsb (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe8,0x00]
4000 ; X86-NEXT: retl # encoding: [0xc3]
4002 ; X64-LABEL: test_mask_subs_epi8_rm_512:
4004 ; X64-NEXT: vpsubsb (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe8,0x07]
4005 ; X64-NEXT: retq # encoding: [0xc3]
4006 %b = load <64 x i8>, ptr %ptr_b
4007 %res = call <64 x i8> @llvm.x86.avx512.mask.psubs.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 -1)
; Merge-masked register-memory case for mask.psubs.b.512: %b is loaded from
; %ptr_b and the call passes %passThru and the i64 %mask. The expected code is
; one vpsubsb with a memory operand writing %zmm1 under {%k1}, followed by a
; move of %zmm1 into %zmm0.
4011 define <64 x i8> @test_mask_subs_epi8_rmk_512(<64 x i8> %a, ptr %ptr_b, <64 x i8> %passThru, i64 %mask) nounwind {
4012 ; X86-LABEL: test_mask_subs_epi8_rmk_512:
4014 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4015 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
4016 ; X86-NEXT: vpsubsb (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe8,0x08]
4017 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
4018 ; X86-NEXT: retl # encoding: [0xc3]
4020 ; X64-LABEL: test_mask_subs_epi8_rmk_512:
4022 ; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
4023 ; X64-NEXT: vpsubsb (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe8,0x0f]
4024 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
4025 ; X64-NEXT: retq # encoding: [0xc3]
4026 %b = load <64 x i8>, ptr %ptr_b
4027 %res = call <64 x i8> @llvm.x86.avx512.mask.psubs.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask)
; Zero-masked register-memory case for mask.psubs.b.512: %b is loaded from
; %ptr_b, the passthru is zeroinitializer and %mask is the i64 mask. The
; expected code is a single vpsubsb with a memory operand and {%k1} {z}.
4031 define <64 x i8> @test_mask_subs_epi8_rmkz_512(<64 x i8> %a, ptr %ptr_b, i64 %mask) nounwind {
4032 ; X86-LABEL: test_mask_subs_epi8_rmkz_512:
4034 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4035 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
4036 ; X86-NEXT: vpsubsb (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe8,0x00]
4037 ; X86-NEXT: retl # encoding: [0xc3]
4039 ; X64-LABEL: test_mask_subs_epi8_rmkz_512:
4041 ; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
4042 ; X64-NEXT: vpsubsb (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe8,0x07]
4043 ; X64-NEXT: retq # encoding: [0xc3]
4044 %b = load <64 x i8>, ptr %ptr_b
4045 %res = call <64 x i8> @llvm.x86.avx512.mask.psubs.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 %mask)
; Intrinsic exercised by the test_mask_subs_epi8_*_512 tests above; the call
; sites pass (a, b, passthru vector, i64 mask).
4049 declare <64 x i8> @llvm.x86.avx512.mask.psubs.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
; Intrinsic exercised by the three psrlv32hi tests that follow. Its third
; operand appears to act as the passthru (the maskz test passes
; zeroinitializer and expects {z}); the fourth operand is the i32 mask.
4051 declare <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16>, <32 x i16>, <32 x i16>, i32)
; Unmasked case for mask.psrlv32hi: the call uses an all-ones mask (i32 -1),
; so %x2 is not expected to influence the output. Both runs share one set of
; assertions expecting a single vpsrlvw with no mask register.
4053 define <32 x i16> @test_int_x86_avx512_psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) nounwind {
4054 ; CHECK-LABEL: test_int_x86_avx512_psrlv32hi:
4056 ; CHECK-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x10,0xc1]
4057 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
4058 %res = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
; Merge-masked form: %x2 is the passthru and %x3 the i32 mask.
; The assertions expect the mask to be moved into %k1 with kmovd (from the stack
; in the i686 run, from %edi in the x86_64 run), vpsrlvw to write %zmm2 under
; {%k1}, and the result to be copied to %zmm0 with vmovdqa64.
4062 define <32 x i16> @test_int_x86_avx512_mask_psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) nounwind {
4063 ; X86-LABEL: test_int_x86_avx512_mask_psrlv32hi:
4065 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
4066 ; X86-NEXT: vpsrlvw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x10,0xd1]
4067 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
4068 ; X86-NEXT: retl # encoding: [0xc3]
4070 ; X64-LABEL: test_int_x86_avx512_mask_psrlv32hi:
4072 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4073 ; X64-NEXT: vpsrlvw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x10,0xd1]
4074 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
4075 ; X64-NEXT: retq # encoding: [0xc3]
4076 %res = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
; Zero-masked form: the passthru operand is zeroinitializer and %x3 is the mask.
; The assertions expect kmovd into %k1 followed by a single vpsrlvw writing
; %zmm0 with {%k1} {z}; no separate register copy is expected.
4080 define <32 x i16> @test_int_x86_avx512_maskz_psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, i32 %x3) nounwind {
4081 ; X86-LABEL: test_int_x86_avx512_maskz_psrlv32hi:
4083 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
4084 ; X86-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x10,0xc1]
4085 ; X86-NEXT: retl # encoding: [0xc3]
4087 ; X64-LABEL: test_int_x86_avx512_maskz_psrlv32hi:
4089 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4090 ; X64-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x10,0xc1]
4091 ; X64-NEXT: retq # encoding: [0xc3]
4092 %res = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
; Intrinsic exercised by the three *_psrav32_hi tests below. The call sites pass
; two <32 x i16> sources, a <32 x i16> passthru and an i32 mask.
4096 declare <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16>, <32 x i16>, <32 x i16>, i32)
; Unmasked form: the intrinsic is called with an i32 -1 mask (all bits set).
; Both runs share one set of assertions, which expect a plain vpsravw with no
; {%k} mask operand writing straight into %zmm0.
4098 define <32 x i16> @test_int_x86_avx512_psrav32_hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) nounwind {
4099 ; CHECK-LABEL: test_int_x86_avx512_psrav32_hi:
4101 ; CHECK-NEXT: vpsravw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x11,0xc1]
4102 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
4103 %res = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
; Merge-masked form: %x2 is the passthru and %x3 the i32 mask.
; The assertions expect the mask to be moved into %k1 with kmovd (from the stack
; in the i686 run, from %edi in the x86_64 run), vpsravw to write %zmm2 under
; {%k1}, and the result to be copied to %zmm0 with vmovdqa64.
4107 define <32 x i16> @test_int_x86_avx512_mask_psrav32_hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) nounwind {
4108 ; X86-LABEL: test_int_x86_avx512_mask_psrav32_hi:
4110 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
4111 ; X86-NEXT: vpsravw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x11,0xd1]
4112 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
4113 ; X86-NEXT: retl # encoding: [0xc3]
4115 ; X64-LABEL: test_int_x86_avx512_mask_psrav32_hi:
4117 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4118 ; X64-NEXT: vpsravw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x11,0xd1]
4119 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
4120 ; X64-NEXT: retq # encoding: [0xc3]
4121 %res = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
; Zero-masked form: the passthru operand is zeroinitializer and %x3 is the mask.
; The assertions expect kmovd into %k1 followed by a single vpsravw writing
; %zmm0 with {%k1} {z}; no separate register copy is expected.
4125 define <32 x i16> @test_int_x86_avx512_maskz_psrav32_hi(<32 x i16> %x0, <32 x i16> %x1, i32 %x3) nounwind {
4126 ; X86-LABEL: test_int_x86_avx512_maskz_psrav32_hi:
4128 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
4129 ; X86-NEXT: vpsravw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x11,0xc1]
4130 ; X86-NEXT: retl # encoding: [0xc3]
4132 ; X64-LABEL: test_int_x86_avx512_maskz_psrav32_hi:
4134 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4135 ; X64-NEXT: vpsravw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x11,0xc1]
4136 ; X64-NEXT: retq # encoding: [0xc3]
4137 %res = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
; Intrinsic exercised by the three *_psllv32hi tests below. The call sites pass
; two <32 x i16> sources, a <32 x i16> passthru and an i32 mask.
4141 declare <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16>, <32 x i16>, <32 x i16>, i32)
; Unmasked form: the intrinsic is called with an i32 -1 mask (all bits set).
; Both runs share one set of assertions, which expect a plain vpsllvw with no
; {%k} mask operand writing straight into %zmm0.
4143 define <32 x i16> @test_int_x86_avx512_psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) nounwind {
4144 ; CHECK-LABEL: test_int_x86_avx512_psllv32hi:
4146 ; CHECK-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x12,0xc1]
4147 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
4148 %res = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
; Merge-masked form: %x2 is the passthru and %x3 the i32 mask.
; The assertions expect the mask to be moved into %k1 with kmovd (from the stack
; in the i686 run, from %edi in the x86_64 run), vpsllvw to write %zmm2 under
; {%k1}, and the result to be copied to %zmm0 with vmovdqa64.
4152 define <32 x i16> @test_int_x86_avx512_mask_psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) nounwind {
4153 ; X86-LABEL: test_int_x86_avx512_mask_psllv32hi:
4155 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
4156 ; X86-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x12,0xd1]
4157 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
4158 ; X86-NEXT: retl # encoding: [0xc3]
4160 ; X64-LABEL: test_int_x86_avx512_mask_psllv32hi:
4162 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4163 ; X64-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x12,0xd1]
4164 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
4165 ; X64-NEXT: retq # encoding: [0xc3]
4166 %res = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
; Zero-masked form: the passthru operand is zeroinitializer and %x3 is the mask.
; The assertions expect kmovd into %k1 followed by a single vpsllvw writing
; %zmm0 with {%k1} {z}; no separate register copy is expected.
4170 define <32 x i16> @test_int_x86_avx512_maskz_psllv32hi(<32 x i16> %x0, <32 x i16> %x1, i32 %x3) nounwind {
4171 ; X86-LABEL: test_int_x86_avx512_maskz_psllv32hi:
4173 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
4174 ; X86-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x12,0xc1]
4175 ; X86-NEXT: retl # encoding: [0xc3]
4177 ; X64-LABEL: test_int_x86_avx512_maskz_psllv32hi:
4179 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4180 ; X64-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x12,0xc1]
4181 ; X64-NEXT: retq # encoding: [0xc3]
4182 %res = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
; Intrinsic exercised by the three *_pmov_wb_512 tests below. It takes a
; <32 x i16> source, a <32 x i8> passthru and an i32 mask, and returns <32 x i8>.
4186 declare <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16>, <32 x i8>, i32)
; Unmasked form: the intrinsic is called with an i32 -1 mask (all bits set).
; Both runs share one set of assertions, which expect a plain vpmovwb from
; %zmm0 into %ymm0 with no {%k} mask operand.
4188 define <32 x i8> @test_int_x86_avx512_pmov_wb_512(<32 x i16> %x0, <32 x i8> %x1) nounwind {
4189 ; CHECK-LABEL: test_int_x86_avx512_pmov_wb_512:
4191 ; CHECK-NEXT: vpmovwb %zmm0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x48,0x30,0xc0]
4192 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
4193 %res = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
; Merge-masked form: %x1 is the <32 x i8> passthru and %x2 the i32 mask.
; The assertions expect the mask to be moved into %k1 with kmovd (from the stack
; in the i686 run, from %edi in the x86_64 run), vpmovwb to write %ymm1 under
; {%k1}, and the result to be copied to %ymm0 with vmovdqa.
4197 define <32 x i8> @test_int_x86_avx512_mask_pmov_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) nounwind {
4198 ; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_512:
4200 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
4201 ; X86-NEXT: vpmovwb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x30,0xc1]
4202 ; X86-NEXT: vmovdqa %ymm1, %ymm0 # encoding: [0xc5,0xfd,0x6f,0xc1]
4203 ; X86-NEXT: retl # encoding: [0xc3]
4205 ; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_512:
4207 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4208 ; X64-NEXT: vpmovwb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x30,0xc1]
4209 ; X64-NEXT: vmovdqa %ymm1, %ymm0 # encoding: [0xc5,0xfd,0x6f,0xc1]
4210 ; X64-NEXT: retq # encoding: [0xc3]
4211 %res = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
; Zero-masked form: the passthru operand is zeroinitializer and %x2 is the mask.
; The assertions expect kmovd into %k1 followed by a single vpmovwb from %zmm0
; into %ymm0 with {%k1} {z}; no separate register copy is expected.
4215 define <32 x i8> @test_int_x86_avx512_maskz_pmov_wb_512(<32 x i16> %x0, i32 %x2) nounwind {
4216 ; X86-LABEL: test_int_x86_avx512_maskz_pmov_wb_512:
4218 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
4219 ; X86-NEXT: vpmovwb %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x30,0xc0]
4220 ; X86-NEXT: retl # encoding: [0xc3]
4222 ; X64-LABEL: test_int_x86_avx512_maskz_pmov_wb_512:
4224 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4225 ; X64-NEXT: vpmovwb %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x30,0xc0]
4226 ; X64-NEXT: retq # encoding: [0xc3]
4227 %res = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)