1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512bw --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
5 declare i32 @llvm.x86.avx512.kunpck.wd(i32, i32)
7 define i32@test_int_x86_avx512_kunpck_wd(i32 %x0, i32 %x1) {
8 ; X86-LABEL: test_int_x86_avx512_kunpck_wd:
10 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k0 # encoding: [0xc5,0xf8,0x90,0x44,0x24,0x04]
11 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
12 ; X86-NEXT: kunpckwd %k1, %k0, %k0 # encoding: [0xc5,0xfc,0x4b,0xc1]
13 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
14 ; X86-NEXT: retl # encoding: [0xc3]
16 ; X64-LABEL: test_int_x86_avx512_kunpck_wd:
18 ; X64-NEXT: kmovd %edi, %k0 # encoding: [0xc5,0xfb,0x92,0xc7]
19 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
20 ; X64-NEXT: kunpckwd %k1, %k0, %k0 # encoding: [0xc5,0xfc,0x4b,0xc1]
21 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
22 ; X64-NEXT: retq # encoding: [0xc3]
23 %res = call i32 @llvm.x86.avx512.kunpck.wd(i32 %x0, i32 %x1)
27 declare i64 @llvm.x86.avx512.kunpck.dq(i64, i64)
29 define i64@test_int_x86_avx512_kunpck_qd(i64 %x0, i64 %x1) {
30 ; X86-LABEL: test_int_x86_avx512_kunpck_qd:
32 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x04]
33 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x0c]
34 ; X86-NEXT: retl # encoding: [0xc3]
36 ; X64-LABEL: test_int_x86_avx512_kunpck_qd:
38 ; X64-NEXT: kmovq %rdi, %k0 # encoding: [0xc4,0xe1,0xfb,0x92,0xc7]
39 ; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
40 ; X64-NEXT: kunpckdq %k1, %k0, %k0 # encoding: [0xc4,0xe1,0xfc,0x4b,0xc1]
41 ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
42 ; X64-NEXT: retq # encoding: [0xc3]
43 %res = call i64 @llvm.x86.avx512.kunpck.dq(i64 %x0, i64 %x1)
47 declare <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8, <64 x i8>, i64)
49 define <64 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_512(i8 %x0, <64 x i8> %x1, i64 %mask) {
50 ; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_512:
52 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
53 ; X86-NEXT: vpbroadcastb %eax, %zmm1 # encoding: [0x62,0xf2,0x7d,0x48,0x7a,0xc8]
54 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
55 ; X86-NEXT: vpbroadcastb %eax, %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x7a,0xc0]
56 ; X86-NEXT: vpbroadcastb %eax, %zmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x7a,0xd0]
57 ; X86-NEXT: vpaddb %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc2]
58 ; X86-NEXT: vpaddb %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfc,0xc0]
59 ; X86-NEXT: retl # encoding: [0xc3]
61 ; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_512:
63 ; X64-NEXT: vpbroadcastb %edi, %zmm1 # encoding: [0x62,0xf2,0x7d,0x48,0x7a,0xcf]
64 ; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
65 ; X64-NEXT: vpbroadcastb %edi, %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x7a,0xc7]
66 ; X64-NEXT: vpbroadcastb %edi, %zmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x7a,0xd7]
67 ; X64-NEXT: vpaddb %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc2]
68 ; X64-NEXT: vpaddb %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfc,0xc0]
69 ; X64-NEXT: retq # encoding: [0xc3]
70 %res = call <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8 %x0, <64 x i8> %x1, i64 -1)
71 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8 %x0, <64 x i8> %x1, i64 %mask)
72 %res2 = call <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8 %x0, <64 x i8> zeroinitializer, i64 %mask)
73 %res3 = add <64 x i8> %res, %res1
74 %res4 = add <64 x i8> %res2, %res3
78 declare <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16, <32 x i16>, i32)
79 define <32 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_512(i16 %x0, <32 x i16> %x1, i32 %mask) {
80 ; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_512:
82 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
83 ; X86-NEXT: vpbroadcastw %eax, %zmm1 # encoding: [0x62,0xf2,0x7d,0x48,0x7b,0xc8]
84 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
85 ; X86-NEXT: vpbroadcastw %eax, %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x7b,0xc0]
86 ; X86-NEXT: vpbroadcastw %eax, %zmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x7b,0xd0]
87 ; X86-NEXT: vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2]
88 ; X86-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
89 ; X86-NEXT: retl # encoding: [0xc3]
91 ; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_512:
93 ; X64-NEXT: vpbroadcastw %edi, %zmm1 # encoding: [0x62,0xf2,0x7d,0x48,0x7b,0xcf]
94 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
95 ; X64-NEXT: vpbroadcastw %edi, %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x7b,0xc7]
96 ; X64-NEXT: vpbroadcastw %edi, %zmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x7b,0xd7]
97 ; X64-NEXT: vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2]
98 ; X64-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
99 ; X64-NEXT: retq # encoding: [0xc3]
100 %res = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> %x1, i32 -1)
101 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> %x1, i32 %mask)
102 %res2 = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> zeroinitializer, i32 %mask)
103 %res3 = add <32 x i16> %res, %res1
104 %res4 = add <32 x i16> %res2, %res3
108 declare void @llvm.x86.avx512.mask.storeu.b.512(i8*, <64 x i8>, i64)
110 define void@test_int_x86_avx512_mask_storeu_b_512(i8* %ptr1, i8* %ptr2, <64 x i8> %x1, i64 %x2) {
111 ; X86-LABEL: test_int_x86_avx512_mask_storeu_b_512:
113 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
114 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
115 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x0c]
116 ; X86-NEXT: vmovdqu8 %zmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7f,0x49,0x7f,0x01]
117 ; X86-NEXT: vmovdqu64 %zmm0, (%eax) # encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x00]
118 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
119 ; X86-NEXT: retl # encoding: [0xc3]
121 ; X64-LABEL: test_int_x86_avx512_mask_storeu_b_512:
123 ; X64-NEXT: kmovq %rdx, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xca]
124 ; X64-NEXT: vmovdqu8 %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7f,0x49,0x7f,0x07]
125 ; X64-NEXT: vmovdqu64 %zmm0, (%rsi) # encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x06]
126 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
127 ; X64-NEXT: retq # encoding: [0xc3]
128 call void @llvm.x86.avx512.mask.storeu.b.512(i8* %ptr1, <64 x i8> %x1, i64 %x2)
129 call void @llvm.x86.avx512.mask.storeu.b.512(i8* %ptr2, <64 x i8> %x1, i64 -1)
133 declare void @llvm.x86.avx512.mask.storeu.w.512(i8*, <32 x i16>, i32)
135 define void@test_int_x86_avx512_mask_storeu_w_512(i8* %ptr1, i8* %ptr2, <32 x i16> %x1, i32 %x2) {
136 ; X86-LABEL: test_int_x86_avx512_mask_storeu_w_512:
138 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
139 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
140 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x0c]
141 ; X86-NEXT: vmovdqu16 %zmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x7f,0x01]
142 ; X86-NEXT: vmovdqu64 %zmm0, (%eax) # encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x00]
143 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
144 ; X86-NEXT: retl # encoding: [0xc3]
146 ; X64-LABEL: test_int_x86_avx512_mask_storeu_w_512:
148 ; X64-NEXT: kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
149 ; X64-NEXT: vmovdqu16 %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x7f,0x07]
150 ; X64-NEXT: vmovdqu64 %zmm0, (%rsi) # encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x06]
151 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
152 ; X64-NEXT: retq # encoding: [0xc3]
153 call void @llvm.x86.avx512.mask.storeu.w.512(i8* %ptr1, <32 x i16> %x1, i32 %x2)
154 call void @llvm.x86.avx512.mask.storeu.w.512(i8* %ptr2, <32 x i16> %x1, i32 -1)
158 declare <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(i8*, <32 x i16>, i32)
160 define <32 x i16>@test_int_x86_avx512_mask_loadu_w_512(i8* %ptr, i8* %ptr2, <32 x i16> %x1, i32 %mask) {
161 ; X86-LABEL: test_int_x86_avx512_mask_loadu_w_512:
163 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
164 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
165 ; X86-NEXT: vmovdqu64 (%ecx), %zmm0 # encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x01]
166 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x0c]
167 ; X86-NEXT: vmovdqu16 (%eax), %zmm0 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x6f,0x00]
168 ; X86-NEXT: vmovdqu16 (%ecx), %zmm1 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xc9,0x6f,0x09]
169 ; X86-NEXT: vpaddw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc1]
170 ; X86-NEXT: retl # encoding: [0xc3]
172 ; X64-LABEL: test_int_x86_avx512_mask_loadu_w_512:
174 ; X64-NEXT: vmovdqu64 (%rdi), %zmm0 # encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x07]
175 ; X64-NEXT: kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
176 ; X64-NEXT: vmovdqu16 (%rsi), %zmm0 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x6f,0x06]
177 ; X64-NEXT: vmovdqu16 (%rdi), %zmm1 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xc9,0x6f,0x0f]
178 ; X64-NEXT: vpaddw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc1]
179 ; X64-NEXT: retq # encoding: [0xc3]
180 %res0 = call <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(i8* %ptr, <32 x i16> %x1, i32 -1)
181 %res = call <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(i8* %ptr2, <32 x i16> %res0, i32 %mask)
182 %res1 = call <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(i8* %ptr, <32 x i16> zeroinitializer, i32 %mask)
183 %res2 = add <32 x i16> %res, %res1
187 declare <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(i8*, <64 x i8>, i64)
189 define <64 x i8>@test_int_x86_avx512_mask_loadu_b_512(i8* %ptr, i8* %ptr2, <64 x i8> %x1, i64 %mask) {
190 ; X86-LABEL: test_int_x86_avx512_mask_loadu_b_512:
192 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
193 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
194 ; X86-NEXT: vmovdqu64 (%ecx), %zmm0 # encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x01]
195 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x0c]
196 ; X86-NEXT: vmovdqu8 (%eax), %zmm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x49,0x6f,0x00]
197 ; X86-NEXT: vmovdqu8 (%ecx), %zmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xc9,0x6f,0x09]
198 ; X86-NEXT: vpaddb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc1]
199 ; X86-NEXT: retl # encoding: [0xc3]
201 ; X64-LABEL: test_int_x86_avx512_mask_loadu_b_512:
203 ; X64-NEXT: vmovdqu64 (%rdi), %zmm0 # encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x07]
204 ; X64-NEXT: kmovq %rdx, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xca]
205 ; X64-NEXT: vmovdqu8 (%rsi), %zmm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x49,0x6f,0x06]
206 ; X64-NEXT: vmovdqu8 (%rdi), %zmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xc9,0x6f,0x0f]
207 ; X64-NEXT: vpaddb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc1]
208 ; X64-NEXT: retq # encoding: [0xc3]
209 %res0 = call <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(i8* %ptr, <64 x i8> %x1, i64 -1)
210 %res = call <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(i8* %ptr2, <64 x i8> %res0, i64 %mask)
211 %res1 = call <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(i8* %ptr, <64 x i8> zeroinitializer, i64 %mask)
212 %res2 = add <64 x i8> %res, %res1
216 declare <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64>, i32)
218 define <8 x i64>@test_int_x86_avx512_psll_dq_512(<8 x i64> %x0) {
219 ; CHECK-LABEL: test_int_x86_avx512_psll_dq_512:
221 ; CHECK-NEXT: vpslldq $8, %zmm0, %zmm1 # encoding: [0x62,0xf1,0x75,0x48,0x73,0xf8,0x08]
222 ; CHECK-NEXT: # zmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zmm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[16,17,18,19,20,21,22,23],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[32,33,34,35,36,37,38,39],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[48,49,50,51,52,53,54,55]
223 ; CHECK-NEXT: vpslldq $4, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x73,0xf8,0x04]
224 ; CHECK-NEXT: # zmm0 = zero,zero,zero,zero,zmm0[0,1,2,3,4,5,6,7,8,9,10,11],zero,zero,zero,zero,zmm0[16,17,18,19,20,21,22,23,24,25,26,27],zero,zero,zero,zero,zmm0[32,33,34,35,36,37,38,39,40,41,42,43],zero,zero,zero,zero,zmm0[48,49,50,51,52,53,54,55,56,57,58,59]
225 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
226 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
227 %res = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 8)
228 %res1 = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 4)
229 %res2 = add <8 x i64> %res, %res1
233 define <8 x i64>@test_int_x86_avx512_psll_load_dq_512(<8 x i64>* %p0) {
234 ; X86-LABEL: test_int_x86_avx512_psll_load_dq_512:
236 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
237 ; X86-NEXT: vpslldq $4, (%eax), %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x73,0x38,0x04]
238 ; X86-NEXT: # zmm0 = zero,zero,zero,zero,mem[0,1,2,3,4,5,6,7,8,9,10,11],zero,zero,zero,zero,mem[16,17,18,19,20,21,22,23,24,25,26,27],zero,zero,zero,zero,mem[32,33,34,35,36,37,38,39,40,41,42,43],zero,zero,zero,zero,mem[48,49,50,51,52,53,54,55,56,57,58,59]
239 ; X86-NEXT: retl # encoding: [0xc3]
241 ; X64-LABEL: test_int_x86_avx512_psll_load_dq_512:
243 ; X64-NEXT: vpslldq $4, (%rdi), %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x73,0x3f,0x04]
244 ; X64-NEXT: # zmm0 = zero,zero,zero,zero,mem[0,1,2,3,4,5,6,7,8,9,10,11],zero,zero,zero,zero,mem[16,17,18,19,20,21,22,23,24,25,26,27],zero,zero,zero,zero,mem[32,33,34,35,36,37,38,39,40,41,42,43],zero,zero,zero,zero,mem[48,49,50,51,52,53,54,55,56,57,58,59]
245 ; X64-NEXT: retq # encoding: [0xc3]
246 %x0 = load <8 x i64>, <8 x i64> *%p0
247 %res = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 4)
251 declare <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64>, i32)
253 define <8 x i64>@test_int_x86_avx512_psrl_dq_512(<8 x i64> %x0) {
254 ; CHECK-LABEL: test_int_x86_avx512_psrl_dq_512:
256 ; CHECK-NEXT: vpsrldq $8, %zmm0, %zmm1 # encoding: [0x62,0xf1,0x75,0x48,0x73,0xd8,0x08]
257 ; CHECK-NEXT: # zmm1 = zmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[40,41,42,43,44,45,46,47],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[56,57,58,59,60,61,62,63],zero,zero,zero,zero,zero,zero,zero,zero
258 ; CHECK-NEXT: vpsrldq $4, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x73,0xd8,0x04]
259 ; CHECK-NEXT: # zmm0 = zmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zmm0[20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zmm0[36,37,38,39,40,41,42,43,44,45,46,47],zero,zero,zero,zero,zmm0[52,53,54,55,56,57,58,59,60,61,62,63],zero,zero,zero,zero
260 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
261 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
262 %res = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 8)
263 %res1 = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 4)
264 %res2 = add <8 x i64> %res, %res1
268 define <8 x i64>@test_int_x86_avx512_psrl_load_dq_512(<8 x i64>* %p0) {
269 ; X86-LABEL: test_int_x86_avx512_psrl_load_dq_512:
271 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
272 ; X86-NEXT: vpsrldq $4, (%eax), %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x73,0x18,0x04]
273 ; X86-NEXT: # zmm0 = mem[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,mem[20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,mem[36,37,38,39,40,41,42,43,44,45,46,47],zero,zero,zero,zero,mem[52,53,54,55,56,57,58,59,60,61,62,63],zero,zero,zero,zero
274 ; X86-NEXT: retl # encoding: [0xc3]
276 ; X64-LABEL: test_int_x86_avx512_psrl_load_dq_512:
278 ; X64-NEXT: vpsrldq $4, (%rdi), %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x73,0x1f,0x04]
279 ; X64-NEXT: # zmm0 = mem[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,mem[20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,mem[36,37,38,39,40,41,42,43,44,45,46,47],zero,zero,zero,zero,mem[52,53,54,55,56,57,58,59,60,61,62,63],zero,zero,zero,zero
280 ; X64-NEXT: retq # encoding: [0xc3]
281 %x0 = load <8 x i64>, <8 x i64> *%p0
282 %res = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 4)
286 declare <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8>, <64 x i8>, i32, <64 x i8>, i64)
288 define <64 x i8>@test_int_x86_avx512_mask_palignr_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x3, i64 %x4) {
289 ; X86-LABEL: test_int_x86_avx512_mask_palignr_512:
291 ; X86-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x0f,0xd9,0x02]
292 ; X86-NEXT: # zmm3 = zmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1],zmm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zmm0[16,17],zmm1[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zmm0[32,33],zmm1[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zmm0[48,49]
293 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
294 ; X86-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x0f,0xd1,0x02]
295 ; X86-NEXT: # zmm2 {%k1} = zmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1],zmm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zmm0[16,17],zmm1[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zmm0[32,33],zmm1[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zmm0[48,49]
296 ; X86-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x0f,0xc1,0x02]
297 ; X86-NEXT: # zmm0 {%k1} {z} = zmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1],zmm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zmm0[16,17],zmm1[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zmm0[32,33],zmm1[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zmm0[48,49]
298 ; X86-NEXT: vpaddb %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc3]
299 ; X86-NEXT: vpaddb %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc0]
300 ; X86-NEXT: retl # encoding: [0xc3]
302 ; X64-LABEL: test_int_x86_avx512_mask_palignr_512:
304 ; X64-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x0f,0xd9,0x02]
305 ; X64-NEXT: # zmm3 = zmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1],zmm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zmm0[16,17],zmm1[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zmm0[32,33],zmm1[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zmm0[48,49]
306 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
307 ; X64-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x0f,0xd1,0x02]
308 ; X64-NEXT: # zmm2 {%k1} = zmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1],zmm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zmm0[16,17],zmm1[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zmm0[32,33],zmm1[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zmm0[48,49]
309 ; X64-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x0f,0xc1,0x02]
310 ; X64-NEXT: # zmm0 {%k1} {z} = zmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1],zmm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zmm0[16,17],zmm1[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zmm0[32,33],zmm1[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zmm0[48,49]
311 ; X64-NEXT: vpaddb %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc3]
312 ; X64-NEXT: vpaddb %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc0]
313 ; X64-NEXT: retq # encoding: [0xc3]
314 %res = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> %x3, i64 %x4)
315 %res1 = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> zeroinitializer, i64 %x4)
316 %res2 = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> %x3, i64 -1)
317 %res3 = add <64 x i8> %res, %res1
318 %res4 = add <64 x i8> %res3, %res2
322 declare <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16>, i32, <32 x i16>, i32)
324 define <32 x i16>@test_int_x86_avx512_mask_pshufh_w_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) {
325 ; X86-LABEL: test_int_x86_avx512_mask_pshufh_w_512:
327 ; X86-NEXT: vpshufhw $3, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x7e,0x48,0x70,0xd0,0x03]
328 ; X86-NEXT: # zmm2 = zmm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12,16,17,18,19,23,20,20,20,24,25,26,27,31,28,28,28]
329 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
330 ; X86-NEXT: vpshufhw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x49,0x70,0xc8,0x03]
331 ; X86-NEXT: # zmm1 {%k1} = zmm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12,16,17,18,19,23,20,20,20,24,25,26,27,31,28,28,28]
332 ; X86-NEXT: vpshufhw $3, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xc9,0x70,0xc0,0x03]
333 ; X86-NEXT: # zmm0 {%k1} {z} = zmm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12,16,17,18,19,23,20,20,20,24,25,26,27,31,28,28,28]
334 ; X86-NEXT: vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2]
335 ; X86-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
336 ; X86-NEXT: retl # encoding: [0xc3]
338 ; X64-LABEL: test_int_x86_avx512_mask_pshufh_w_512:
340 ; X64-NEXT: vpshufhw $3, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x7e,0x48,0x70,0xd0,0x03]
341 ; X64-NEXT: # zmm2 = zmm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12,16,17,18,19,23,20,20,20,24,25,26,27,31,28,28,28]
342 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
343 ; X64-NEXT: vpshufhw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x49,0x70,0xc8,0x03]
344 ; X64-NEXT: # zmm1 {%k1} = zmm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12,16,17,18,19,23,20,20,20,24,25,26,27,31,28,28,28]
345 ; X64-NEXT: vpshufhw $3, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xc9,0x70,0xc0,0x03]
346 ; X64-NEXT: # zmm0 {%k1} {z} = zmm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12,16,17,18,19,23,20,20,20,24,25,26,27,31,28,28,28]
347 ; X64-NEXT: vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2]
348 ; X64-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
349 ; X64-NEXT: retq # encoding: [0xc3]
350 %res = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3)
351 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i32 3, <32 x i16> zeroinitializer, i32 %x3)
352 %res2 = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 -1)
353 %res3 = add <32 x i16> %res, %res1
354 %res4 = add <32 x i16> %res3, %res2
358 declare <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16>, i32, <32 x i16>, i32)
360 define <32 x i16>@test_int_x86_avx512_mask_pshufl_w_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) {
361 ; X86-LABEL: test_int_x86_avx512_mask_pshufl_w_512:
363 ; X86-NEXT: vpshuflw $3, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x7f,0x48,0x70,0xd0,0x03]
364 ; X86-NEXT: # zmm2 = zmm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15,19,16,16,16,20,21,22,23,27,24,24,24,28,29,30,31]
365 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
366 ; X86-NEXT: vpshuflw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x49,0x70,0xc8,0x03]
367 ; X86-NEXT: # zmm1 {%k1} = zmm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15,19,16,16,16,20,21,22,23,27,24,24,24,28,29,30,31]
368 ; X86-NEXT: vpshuflw $3, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xc9,0x70,0xc0,0x03]
369 ; X86-NEXT: # zmm0 {%k1} {z} = zmm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15,19,16,16,16,20,21,22,23,27,24,24,24,28,29,30,31]
370 ; X86-NEXT: vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2]
371 ; X86-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
372 ; X86-NEXT: retl # encoding: [0xc3]
374 ; X64-LABEL: test_int_x86_avx512_mask_pshufl_w_512:
376 ; X64-NEXT: vpshuflw $3, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x7f,0x48,0x70,0xd0,0x03]
377 ; X64-NEXT: # zmm2 = zmm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15,19,16,16,16,20,21,22,23,27,24,24,24,28,29,30,31]
378 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
379 ; X64-NEXT: vpshuflw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x49,0x70,0xc8,0x03]
380 ; X64-NEXT: # zmm1 {%k1} = zmm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15,19,16,16,16,20,21,22,23,27,24,24,24,28,29,30,31]
381 ; X64-NEXT: vpshuflw $3, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xc9,0x70,0xc0,0x03]
382 ; X64-NEXT: # zmm0 {%k1} {z} = zmm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15,19,16,16,16,20,21,22,23,27,24,24,24,28,29,30,31]
383 ; X64-NEXT: vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2]
384 ; X64-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
385 ; X64-NEXT: retq # encoding: [0xc3]
386 %res = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3)
387 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i32 3, <32 x i16> zeroinitializer, i32 %x3)
388 %res2 = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 -1)
389 %res3 = add <32 x i16> %res, %res1
390 %res4 = add <32 x i16> %res3, %res2
394 define i64 @test_pcmpeq_b(<64 x i8> %a, <64 x i8> %b) {
395 ; X86-LABEL: test_pcmpeq_b:
397 ; X86-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xc1]
398 ; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
399 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
400 ; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
401 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
402 ; X86-NEXT: retl # encoding: [0xc3]
404 ; X64-LABEL: test_pcmpeq_b:
406 ; X64-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xc1]
407 ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
408 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
409 ; X64-NEXT: retq # encoding: [0xc3]
410 %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
414 define i64 @test_mask_pcmpeq_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
415 ; X86-LABEL: test_mask_pcmpeq_b:
417 ; X86-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xc1]
418 ; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
419 ; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
420 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
421 ; X86-NEXT: andl {{[0-9]+}}(%esp), %eax # encoding: [0x23,0x44,0x24,0x04]
422 ; X86-NEXT: andl {{[0-9]+}}(%esp), %edx # encoding: [0x23,0x54,0x24,0x08]
423 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
424 ; X86-NEXT: retl # encoding: [0xc3]
426 ; X64-LABEL: test_mask_pcmpeq_b:
428 ; X64-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xc1]
429 ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
430 ; X64-NEXT: andq %rdi, %rax # encoding: [0x48,0x21,0xf8]
431 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
432 ; X64-NEXT: retq # encoding: [0xc3]
433 %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
437 declare i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8>, <64 x i8>, i64)
439 define i32 @test_pcmpeq_w(<32 x i16> %a, <32 x i16> %b) {
440 ; CHECK-LABEL: test_pcmpeq_w:
442 ; CHECK-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x75,0xc1]
443 ; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
444 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
445 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
446 %res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1)
450 define i32 @test_mask_pcmpeq_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
451 ; X86-LABEL: test_mask_pcmpeq_w:
453 ; X86-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x75,0xc1]
454 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
455 ; X86-NEXT: andl {{[0-9]+}}(%esp), %eax # encoding: [0x23,0x44,0x24,0x04]
456 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
457 ; X86-NEXT: retl # encoding: [0xc3]
459 ; X64-LABEL: test_mask_pcmpeq_w:
461 ; X64-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x75,0xc1]
462 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
463 ; X64-NEXT: andl %edi, %eax # encoding: [0x21,0xf8]
464 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
465 ; X64-NEXT: retq # encoding: [0xc3]
466 %res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask)
470 declare i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16>, <32 x i16>, i32)
472 define i64 @test_pcmpgt_b(<64 x i8> %a, <64 x i8> %b) {
473 ; X86-LABEL: test_pcmpgt_b:
475 ; X86-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x64,0xc1]
476 ; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
477 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
478 ; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
479 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
480 ; X86-NEXT: retl # encoding: [0xc3]
482 ; X64-LABEL: test_pcmpgt_b:
484 ; X64-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x64,0xc1]
485 ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
486 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
487 ; X64-NEXT: retq # encoding: [0xc3]
488 %res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
492 define i64 @test_mask_pcmpgt_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
493 ; X86-LABEL: test_mask_pcmpgt_b:
495 ; X86-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x64,0xc1]
496 ; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
497 ; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
498 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
499 ; X86-NEXT: andl {{[0-9]+}}(%esp), %eax # encoding: [0x23,0x44,0x24,0x04]
500 ; X86-NEXT: andl {{[0-9]+}}(%esp), %edx # encoding: [0x23,0x54,0x24,0x08]
501 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
502 ; X86-NEXT: retl # encoding: [0xc3]
504 ; X64-LABEL: test_mask_pcmpgt_b:
506 ; X64-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x64,0xc1]
507 ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
508 ; X64-NEXT: andq %rdi, %rax # encoding: [0x48,0x21,0xf8]
509 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
510 ; X64-NEXT: retq # encoding: [0xc3]
511 %res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
515 declare i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8>, <64 x i8>, i64)
517 define i32 @test_pcmpgt_w(<32 x i16> %a, <32 x i16> %b) {
518 ; CHECK-LABEL: test_pcmpgt_w:
520 ; CHECK-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x65,0xc1]
521 ; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
522 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
523 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
524 %res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1)
528 define i32 @test_mask_pcmpgt_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
529 ; X86-LABEL: test_mask_pcmpgt_w:
531 ; X86-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x65,0xc1]
532 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
533 ; X86-NEXT: andl {{[0-9]+}}(%esp), %eax # encoding: [0x23,0x44,0x24,0x04]
534 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
535 ; X86-NEXT: retl # encoding: [0xc3]
537 ; X64-LABEL: test_mask_pcmpgt_w:
539 ; X64-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x65,0xc1]
540 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
541 ; X64-NEXT: andl %edi, %eax # encoding: [0x21,0xf8]
542 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
543 ; X64-NEXT: retq # encoding: [0xc3]
544 %res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask)
548 declare i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16>, <32 x i16>, i32)
550 declare <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
552 define <64 x i8>@test_int_x86_avx512_mask_punpckhb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
553 ; X86-LABEL: test_int_x86_avx512_mask_punpckhb_w_512:
555 ; X86-NEXT: vpunpckhbw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0x68,0xd9]
556 ; X86-NEXT: # zmm3 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
557 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
558 ; X86-NEXT: vpunpckhbw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x68,0xd1]
559 ; X86-NEXT: # zmm2 {%k1} = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
560 ; X86-NEXT: vpaddb %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc3]
561 ; X86-NEXT: retl # encoding: [0xc3]
563 ; X64-LABEL: test_int_x86_avx512_mask_punpckhb_w_512:
565 ; X64-NEXT: vpunpckhbw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0x68,0xd9]
566 ; X64-NEXT: # zmm3 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
567 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
568 ; X64-NEXT: vpunpckhbw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x68,0xd1]
569 ; X64-NEXT: # zmm2 {%k1} = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
570 ; X64-NEXT: vpaddb %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc3]
571 ; X64-NEXT: retq # encoding: [0xc3]
572 %res = call <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
573 %res1 = call <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
574 %res2 = add <64 x i8> %res, %res1
578 declare <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
580 define <64 x i8>@test_int_x86_avx512_mask_punpcklb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
581 ; X86-LABEL: test_int_x86_avx512_mask_punpcklb_w_512:
583 ; X86-NEXT: vpunpcklbw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0x60,0xd9]
584 ; X86-NEXT: # zmm3 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
585 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
586 ; X86-NEXT: vpunpcklbw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x60,0xd1]
587 ; X86-NEXT: # zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
588 ; X86-NEXT: vpaddb %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc3]
589 ; X86-NEXT: retl # encoding: [0xc3]
591 ; X64-LABEL: test_int_x86_avx512_mask_punpcklb_w_512:
593 ; X64-NEXT: vpunpcklbw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0x60,0xd9]
594 ; X64-NEXT: # zmm3 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
595 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
596 ; X64-NEXT: vpunpcklbw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x60,0xd1]
597 ; X64-NEXT: # zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
598 ; X64-NEXT: vpaddb %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc3]
599 ; X64-NEXT: retq # encoding: [0xc3]
600 %res = call <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
601 %res1 = call <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
602 %res2 = add <64 x i8> %res, %res1
606 declare <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
608 define <32 x i16>@test_int_x86_avx512_mask_punpckhw_d_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
609 ; X86-LABEL: test_int_x86_avx512_mask_punpckhw_d_512:
611 ; X86-NEXT: vpunpckhwd %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0x69,0xd9]
612 ; X86-NEXT: # zmm3 = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
613 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
614 ; X86-NEXT: vpunpckhwd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x69,0xd1]
615 ; X86-NEXT: # zmm2 {%k1} = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
616 ; X86-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
617 ; X86-NEXT: retl # encoding: [0xc3]
619 ; X64-LABEL: test_int_x86_avx512_mask_punpckhw_d_512:
621 ; X64-NEXT: vpunpckhwd %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0x69,0xd9]
622 ; X64-NEXT: # zmm3 = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
623 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
624 ; X64-NEXT: vpunpckhwd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x69,0xd1]
625 ; X64-NEXT: # zmm2 {%k1} = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
626 ; X64-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
627 ; X64-NEXT: retq # encoding: [0xc3]
628 %res = call <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
629 %res1 = call <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
630 %res2 = add <32 x i16> %res, %res1
634 declare <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
636 define <32 x i16>@test_int_x86_avx512_mask_punpcklw_d_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
637 ; X86-LABEL: test_int_x86_avx512_mask_punpcklw_d_512:
639 ; X86-NEXT: vpunpcklwd %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0x61,0xd9]
640 ; X86-NEXT: # zmm3 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
641 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
642 ; X86-NEXT: vpunpcklwd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x61,0xd1]
643 ; X86-NEXT: # zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
644 ; X86-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
645 ; X86-NEXT: retl # encoding: [0xc3]
647 ; X64-LABEL: test_int_x86_avx512_mask_punpcklw_d_512:
649 ; X64-NEXT: vpunpcklwd %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0x61,0xd9]
650 ; X64-NEXT: # zmm3 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
651 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
652 ; X64-NEXT: vpunpcklwd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x61,0xd1]
653 ; X64-NEXT: # zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
654 ; X64-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
655 ; X64-NEXT: retq # encoding: [0xc3]
656 %res = call <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
657 %res1 = call <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
658 %res2 = add <32 x i16> %res, %res1
662 declare <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
664 define <64 x i8>@test_int_x86_avx512_mask_pmaxs_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
665 ; X86-LABEL: test_int_x86_avx512_mask_pmaxs_b_512:
667 ; X86-NEXT: vpmaxsb %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x3c,0xd9]
668 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
669 ; X86-NEXT: vpmaxsb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x3c,0xd1]
670 ; X86-NEXT: vpaddb %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc3]
671 ; X86-NEXT: retl # encoding: [0xc3]
673 ; X64-LABEL: test_int_x86_avx512_mask_pmaxs_b_512:
675 ; X64-NEXT: vpmaxsb %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x3c,0xd9]
676 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
677 ; X64-NEXT: vpmaxsb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x3c,0xd1]
678 ; X64-NEXT: vpaddb %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc3]
679 ; X64-NEXT: retq # encoding: [0xc3]
680 %res = call <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
681 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
682 %res2 = add <64 x i8> %res, %res1
686 declare <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
688 define <32 x i16>@test_int_x86_avx512_mask_pmaxs_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
689 ; X86-LABEL: test_int_x86_avx512_mask_pmaxs_w_512:
691 ; X86-NEXT: vpmaxsw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xee,0xd9]
692 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
693 ; X86-NEXT: vpmaxsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xee,0xd1]
694 ; X86-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
695 ; X86-NEXT: retl # encoding: [0xc3]
697 ; X64-LABEL: test_int_x86_avx512_mask_pmaxs_w_512:
699 ; X64-NEXT: vpmaxsw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xee,0xd9]
700 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
701 ; X64-NEXT: vpmaxsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xee,0xd1]
702 ; X64-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
703 ; X64-NEXT: retq # encoding: [0xc3]
704 %res = call <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
705 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
706 %res2 = add <32 x i16> %res, %res1
710 declare <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
712 define <64 x i8>@test_int_x86_avx512_mask_pmaxu_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
713 ; X86-LABEL: test_int_x86_avx512_mask_pmaxu_b_512:
715 ; X86-NEXT: vpmaxub %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xde,0xd9]
716 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
717 ; X86-NEXT: vpmaxub %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xde,0xd1]
718 ; X86-NEXT: vpaddb %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc3]
719 ; X86-NEXT: retl # encoding: [0xc3]
721 ; X64-LABEL: test_int_x86_avx512_mask_pmaxu_b_512:
723 ; X64-NEXT: vpmaxub %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xde,0xd9]
724 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
725 ; X64-NEXT: vpmaxub %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xde,0xd1]
726 ; X64-NEXT: vpaddb %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc3]
727 ; X64-NEXT: retq # encoding: [0xc3]
728 %res = call <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
729 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
730 %res2 = add <64 x i8> %res, %res1
734 declare <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
736 define <32 x i16>@test_int_x86_avx512_mask_pmaxu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
737 ; X86-LABEL: test_int_x86_avx512_mask_pmaxu_w_512:
739 ; X86-NEXT: vpmaxuw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x3e,0xd9]
740 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
741 ; X86-NEXT: vpmaxuw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x3e,0xd1]
742 ; X86-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
743 ; X86-NEXT: retl # encoding: [0xc3]
745 ; X64-LABEL: test_int_x86_avx512_mask_pmaxu_w_512:
747 ; X64-NEXT: vpmaxuw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x3e,0xd9]
748 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
749 ; X64-NEXT: vpmaxuw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x3e,0xd1]
750 ; X64-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
751 ; X64-NEXT: retq # encoding: [0xc3]
752 %res = call <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
753 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
754 %res2 = add <32 x i16> %res, %res1
758 declare <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
760 define <64 x i8>@test_int_x86_avx512_mask_pmins_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
761 ; X86-LABEL: test_int_x86_avx512_mask_pmins_b_512:
763 ; X86-NEXT: vpminsb %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x38,0xd9]
764 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
765 ; X86-NEXT: vpminsb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x38,0xd1]
766 ; X86-NEXT: vpaddb %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc3]
767 ; X86-NEXT: retl # encoding: [0xc3]
769 ; X64-LABEL: test_int_x86_avx512_mask_pmins_b_512:
771 ; X64-NEXT: vpminsb %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x38,0xd9]
772 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
773 ; X64-NEXT: vpminsb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x38,0xd1]
774 ; X64-NEXT: vpaddb %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc3]
775 ; X64-NEXT: retq # encoding: [0xc3]
776 %res = call <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
777 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
778 %res2 = add <64 x i8> %res, %res1
782 declare <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
784 define <32 x i16>@test_int_x86_avx512_mask_pmins_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
785 ; X86-LABEL: test_int_x86_avx512_mask_pmins_w_512:
787 ; X86-NEXT: vpminsw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xea,0xd9]
788 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
789 ; X86-NEXT: vpminsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xea,0xd1]
790 ; X86-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
791 ; X86-NEXT: retl # encoding: [0xc3]
793 ; X64-LABEL: test_int_x86_avx512_mask_pmins_w_512:
795 ; X64-NEXT: vpminsw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xea,0xd9]
796 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
797 ; X64-NEXT: vpminsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xea,0xd1]
798 ; X64-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
799 ; X64-NEXT: retq # encoding: [0xc3]
800 %res = call <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
801 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
802 %res2 = add <32 x i16> %res, %res1
806 declare <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
808 define <64 x i8>@test_int_x86_avx512_mask_pminu_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
809 ; X86-LABEL: test_int_x86_avx512_mask_pminu_b_512:
811 ; X86-NEXT: vpminub %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xda,0xd9]
812 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
813 ; X86-NEXT: vpminub %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xda,0xd1]
814 ; X86-NEXT: vpaddb %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc3]
815 ; X86-NEXT: retl # encoding: [0xc3]
817 ; X64-LABEL: test_int_x86_avx512_mask_pminu_b_512:
819 ; X64-NEXT: vpminub %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xda,0xd9]
820 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
821 ; X64-NEXT: vpminub %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xda,0xd1]
822 ; X64-NEXT: vpaddb %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc3]
823 ; X64-NEXT: retq # encoding: [0xc3]
824 %res = call <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
825 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
826 %res2 = add <64 x i8> %res, %res1
830 declare <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
832 define <32 x i16>@test_int_x86_avx512_mask_pminu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
833 ; X86-LABEL: test_int_x86_avx512_mask_pminu_w_512:
835 ; X86-NEXT: vpminuw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x3a,0xd9]
836 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
837 ; X86-NEXT: vpminuw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x3a,0xd1]
838 ; X86-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
839 ; X86-NEXT: retl # encoding: [0xc3]
841 ; X64-LABEL: test_int_x86_avx512_mask_pminu_w_512:
843 ; X64-NEXT: vpminuw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x3a,0xd9]
844 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
845 ; X64-NEXT: vpminuw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x3a,0xd1]
846 ; X64-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
847 ; X64-NEXT: retq # encoding: [0xc3]
848 %res = call <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
849 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
850 %res2 = add <32 x i16> %res, %res1
854 declare <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8>, <32 x i16>, i32)
856 define <32 x i16>@test_int_x86_avx512_mask_pmovzxb_w_512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2) {
857 ; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_w_512:
859 ; X86-NEXT: vpmovzxbw %ymm0, %zmm2 # encoding: [0x62,0xf2,0x7d,0x48,0x30,0xd0]
860 ; X86-NEXT: # zmm2 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
861 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
862 ; X86-NEXT: vpmovzxbw %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x30,0xc8]
863 ; X86-NEXT: # zmm1 {%k1} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
864 ; X86-NEXT: vpmovzxbw %ymm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x30,0xc0]
865 ; X86-NEXT: # zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
866 ; X86-NEXT: vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2]
867 ; X86-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
868 ; X86-NEXT: retl # encoding: [0xc3]
870 ; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_w_512:
872 ; X64-NEXT: vpmovzxbw %ymm0, %zmm2 # encoding: [0x62,0xf2,0x7d,0x48,0x30,0xd0]
873 ; X64-NEXT: # zmm2 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
874 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
875 ; X64-NEXT: vpmovzxbw %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x30,0xc8]
876 ; X64-NEXT: # zmm1 {%k1} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
877 ; X64-NEXT: vpmovzxbw %ymm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x30,0xc0]
878 ; X64-NEXT: # zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
879 ; X64-NEXT: vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2]
880 ; X64-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
881 ; X64-NEXT: retq # encoding: [0xc3]
882 %res = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2)
883 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> zeroinitializer, i32 %x2)
884 %res2 = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 -1)
885 %res3 = add <32 x i16> %res, %res1
886 %res4 = add <32 x i16> %res3, %res2
890 declare <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8>, <32 x i16>, i32)
892 define <32 x i16>@test_int_x86_avx512_mask_pmovsxb_w_512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2) {
893 ; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_w_512:
895 ; X86-NEXT: vpmovsxbw %ymm0, %zmm2 # encoding: [0x62,0xf2,0x7d,0x48,0x20,0xd0]
896 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
897 ; X86-NEXT: vpmovsxbw %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x20,0xc8]
898 ; X86-NEXT: vpmovsxbw %ymm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x20,0xc0]
899 ; X86-NEXT: vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2]
900 ; X86-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
901 ; X86-NEXT: retl # encoding: [0xc3]
903 ; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_w_512:
905 ; X64-NEXT: vpmovsxbw %ymm0, %zmm2 # encoding: [0x62,0xf2,0x7d,0x48,0x20,0xd0]
906 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
907 ; X64-NEXT: vpmovsxbw %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x20,0xc8]
908 ; X64-NEXT: vpmovsxbw %ymm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x20,0xc0]
909 ; X64-NEXT: vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2]
910 ; X64-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
911 ; X64-NEXT: retq # encoding: [0xc3]
912 %res = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2)
913 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> zeroinitializer, i32 %x2)
914 %res2 = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 -1)
915 %res3 = add <32 x i16> %res, %res1
916 %res4 = add <32 x i16> %res3, %res2
920 declare <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16>, <8 x i16>, <32 x i16>, i32)
922 define <32 x i16>@test_int_x86_avx512_mask_psrl_w_512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) {
923 ; X86-LABEL: test_int_x86_avx512_mask_psrl_w_512:
925 ; X86-NEXT: vpsrlw %xmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xd1,0xd9]
926 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
927 ; X86-NEXT: vpsrlw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd1,0xd1]
928 ; X86-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd1,0xc1]
929 ; X86-NEXT: vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0]
930 ; X86-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
931 ; X86-NEXT: retl # encoding: [0xc3]
933 ; X64-LABEL: test_int_x86_avx512_mask_psrl_w_512:
935 ; X64-NEXT: vpsrlw %xmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xd1,0xd9]
936 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
937 ; X64-NEXT: vpsrlw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd1,0xd1]
938 ; X64-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd1,0xc1]
939 ; X64-NEXT: vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0]
940 ; X64-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
941 ; X64-NEXT: retq # encoding: [0xc3]
942 %res = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3)
943 %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1)
944 %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
945 %res3 = add <32 x i16> %res, %res1
946 %res4 = add <32 x i16> %res3, %res2
950 declare <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16>, i32, <32 x i16>, i32)
952 define <32 x i16>@test_int_x86_avx512_mask_psrl_wi_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) {
953 ; X86-LABEL: test_int_x86_avx512_mask_psrl_wi_512:
955 ; X86-NEXT: vpsrlw $3, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x6d,0x48,0x71,0xd0,0x03]
956 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
957 ; X86-NEXT: vpsrlw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xd0,0x03]
958 ; X86-NEXT: vpsrlw $3, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xd0,0x03]
959 ; X86-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
960 ; X86-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
961 ; X86-NEXT: retl # encoding: [0xc3]
963 ; X64-LABEL: test_int_x86_avx512_mask_psrl_wi_512:
965 ; X64-NEXT: vpsrlw $3, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x6d,0x48,0x71,0xd0,0x03]
966 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
967 ; X64-NEXT: vpsrlw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xd0,0x03]
968 ; X64-NEXT: vpsrlw $3, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xd0,0x03]
969 ; X64-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
970 ; X64-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
971 ; X64-NEXT: retq # encoding: [0xc3]
972 %res = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3)
973 %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 -1)
974 %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i32 3, <32 x i16> zeroinitializer, i32 %x3)
975 %res3 = add <32 x i16> %res, %res1
976 %res4 = add <32 x i16> %res3, %res2
980 declare <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16>, <8 x i16>, <32 x i16>, i32)
982 define <32 x i16>@test_int_x86_avx512_mask_psra_w_512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) {
983 ; X86-LABEL: test_int_x86_avx512_mask_psra_w_512:
985 ; X86-NEXT: vpsraw %xmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe1,0xd9]
986 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
987 ; X86-NEXT: vpsraw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe1,0xd1]
988 ; X86-NEXT: vpsraw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe1,0xc1]
989 ; X86-NEXT: vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
990 ; X86-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
991 ; X86-NEXT: retl # encoding: [0xc3]
993 ; X64-LABEL: test_int_x86_avx512_mask_psra_w_512:
995 ; X64-NEXT: vpsraw %xmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe1,0xd9]
996 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
997 ; X64-NEXT: vpsraw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe1,0xd1]
998 ; X64-NEXT: vpsraw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe1,0xc1]
999 ; X64-NEXT: vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
1000 ; X64-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
1001 ; X64-NEXT: retq # encoding: [0xc3]
1002 %res = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3)
1003 %res1 = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
1004 %res2 = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1)
1005 %res3 = add <32 x i16> %res, %res1
1006 %res4 = add <32 x i16> %res3, %res2
1007 ret <32 x i16> %res4
1010 declare <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16>, i32, <32 x i16>, i32)
1012 define <32 x i16>@test_int_x86_avx512_mask_psra_wi_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) {
1013 ; X86-LABEL: test_int_x86_avx512_mask_psra_wi_512:
1015 ; X86-NEXT: vpsraw $3, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x6d,0x48,0x71,0xe0,0x03]
1016 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
1017 ; X86-NEXT: vpsraw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xe0,0x03]
1018 ; X86-NEXT: vpsraw $3, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xe0,0x03]
1019 ; X86-NEXT: vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2]
1020 ; X86-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
1021 ; X86-NEXT: retl # encoding: [0xc3]
1023 ; X64-LABEL: test_int_x86_avx512_mask_psra_wi_512:
1025 ; X64-NEXT: vpsraw $3, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x6d,0x48,0x71,0xe0,0x03]
1026 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1027 ; X64-NEXT: vpsraw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xe0,0x03]
1028 ; X64-NEXT: vpsraw $3, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xe0,0x03]
1029 ; X64-NEXT: vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2]
1030 ; X64-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
1031 ; X64-NEXT: retq # encoding: [0xc3]
1032 %res = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3)
1033 %res1 = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i32 3, <32 x i16> zeroinitializer, i32 %x3)
1034 %res2 = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 -1)
1035 %res3 = add <32 x i16> %res, %res1
1036 %res4 = add <32 x i16> %res3, %res2
1037 ret <32 x i16> %res4
1040 declare <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16>, <8 x i16>, <32 x i16>, i32)
1042 define <32 x i16>@test_int_x86_avx512_mask_psll_w_512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) {
1043 ; X86-LABEL: test_int_x86_avx512_mask_psll_w_512:
1045 ; X86-NEXT: vpsllw %xmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xf1,0xd9]
1046 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1047 ; X86-NEXT: vpsllw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf1,0xd1]
1048 ; X86-NEXT: vpsllw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf1,0xc1]
1049 ; X86-NEXT: vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
1050 ; X86-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
1051 ; X86-NEXT: retl # encoding: [0xc3]
1053 ; X64-LABEL: test_int_x86_avx512_mask_psll_w_512:
1055 ; X64-NEXT: vpsllw %xmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xf1,0xd9]
1056 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1057 ; X64-NEXT: vpsllw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf1,0xd1]
1058 ; X64-NEXT: vpsllw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf1,0xc1]
1059 ; X64-NEXT: vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
1060 ; X64-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
1061 ; X64-NEXT: retq # encoding: [0xc3]
1062 %res = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3)
1063 %res1 = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
1064 %res2 = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1)
1065 %res3 = add <32 x i16> %res, %res1
1066 %res4 = add <32 x i16> %res3, %res2
1067 ret <32 x i16> %res4
1070 declare <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16>, i32, <32 x i16>, i32)
1072 define <32 x i16>@test_int_x86_avx512_mask_psll_wi_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) {
1073 ; X86-LABEL: test_int_x86_avx512_mask_psll_wi_512:
1075 ; X86-NEXT: vpsllw $3, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x6d,0x48,0x71,0xf0,0x03]
1076 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
1077 ; X86-NEXT: vpsllw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xf0,0x03]
1078 ; X86-NEXT: vpsllw $3, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xf0,0x03]
1079 ; X86-NEXT: vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2]
1080 ; X86-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
1081 ; X86-NEXT: retl # encoding: [0xc3]
1083 ; X64-LABEL: test_int_x86_avx512_mask_psll_wi_512:
1085 ; X64-NEXT: vpsllw $3, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x6d,0x48,0x71,0xf0,0x03]
1086 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1087 ; X64-NEXT: vpsllw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xf0,0x03]
1088 ; X64-NEXT: vpsllw $3, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xf0,0x03]
1089 ; X64-NEXT: vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2]
1090 ; X64-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
1091 ; X64-NEXT: retq # encoding: [0xc3]
1092 %res = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3)
1093 %res1 = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i32 3, <32 x i16> zeroinitializer, i32 %x3)
1094 %res2 = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 -1)
1095 %res3 = add <32 x i16> %res, %res1
1096 %res4 = add <32 x i16> %res3, %res2
1097 ret <32 x i16> %res4
1100 declare <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
1102 define <64 x i8>@test_int_x86_avx512_mask_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
1103 ; X86-LABEL: test_int_x86_avx512_mask_pshuf_b_512:
1105 ; X86-NEXT: vpshufb %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x00,0xd9]
1106 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
1107 ; X86-NEXT: vpshufb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x00,0xd1]
1108 ; X86-NEXT: vpaddb %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc3]
1109 ; X86-NEXT: retl # encoding: [0xc3]
1111 ; X64-LABEL: test_int_x86_avx512_mask_pshuf_b_512:
1113 ; X64-NEXT: vpshufb %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x00,0xd9]
1114 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
1115 ; X64-NEXT: vpshufb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x00,0xd1]
1116 ; X64-NEXT: vpaddb %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc3]
1117 ; X64-NEXT: retq # encoding: [0xc3]
1118 %res = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
1119 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
1120 %res2 = add <64 x i8> %res, %res1
1125 declare <64 x i8> @llvm.x86.avx512.cvtmask2b.512(i64)
1127 define <64 x i8>@test_int_x86_avx512_cvtmask2b_512(i64 %x0) {
1128 ; X86-LABEL: test_int_x86_avx512_cvtmask2b_512:
1130 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf8,0x90,0x44,0x24,0x04]
1131 ; X86-NEXT: vpmovm2b %k0, %zmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x28,0xc0]
1132 ; X86-NEXT: retl # encoding: [0xc3]
1134 ; X64-LABEL: test_int_x86_avx512_cvtmask2b_512:
1136 ; X64-NEXT: kmovq %rdi, %k0 # encoding: [0xc4,0xe1,0xfb,0x92,0xc7]
1137 ; X64-NEXT: vpmovm2b %k0, %zmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x28,0xc0]
1138 ; X64-NEXT: retq # encoding: [0xc3]
1139 %res = call <64 x i8> @llvm.x86.avx512.cvtmask2b.512(i64 %x0)
1143 declare <32 x i16> @llvm.x86.avx512.cvtmask2w.512(i32)
1145 define <32 x i16>@test_int_x86_avx512_cvtmask2w_512(i32 %x0) {
1146 ; X86-LABEL: test_int_x86_avx512_cvtmask2w_512:
1148 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf9,0x90,0x44,0x24,0x04]
1149 ; X86-NEXT: vpmovm2w %k0, %zmm0 # encoding: [0x62,0xf2,0xfe,0x48,0x28,0xc0]
1150 ; X86-NEXT: retl # encoding: [0xc3]
1152 ; X64-LABEL: test_int_x86_avx512_cvtmask2w_512:
1154 ; X64-NEXT: kmovd %edi, %k0 # encoding: [0xc5,0xfb,0x92,0xc7]
1155 ; X64-NEXT: vpmovm2w %k0, %zmm0 # encoding: [0x62,0xf2,0xfe,0x48,0x28,0xc0]
1156 ; X64-NEXT: retq # encoding: [0xc3]
1157 %res = call <32 x i16> @llvm.x86.avx512.cvtmask2w.512(i32 %x0)
1160 define <32 x i16> @test_mask_packs_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
1161 ; CHECK-LABEL: test_mask_packs_epi32_rr_512:
1163 ; CHECK-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x6b,0xc1]
1164 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1165 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
1169 define <32 x i16> @test_mask_packs_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) {
1170 ; X86-LABEL: test_mask_packs_epi32_rrk_512:
1172 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1173 ; X86-NEXT: vpackssdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0xd1]
1174 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1175 ; X86-NEXT: retl # encoding: [0xc3]
1177 ; X64-LABEL: test_mask_packs_epi32_rrk_512:
1179 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1180 ; X64-NEXT: vpackssdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0xd1]
1181 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1182 ; X64-NEXT: retq # encoding: [0xc3]
1183 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
1187 define <32 x i16> @test_mask_packs_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) {
1188 ; X86-LABEL: test_mask_packs_epi32_rrkz_512:
1190 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1191 ; X86-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0xc1]
1192 ; X86-NEXT: retl # encoding: [0xc3]
1194 ; X64-LABEL: test_mask_packs_epi32_rrkz_512:
1196 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1197 ; X64-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0xc1]
1198 ; X64-NEXT: retq # encoding: [0xc3]
1199 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
1203 define <32 x i16> @test_mask_packs_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
1204 ; X86-LABEL: test_mask_packs_epi32_rm_512:
1206 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1207 ; X86-NEXT: vpackssdw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x6b,0x00]
1208 ; X86-NEXT: retl # encoding: [0xc3]
1210 ; X64-LABEL: test_mask_packs_epi32_rm_512:
1212 ; X64-NEXT: vpackssdw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x6b,0x07]
1213 ; X64-NEXT: retq # encoding: [0xc3]
1214 %b = load <16 x i32>, <16 x i32>* %ptr_b
1215 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
1219 define <32 x i16> @test_mask_packs_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
1220 ; X86-LABEL: test_mask_packs_epi32_rmk_512:
1222 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1223 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
1224 ; X86-NEXT: vpackssdw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0x08]
1225 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1226 ; X86-NEXT: retl # encoding: [0xc3]
1228 ; X64-LABEL: test_mask_packs_epi32_rmk_512:
1230 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1231 ; X64-NEXT: vpackssdw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0x0f]
1232 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1233 ; X64-NEXT: retq # encoding: [0xc3]
1234 %b = load <16 x i32>, <16 x i32>* %ptr_b
1235 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
1239 define <32 x i16> @test_mask_packs_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) {
1240 ; X86-LABEL: test_mask_packs_epi32_rmkz_512:
1242 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1243 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
1244 ; X86-NEXT: vpackssdw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0x00]
1245 ; X86-NEXT: retl # encoding: [0xc3]
1247 ; X64-LABEL: test_mask_packs_epi32_rmkz_512:
1249 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1250 ; X64-NEXT: vpackssdw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0x07]
1251 ; X64-NEXT: retq # encoding: [0xc3]
1252 %b = load <16 x i32>, <16 x i32>* %ptr_b
1253 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
1257 define <32 x i16> @test_mask_packs_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
1258 ; X86-LABEL: test_mask_packs_epi32_rmb_512:
1260 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1261 ; X86-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x58,0x6b,0x00]
1262 ; X86-NEXT: retl # encoding: [0xc3]
1264 ; X64-LABEL: test_mask_packs_epi32_rmb_512:
1266 ; X64-NEXT: vpackssdw (%rdi){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x58,0x6b,0x07]
1267 ; X64-NEXT: retq # encoding: [0xc3]
1268 %q = load i32, i32* %ptr_b
1269 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1270 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1271 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
1275 define <32 x i16> @test_mask_packs_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) {
1276 ; X86-LABEL: test_mask_packs_epi32_rmbk_512:
1278 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1279 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
1280 ; X86-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x59,0x6b,0x08]
1281 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1282 ; X86-NEXT: retl # encoding: [0xc3]
1284 ; X64-LABEL: test_mask_packs_epi32_rmbk_512:
1286 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1287 ; X64-NEXT: vpackssdw (%rdi){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x59,0x6b,0x0f]
1288 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1289 ; X64-NEXT: retq # encoding: [0xc3]
1290 %q = load i32, i32* %ptr_b
1291 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1292 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1293 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
1297 define <32 x i16> @test_mask_packs_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) {
1298 ; X86-LABEL: test_mask_packs_epi32_rmbkz_512:
1300 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1301 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
1302 ; X86-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xd9,0x6b,0x00]
1303 ; X86-NEXT: retl # encoding: [0xc3]
1305 ; X64-LABEL: test_mask_packs_epi32_rmbkz_512:
1307 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1308 ; X64-NEXT: vpackssdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xd9,0x6b,0x07]
1309 ; X64-NEXT: retq # encoding: [0xc3]
1310 %q = load i32, i32* %ptr_b
1311 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1312 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1313 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
1317 declare <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32)
1319 define <64 x i8> @test_mask_packs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
1320 ; CHECK-LABEL: test_mask_packs_epi16_rr_512:
1322 ; CHECK-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x63,0xc1]
1323 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1324 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
1328 define <64 x i8> @test_mask_packs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) {
1329 ; X86-LABEL: test_mask_packs_epi16_rrk_512:
1331 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
1332 ; X86-NEXT: vpacksswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0xd1]
1333 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1334 ; X86-NEXT: retl # encoding: [0xc3]
1336 ; X64-LABEL: test_mask_packs_epi16_rrk_512:
1338 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
1339 ; X64-NEXT: vpacksswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0xd1]
1340 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1341 ; X64-NEXT: retq # encoding: [0xc3]
1342 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
1346 define <64 x i8> @test_mask_packs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) {
1347 ; X86-LABEL: test_mask_packs_epi16_rrkz_512:
1349 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
1350 ; X86-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0xc1]
1351 ; X86-NEXT: retl # encoding: [0xc3]
1353 ; X64-LABEL: test_mask_packs_epi16_rrkz_512:
1355 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
1356 ; X64-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0xc1]
1357 ; X64-NEXT: retq # encoding: [0xc3]
1358 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
1362 define <64 x i8> @test_mask_packs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
1363 ; X86-LABEL: test_mask_packs_epi16_rm_512:
1365 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1366 ; X86-NEXT: vpacksswb (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x63,0x00]
1367 ; X86-NEXT: retl # encoding: [0xc3]
1369 ; X64-LABEL: test_mask_packs_epi16_rm_512:
1371 ; X64-NEXT: vpacksswb (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x63,0x07]
1372 ; X64-NEXT: retq # encoding: [0xc3]
1373 %b = load <32 x i16>, <32 x i16>* %ptr_b
1374 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
1378 define <64 x i8> @test_mask_packs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
1379 ; X86-LABEL: test_mask_packs_epi16_rmk_512:
1381 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1382 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
1383 ; X86-NEXT: vpacksswb (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0x08]
1384 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1385 ; X86-NEXT: retl # encoding: [0xc3]
1387 ; X64-LABEL: test_mask_packs_epi16_rmk_512:
1389 ; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
1390 ; X64-NEXT: vpacksswb (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0x0f]
1391 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1392 ; X64-NEXT: retq # encoding: [0xc3]
1393 %b = load <32 x i16>, <32 x i16>* %ptr_b
1394 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
1398 define <64 x i8> @test_mask_packs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) {
1399 ; X86-LABEL: test_mask_packs_epi16_rmkz_512:
1401 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1402 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
1403 ; X86-NEXT: vpacksswb (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0x00]
1404 ; X86-NEXT: retl # encoding: [0xc3]
1406 ; X64-LABEL: test_mask_packs_epi16_rmkz_512:
1408 ; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
1409 ; X64-NEXT: vpacksswb (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0x07]
1410 ; X64-NEXT: retq # encoding: [0xc3]
1411 %b = load <32 x i16>, <32 x i16>* %ptr_b
1412 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
1416 declare <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64)
1419 define <32 x i16> @test_mask_packus_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
1420 ; CHECK-LABEL: test_mask_packus_epi32_rr_512:
1422 ; CHECK-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x2b,0xc1]
1423 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1424 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
1428 define <32 x i16> @test_mask_packus_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) {
1429 ; X86-LABEL: test_mask_packus_epi32_rrk_512:
1431 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1432 ; X86-NEXT: vpackusdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0xd1]
1433 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1434 ; X86-NEXT: retl # encoding: [0xc3]
1436 ; X64-LABEL: test_mask_packus_epi32_rrk_512:
1438 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1439 ; X64-NEXT: vpackusdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0xd1]
1440 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1441 ; X64-NEXT: retq # encoding: [0xc3]
1442 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
1446 define <32 x i16> @test_mask_packus_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) {
1447 ; X86-LABEL: test_mask_packus_epi32_rrkz_512:
1449 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
1450 ; X86-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0xc1]
1451 ; X86-NEXT: retl # encoding: [0xc3]
1453 ; X64-LABEL: test_mask_packus_epi32_rrkz_512:
1455 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
1456 ; X64-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0xc1]
1457 ; X64-NEXT: retq # encoding: [0xc3]
1458 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
1462 define <32 x i16> @test_mask_packus_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
1463 ; X86-LABEL: test_mask_packus_epi32_rm_512:
1465 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1466 ; X86-NEXT: vpackusdw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x2b,0x00]
1467 ; X86-NEXT: retl # encoding: [0xc3]
1469 ; X64-LABEL: test_mask_packus_epi32_rm_512:
1471 ; X64-NEXT: vpackusdw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x2b,0x07]
1472 ; X64-NEXT: retq # encoding: [0xc3]
1473 %b = load <16 x i32>, <16 x i32>* %ptr_b
1474 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
1478 define <32 x i16> @test_mask_packus_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
1479 ; X86-LABEL: test_mask_packus_epi32_rmk_512:
1481 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1482 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
1483 ; X86-NEXT: vpackusdw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0x08]
1484 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1485 ; X86-NEXT: retl # encoding: [0xc3]
1487 ; X64-LABEL: test_mask_packus_epi32_rmk_512:
1489 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1490 ; X64-NEXT: vpackusdw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0x0f]
1491 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1492 ; X64-NEXT: retq # encoding: [0xc3]
1493 %b = load <16 x i32>, <16 x i32>* %ptr_b
1494 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
1498 define <32 x i16> @test_mask_packus_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) {
1499 ; X86-LABEL: test_mask_packus_epi32_rmkz_512:
1501 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1502 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
1503 ; X86-NEXT: vpackusdw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0x00]
1504 ; X86-NEXT: retl # encoding: [0xc3]
1506 ; X64-LABEL: test_mask_packus_epi32_rmkz_512:
1508 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1509 ; X64-NEXT: vpackusdw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0x07]
1510 ; X64-NEXT: retq # encoding: [0xc3]
1511 %b = load <16 x i32>, <16 x i32>* %ptr_b
1512 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
1516 define <32 x i16> @test_mask_packus_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
1517 ; X86-LABEL: test_mask_packus_epi32_rmb_512:
1519 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1520 ; X86-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x58,0x2b,0x00]
1521 ; X86-NEXT: retl # encoding: [0xc3]
1523 ; X64-LABEL: test_mask_packus_epi32_rmb_512:
1525 ; X64-NEXT: vpackusdw (%rdi){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x58,0x2b,0x07]
1526 ; X64-NEXT: retq # encoding: [0xc3]
1527 %q = load i32, i32* %ptr_b
1528 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1529 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1530 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
1534 define <32 x i16> @test_mask_packus_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) {
1535 ; X86-LABEL: test_mask_packus_epi32_rmbk_512:
1537 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1538 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
1539 ; X86-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x59,0x2b,0x08]
1540 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1541 ; X86-NEXT: retl # encoding: [0xc3]
1543 ; X64-LABEL: test_mask_packus_epi32_rmbk_512:
1545 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1546 ; X64-NEXT: vpackusdw (%rdi){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x59,0x2b,0x0f]
1547 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1548 ; X64-NEXT: retq # encoding: [0xc3]
1549 %q = load i32, i32* %ptr_b
1550 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1551 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1552 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
1556 define <32 x i16> @test_mask_packus_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) {
1557 ; X86-LABEL: test_mask_packus_epi32_rmbkz_512:
1559 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1560 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
1561 ; X86-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xd9,0x2b,0x00]
1562 ; X86-NEXT: retl # encoding: [0xc3]
1564 ; X64-LABEL: test_mask_packus_epi32_rmbkz_512:
1566 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
1567 ; X64-NEXT: vpackusdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xd9,0x2b,0x07]
1568 ; X64-NEXT: retq # encoding: [0xc3]
1569 %q = load i32, i32* %ptr_b
1570 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
1571 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
1572 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
1576 declare <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32)
1578 define <64 x i8> @test_mask_packus_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
1579 ; CHECK-LABEL: test_mask_packus_epi16_rr_512:
1581 ; CHECK-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x67,0xc1]
1582 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1583 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
1587 define <64 x i8> @test_mask_packus_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) {
1588 ; X86-LABEL: test_mask_packus_epi16_rrk_512:
1590 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
1591 ; X86-NEXT: vpackuswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0xd1]
1592 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1593 ; X86-NEXT: retl # encoding: [0xc3]
1595 ; X64-LABEL: test_mask_packus_epi16_rrk_512:
1597 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
1598 ; X64-NEXT: vpackuswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0xd1]
1599 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
1600 ; X64-NEXT: retq # encoding: [0xc3]
1601 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
1605 define <64 x i8> @test_mask_packus_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) {
1606 ; X86-LABEL: test_mask_packus_epi16_rrkz_512:
1608 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
1609 ; X86-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0xc1]
1610 ; X86-NEXT: retl # encoding: [0xc3]
1612 ; X64-LABEL: test_mask_packus_epi16_rrkz_512:
1614 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
1615 ; X64-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0xc1]
1616 ; X64-NEXT: retq # encoding: [0xc3]
1617 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
1621 define <64 x i8> @test_mask_packus_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
1622 ; X86-LABEL: test_mask_packus_epi16_rm_512:
1624 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1625 ; X86-NEXT: vpackuswb (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x67,0x00]
1626 ; X86-NEXT: retl # encoding: [0xc3]
1628 ; X64-LABEL: test_mask_packus_epi16_rm_512:
1630 ; X64-NEXT: vpackuswb (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x67,0x07]
1631 ; X64-NEXT: retq # encoding: [0xc3]
1632 %b = load <32 x i16>, <32 x i16>* %ptr_b
1633 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
1637 define <64 x i8> @test_mask_packus_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
1638 ; X86-LABEL: test_mask_packus_epi16_rmk_512:
1640 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1641 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
1642 ; X86-NEXT: vpackuswb (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0x08]
1643 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1644 ; X86-NEXT: retl # encoding: [0xc3]
1646 ; X64-LABEL: test_mask_packus_epi16_rmk_512:
1648 ; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
1649 ; X64-NEXT: vpackuswb (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0x0f]
1650 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
1651 ; X64-NEXT: retq # encoding: [0xc3]
1652 %b = load <32 x i16>, <32 x i16>* %ptr_b
1653 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
1657 define <64 x i8> @test_mask_packus_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) {
1658 ; X86-LABEL: test_mask_packus_epi16_rmkz_512:
1660 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1661 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
1662 ; X86-NEXT: vpackuswb (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0x00]
1663 ; X86-NEXT: retl # encoding: [0xc3]
1665 ; X64-LABEL: test_mask_packus_epi16_rmkz_512:
1667 ; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
1668 ; X64-NEXT: vpackuswb (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0x07]
1669 ; X64-NEXT: retq # encoding: [0xc3]
1670 %b = load <32 x i16>, <32 x i16>* %ptr_b
1671 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
1675 declare <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64)
1677 define i64 @test_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
1678 ; X86-LABEL: test_cmp_b_512:
1680 ; X86-NEXT: pushl %edi # encoding: [0x57]
1681 ; X86-NEXT: .cfi_def_cfa_offset 8
1682 ; X86-NEXT: pushl %esi # encoding: [0x56]
1683 ; X86-NEXT: .cfi_def_cfa_offset 12
1684 ; X86-NEXT: .cfi_offset %esi, -12
1685 ; X86-NEXT: .cfi_offset %edi, -8
1686 ; X86-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xc1]
1687 ; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
1688 ; X86-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
1689 ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
1690 ; X86-NEXT: vpcmpgtb %zmm0, %zmm1, %k0 # encoding: [0x62,0xf1,0x75,0x48,0x64,0xc0]
1691 ; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
1692 ; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
1693 ; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
1694 ; X86-NEXT: addl %ecx, %esi # encoding: [0x01,0xce]
1695 ; X86-NEXT: adcl %eax, %edx # encoding: [0x11,0xc2]
1696 ; X86-NEXT: vpcmpleb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x02]
1697 ; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
1698 ; X86-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
1699 ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
1700 ; X86-NEXT: addl %esi, %ecx # encoding: [0x01,0xf1]
1701 ; X86-NEXT: adcl %edx, %eax # encoding: [0x11,0xd0]
1702 ; X86-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x04]
1703 ; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
1704 ; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
1705 ; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
1706 ; X86-NEXT: addl %ecx, %esi # encoding: [0x01,0xce]
1707 ; X86-NEXT: adcl %eax, %edx # encoding: [0x11,0xc2]
1708 ; X86-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x05]
1709 ; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
1710 ; X86-NEXT: kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
1711 ; X86-NEXT: kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8]
1712 ; X86-NEXT: addl %esi, %edi # encoding: [0x01,0xf7]
1713 ; X86-NEXT: adcl %edx, %ecx # encoding: [0x11,0xd1]
1714 ; X86-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x64,0xc1]
1715 ; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
1716 ; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
1717 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
1718 ; X86-NEXT: addl %edi, %eax # encoding: [0x01,0xf8]
1719 ; X86-NEXT: adcl %ecx, %edx # encoding: [0x11,0xca]
1720 ; X86-NEXT: addl $-1, %eax # encoding: [0x83,0xc0,0xff]
1721 ; X86-NEXT: adcl $-1, %edx # encoding: [0x83,0xd2,0xff]
1722 ; X86-NEXT: popl %esi # encoding: [0x5e]
1723 ; X86-NEXT: .cfi_def_cfa_offset 8
1724 ; X86-NEXT: popl %edi # encoding: [0x5f]
1725 ; X86-NEXT: .cfi_def_cfa_offset 4
1726 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1727 ; X86-NEXT: retl # encoding: [0xc3]
1729 ; X64-LABEL: test_cmp_b_512:
1731 ; X64-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xc1]
1732 ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
1733 ; X64-NEXT: vpcmpgtb %zmm0, %zmm1, %k0 # encoding: [0x62,0xf1,0x75,0x48,0x64,0xc0]
1734 ; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8]
1735 ; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1]
1736 ; X64-NEXT: vpcmpleb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x02]
1737 ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
1738 ; X64-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8]
1739 ; X64-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x04]
1740 ; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8]
1741 ; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1]
1742 ; X64-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x05]
1743 ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
1744 ; X64-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8]
1745 ; X64-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x64,0xc1]
1746 ; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8]
1747 ; X64-NEXT: leaq -1(%rcx,%rax), %rax # encoding: [0x48,0x8d,0x44,0x01,0xff]
1748 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1749 ; X64-NEXT: retq # encoding: [0xc3]
1750 %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
1751 %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1)
1752 %ret1 = add i64 %res0, %res1
1753 %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 -1)
1754 %ret2 = add i64 %ret1, %res2
1755 %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 -1)
1756 %ret3 = add i64 %ret2, %res3
1757 %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 -1)
1758 %ret4 = add i64 %ret3, %res4
1759 %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 -1)
1760 %ret5 = add i64 %ret4, %res5
1761 %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 -1)
1762 %ret6 = add i64 %ret5, %res6
1763 %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 -1)
1764 %ret7 = add i64 %ret6, %res7
1768 define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
1769 ; X86-LABEL: test_mask_cmp_b_512:
1771 ; X86-NEXT: pushl %ebp # encoding: [0x55]
1772 ; X86-NEXT: .cfi_def_cfa_offset 8
1773 ; X86-NEXT: pushl %ebx # encoding: [0x53]
1774 ; X86-NEXT: .cfi_def_cfa_offset 12
1775 ; X86-NEXT: pushl %edi # encoding: [0x57]
1776 ; X86-NEXT: .cfi_def_cfa_offset 16
1777 ; X86-NEXT: pushl %esi # encoding: [0x56]
1778 ; X86-NEXT: .cfi_def_cfa_offset 20
1779 ; X86-NEXT: .cfi_offset %esi, -20
1780 ; X86-NEXT: .cfi_offset %edi, -16
1781 ; X86-NEXT: .cfi_offset %ebx, -12
1782 ; X86-NEXT: .cfi_offset %ebp, -8
1783 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x14]
1784 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x18]
1785 ; X86-NEXT: vpcmpeqb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xd1]
1786 ; X86-NEXT: kmovd %esi, %k0 # encoding: [0xc5,0xfb,0x92,0xc6]
1787 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
1788 ; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
1789 ; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9]
1790 ; X86-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
1791 ; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0]
1792 ; X86-NEXT: kmovd %k2, %edx # encoding: [0xc5,0xfb,0x93,0xd2]
1793 ; X86-NEXT: vpcmpgtb %zmm0, %zmm1, %k2 # encoding: [0x62,0xf1,0x75,0x48,0x64,0xd0]
1794 ; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
1795 ; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9]
1796 ; X86-NEXT: kmovd %k3, %edi # encoding: [0xc5,0xfb,0x93,0xfb]
1797 ; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0]
1798 ; X86-NEXT: kmovd %k2, %ebx # encoding: [0xc5,0xfb,0x93,0xda]
1799 ; X86-NEXT: addl %edx, %ebx # encoding: [0x01,0xd3]
1800 ; X86-NEXT: adcl %eax, %edi # encoding: [0x11,0xc7]
1801 ; X86-NEXT: vpcmpleb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xd1,0x02]
1802 ; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
1803 ; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9]
1804 ; X86-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
1805 ; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0]
1806 ; X86-NEXT: kmovd %k2, %edx # encoding: [0xc5,0xfb,0x93,0xd2]
1807 ; X86-NEXT: addl %ebx, %edx # encoding: [0x01,0xda]
1808 ; X86-NEXT: adcl %edi, %eax # encoding: [0x11,0xf8]
1809 ; X86-NEXT: vpcmpneqb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xd1,0x04]
1810 ; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
1811 ; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9]
1812 ; X86-NEXT: kmovd %k3, %edi # encoding: [0xc5,0xfb,0x93,0xfb]
1813 ; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0]
1814 ; X86-NEXT: kmovd %k2, %ebx # encoding: [0xc5,0xfb,0x93,0xda]
1815 ; X86-NEXT: addl %edx, %ebx # encoding: [0x01,0xd3]
1816 ; X86-NEXT: adcl %eax, %edi # encoding: [0x11,0xc7]
1817 ; X86-NEXT: vpcmpnltb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xd1,0x05]
1818 ; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
1819 ; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9]
1820 ; X86-NEXT: kmovd %k3, %ebp # encoding: [0xc5,0xfb,0x93,0xeb]
1821 ; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0]
1822 ; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
1823 ; X86-NEXT: addl %ebx, %ecx # encoding: [0x01,0xd9]
1824 ; X86-NEXT: adcl %edi, %ebp # encoding: [0x11,0xfd]
1825 ; X86-NEXT: vpcmpgtb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf1,0x7d,0x48,0x64,0xd1]
1826 ; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
1827 ; X86-NEXT: kandd %k1, %k3, %k1 # encoding: [0xc4,0xe1,0xe5,0x41,0xc9]
1828 ; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
1829 ; X86-NEXT: kandd %k0, %k2, %k0 # encoding: [0xc4,0xe1,0xed,0x41,0xc0]
1830 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
1831 ; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
1832 ; X86-NEXT: adcl %ebp, %edx # encoding: [0x11,0xea]
1833 ; X86-NEXT: addl %esi, %eax # encoding: [0x01,0xf0]
1834 ; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx # encoding: [0x13,0x54,0x24,0x18]
1835 ; X86-NEXT: popl %esi # encoding: [0x5e]
1836 ; X86-NEXT: .cfi_def_cfa_offset 16
1837 ; X86-NEXT: popl %edi # encoding: [0x5f]
1838 ; X86-NEXT: .cfi_def_cfa_offset 12
1839 ; X86-NEXT: popl %ebx # encoding: [0x5b]
1840 ; X86-NEXT: .cfi_def_cfa_offset 8
1841 ; X86-NEXT: popl %ebp # encoding: [0x5d]
1842 ; X86-NEXT: .cfi_def_cfa_offset 4
1843 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1844 ; X86-NEXT: retl # encoding: [0xc3]
1846 ; X64-LABEL: test_mask_cmp_b_512:
1848 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
1849 ; X64-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x74,0xc1]
1850 ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
1851 ; X64-NEXT: vpcmpgtb %zmm0, %zmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x64,0xc0]
1852 ; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8]
1853 ; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1]
1854 ; X64-NEXT: vpcmpleb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x02]
1855 ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
1856 ; X64-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8]
1857 ; X64-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x04]
1858 ; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8]
1859 ; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1]
1860 ; X64-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x05]
1861 ; X64-NEXT: kmovq %k0, %rdx # encoding: [0xc4,0xe1,0xfb,0x93,0xd0]
1862 ; X64-NEXT: addq %rcx, %rdx # encoding: [0x48,0x01,0xca]
1863 ; X64-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x64,0xc1]
1864 ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
1865 ; X64-NEXT: addq %rdx, %rax # encoding: [0x48,0x01,0xd0]
1866 ; X64-NEXT: addq %rdi, %rax # encoding: [0x48,0x01,0xf8]
1867 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1868 ; X64-NEXT: retq # encoding: [0xc3]
1869 %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
1870 %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask)
1871 %ret1 = add i64 %res0, %res1
1872 %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 %mask)
1873 %ret2 = add i64 %ret1, %res2
1874 %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 %mask)
1875 %ret3 = add i64 %ret2, %res3
1876 %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 %mask)
1877 %ret4 = add i64 %ret3, %res4
1878 %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 %mask)
1879 %ret5 = add i64 %ret4, %res5
1880 %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 %mask)
1881 %ret6 = add i64 %ret5, %res6
1882 %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 %mask)
1883 %ret7 = add i64 %ret6, %res7
1887 declare i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8>, <64 x i8>, i32, i64) nounwind readnone
1889 define i64 @test_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
1890 ; X86-LABEL: test_ucmp_b_512:
1892 ; X86-NEXT: pushl %edi # encoding: [0x57]
1893 ; X86-NEXT: .cfi_def_cfa_offset 8
1894 ; X86-NEXT: pushl %esi # encoding: [0x56]
1895 ; X86-NEXT: .cfi_def_cfa_offset 12
1896 ; X86-NEXT: .cfi_offset %esi, -12
1897 ; X86-NEXT: .cfi_offset %edi, -8
1898 ; X86-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xc1]
1899 ; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
1900 ; X86-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
1901 ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
1902 ; X86-NEXT: vpcmpltub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x01]
1903 ; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
1904 ; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
1905 ; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
1906 ; X86-NEXT: addl %ecx, %esi # encoding: [0x01,0xce]
1907 ; X86-NEXT: adcl %eax, %edx # encoding: [0x11,0xc2]
1908 ; X86-NEXT: vpcmpleub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x02]
1909 ; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
1910 ; X86-NEXT: kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
1911 ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
1912 ; X86-NEXT: addl %esi, %ecx # encoding: [0x01,0xf1]
1913 ; X86-NEXT: adcl %edx, %eax # encoding: [0x11,0xd0]
1914 ; X86-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x04]
1915 ; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
1916 ; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
1917 ; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
1918 ; X86-NEXT: addl %ecx, %esi # encoding: [0x01,0xce]
1919 ; X86-NEXT: adcl %eax, %edx # encoding: [0x11,0xc2]
1920 ; X86-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x05]
1921 ; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
1922 ; X86-NEXT: kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
1923 ; X86-NEXT: kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8]
1924 ; X86-NEXT: addl %esi, %edi # encoding: [0x01,0xf7]
1925 ; X86-NEXT: adcl %edx, %ecx # encoding: [0x11,0xd1]
1926 ; X86-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x06]
1927 ; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
1928 ; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
1929 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
1930 ; X86-NEXT: addl %edi, %eax # encoding: [0x01,0xf8]
1931 ; X86-NEXT: adcl %ecx, %edx # encoding: [0x11,0xca]
1932 ; X86-NEXT: addl $-1, %eax # encoding: [0x83,0xc0,0xff]
1933 ; X86-NEXT: adcl $-1, %edx # encoding: [0x83,0xd2,0xff]
1934 ; X86-NEXT: popl %esi # encoding: [0x5e]
1935 ; X86-NEXT: .cfi_def_cfa_offset 8
1936 ; X86-NEXT: popl %edi # encoding: [0x5f]
1937 ; X86-NEXT: .cfi_def_cfa_offset 4
1938 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1939 ; X86-NEXT: retl # encoding: [0xc3]
1941 ; X64-LABEL: test_ucmp_b_512:
1943 ; X64-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xc1]
1944 ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
1945 ; X64-NEXT: vpcmpltub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x01]
1946 ; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8]
1947 ; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1]
1948 ; X64-NEXT: vpcmpleub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x02]
1949 ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
1950 ; X64-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8]
1951 ; X64-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x04]
1952 ; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8]
1953 ; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1]
1954 ; X64-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x05]
1955 ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
1956 ; X64-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8]
1957 ; X64-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x06]
1958 ; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8]
1959 ; X64-NEXT: leaq -1(%rcx,%rax), %rax # encoding: [0x48,0x8d,0x44,0x01,0xff]
1960 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
1961 ; X64-NEXT: retq # encoding: [0xc3]
1962 %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
1963 %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1)
1964 %ret1 = add i64 %res0, %res1
1965 %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 -1)
1966 %ret2 = add i64 %ret1, %res2
1967 %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 -1)
1968 %ret3 = add i64 %ret2, %res3
1969 %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 -1)
1970 %ret4 = add i64 %ret3, %res4
1971 %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 -1)
1972 %ret5 = add i64 %ret4, %res5
1973 %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 -1)
1974 %ret6 = add i64 %ret5, %res6
1975 %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 -1)
1976 %ret7 = add i64 %ret6, %res7
1980 define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
1981 ; X86-LABEL: test_mask_x86_avx512_ucmp_b_512:
1983 ; X86-NEXT: pushl %ebp # encoding: [0x55]
1984 ; X86-NEXT: .cfi_def_cfa_offset 8
1985 ; X86-NEXT: pushl %ebx # encoding: [0x53]
1986 ; X86-NEXT: .cfi_def_cfa_offset 12
1987 ; X86-NEXT: pushl %edi # encoding: [0x57]
1988 ; X86-NEXT: .cfi_def_cfa_offset 16
1989 ; X86-NEXT: pushl %esi # encoding: [0x56]
1990 ; X86-NEXT: .cfi_def_cfa_offset 20
1991 ; X86-NEXT: .cfi_offset %esi, -20
1992 ; X86-NEXT: .cfi_offset %edi, -16
1993 ; X86-NEXT: .cfi_offset %ebx, -12
1994 ; X86-NEXT: .cfi_offset %ebp, -8
1995 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x14]
1996 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x18]
1997 ; X86-NEXT: vpcmpeqb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xd1]
1998 ; X86-NEXT: kmovd %esi, %k0 # encoding: [0xc5,0xfb,0x92,0xc6]
1999 ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
2000 ; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
2001 ; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9]
2002 ; X86-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
2003 ; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0]
2004 ; X86-NEXT: kmovd %k2, %edx # encoding: [0xc5,0xfb,0x93,0xd2]
2005 ; X86-NEXT: vpcmpltub %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xd1,0x01]
2006 ; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
2007 ; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9]
2008 ; X86-NEXT: kmovd %k3, %edi # encoding: [0xc5,0xfb,0x93,0xfb]
2009 ; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0]
2010 ; X86-NEXT: kmovd %k2, %ebx # encoding: [0xc5,0xfb,0x93,0xda]
2011 ; X86-NEXT: addl %edx, %ebx # encoding: [0x01,0xd3]
2012 ; X86-NEXT: adcl %eax, %edi # encoding: [0x11,0xc7]
2013 ; X86-NEXT: vpcmpleub %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xd1,0x02]
2014 ; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
2015 ; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9]
2016 ; X86-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
2017 ; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0]
2018 ; X86-NEXT: kmovd %k2, %edx # encoding: [0xc5,0xfb,0x93,0xd2]
2019 ; X86-NEXT: addl %ebx, %edx # encoding: [0x01,0xda]
2020 ; X86-NEXT: adcl %edi, %eax # encoding: [0x11,0xf8]
2021 ; X86-NEXT: vpcmpneqb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xd1,0x04]
2022 ; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
2023 ; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9]
2024 ; X86-NEXT: kmovd %k3, %edi # encoding: [0xc5,0xfb,0x93,0xfb]
2025 ; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0]
2026 ; X86-NEXT: kmovd %k2, %ebx # encoding: [0xc5,0xfb,0x93,0xda]
2027 ; X86-NEXT: addl %edx, %ebx # encoding: [0x01,0xd3]
2028 ; X86-NEXT: adcl %eax, %edi # encoding: [0x11,0xc7]
2029 ; X86-NEXT: vpcmpnltub %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xd1,0x05]
2030 ; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
2031 ; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9]
2032 ; X86-NEXT: kmovd %k3, %ebp # encoding: [0xc5,0xfb,0x93,0xeb]
2033 ; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0]
2034 ; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
2035 ; X86-NEXT: addl %ebx, %ecx # encoding: [0x01,0xd9]
2036 ; X86-NEXT: adcl %edi, %ebp # encoding: [0x11,0xfd]
2037 ; X86-NEXT: vpcmpnleub %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xd1,0x06]
2038 ; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
2039 ; X86-NEXT: kandd %k1, %k3, %k1 # encoding: [0xc4,0xe1,0xe5,0x41,0xc9]
2040 ; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
2041 ; X86-NEXT: kandd %k0, %k2, %k0 # encoding: [0xc4,0xe1,0xed,0x41,0xc0]
2042 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2043 ; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
2044 ; X86-NEXT: adcl %ebp, %edx # encoding: [0x11,0xea]
2045 ; X86-NEXT: addl %esi, %eax # encoding: [0x01,0xf0]
2046 ; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx # encoding: [0x13,0x54,0x24,0x18]
2047 ; X86-NEXT: popl %esi # encoding: [0x5e]
2048 ; X86-NEXT: .cfi_def_cfa_offset 16
2049 ; X86-NEXT: popl %edi # encoding: [0x5f]
2050 ; X86-NEXT: .cfi_def_cfa_offset 12
2051 ; X86-NEXT: popl %ebx # encoding: [0x5b]
2052 ; X86-NEXT: .cfi_def_cfa_offset 8
2053 ; X86-NEXT: popl %ebp # encoding: [0x5d]
2054 ; X86-NEXT: .cfi_def_cfa_offset 4
2055 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2056 ; X86-NEXT: retl # encoding: [0xc3]
2058 ; X64-LABEL: test_mask_x86_avx512_ucmp_b_512:
2060 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
2061 ; X64-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x74,0xc1]
2062 ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
2063 ; X64-NEXT: vpcmpltub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x01]
2064 ; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8]
2065 ; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1]
2066 ; X64-NEXT: vpcmpleub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x02]
2067 ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
2068 ; X64-NEXT: addq %rcx, %rax # encoding: [0x48,0x01,0xc8]
2069 ; X64-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x04]
2070 ; X64-NEXT: kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8]
2071 ; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1]
2072 ; X64-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x05]
2073 ; X64-NEXT: kmovq %k0, %rdx # encoding: [0xc4,0xe1,0xfb,0x93,0xd0]
2074 ; X64-NEXT: addq %rcx, %rdx # encoding: [0x48,0x01,0xca]
2075 ; X64-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x06]
2076 ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
2077 ; X64-NEXT: addq %rdx, %rax # encoding: [0x48,0x01,0xd0]
2078 ; X64-NEXT: addq %rdi, %rax # encoding: [0x48,0x01,0xf8]
2079 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2080 ; X64-NEXT: retq # encoding: [0xc3]
2081 %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
2082 %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask)
2083 %ret1 = add i64 %res0, %res1
2084 %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 %mask)
2085 %ret2 = add i64 %ret1, %res2
2086 %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 %mask)
2087 %ret3 = add i64 %ret2, %res3
2088 %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 %mask)
2089 %ret4 = add i64 %ret3, %res4
2090 %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 %mask)
2091 %ret5 = add i64 %ret4, %res5
2092 %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 %mask)
2093 %ret6 = add i64 %ret5, %res6
2094 %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 %mask)
2095 %ret7 = add i64 %ret6, %res7
2099 declare i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8>, <64 x i8>, i32, i64) nounwind readnone
2101 define i32 @test_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1) {
2102 ; X86-LABEL: test_cmp_w_512:
2104 ; X86-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x75,0xc1]
2105 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2106 ; X86-NEXT: vpcmpgtw %zmm0, %zmm1, %k0 # encoding: [0x62,0xf1,0x75,0x48,0x65,0xc0]
2107 ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
2108 ; X86-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1]
2109 ; X86-NEXT: vpcmplew %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x02]
2110 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2111 ; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
2112 ; X86-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x04]
2113 ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
2114 ; X86-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1]
2115 ; X86-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x05]
2116 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2117 ; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
2118 ; X86-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x65,0xc1]
2119 ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
2120 ; X86-NEXT: leal -1(%ecx,%eax), %eax # encoding: [0x8d,0x44,0x01,0xff]
2121 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2122 ; X86-NEXT: retl # encoding: [0xc3]
2124 ; X64-LABEL: test_cmp_w_512:
2126 ; X64-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x75,0xc1]
2127 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2128 ; X64-NEXT: vpcmpgtw %zmm0, %zmm1, %k0 # encoding: [0x62,0xf1,0x75,0x48,0x65,0xc0]
2129 ; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
2130 ; X64-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1]
2131 ; X64-NEXT: vpcmplew %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x02]
2132 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2133 ; X64-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
2134 ; X64-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x04]
2135 ; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
2136 ; X64-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1]
2137 ; X64-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x05]
2138 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2139 ; X64-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
2140 ; X64-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x65,0xc1]
2141 ; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
2142 ; X64-NEXT: leal -1(%rcx,%rax), %eax # encoding: [0x8d,0x44,0x01,0xff]
2143 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2144 ; X64-NEXT: retq # encoding: [0xc3]
2145 %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1)
2146 %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 -1)
2147 %ret1 = add i32 %res0, %res1
2148 %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 -1)
2149 %ret2 = add i32 %ret1, %res2
2150 %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 -1)
2151 %ret3 = add i32 %ret2, %res3
2152 %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 -1)
2153 %ret4 = add i32 %ret3, %res4
2154 %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 -1)
2155 %ret5 = add i32 %ret4, %res5
2156 %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 -1)
2157 %ret6 = add i32 %ret5, %res6
2158 %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 -1)
2159 %ret7 = add i32 %ret6, %res7
2163 define i32 @test_mask_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) {
2164 ; X86-LABEL: test_mask_cmp_w_512:
2166 ; X86-NEXT: pushl %esi # encoding: [0x56]
2167 ; X86-NEXT: .cfi_def_cfa_offset 8
2168 ; X86-NEXT: .cfi_offset %esi, -8
2169 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x08]
2170 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
2171 ; X86-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x75,0xc1]
2172 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2173 ; X86-NEXT: vpcmpgtw %zmm0, %zmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x65,0xc0]
2174 ; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
2175 ; X86-NEXT: addl %eax, %edx # encoding: [0x01,0xc2]
2176 ; X86-NEXT: vpcmplew %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x02]
2177 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2178 ; X86-NEXT: addl %edx, %eax # encoding: [0x01,0xd0]
2179 ; X86-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x04]
2180 ; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
2181 ; X86-NEXT: addl %eax, %edx # encoding: [0x01,0xc2]
2182 ; X86-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x05]
2183 ; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
2184 ; X86-NEXT: addl %edx, %esi # encoding: [0x01,0xd6]
2185 ; X86-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x65,0xc1]
2186 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2187 ; X86-NEXT: addl %esi, %eax # encoding: [0x01,0xf0]
2188 ; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
2189 ; X86-NEXT: popl %esi # encoding: [0x5e]
2190 ; X86-NEXT: .cfi_def_cfa_offset 4
2191 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2192 ; X86-NEXT: retl # encoding: [0xc3]
2194 ; X64-LABEL: test_mask_cmp_w_512:
2196 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2197 ; X64-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x75,0xc1]
2198 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2199 ; X64-NEXT: vpcmpgtw %zmm0, %zmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x65,0xc0]
2200 ; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
2201 ; X64-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1]
2202 ; X64-NEXT: vpcmplew %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x02]
2203 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2204 ; X64-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
2205 ; X64-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x04]
2206 ; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
2207 ; X64-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1]
2208 ; X64-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x05]
2209 ; X64-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
2210 ; X64-NEXT: addl %ecx, %edx # encoding: [0x01,0xca]
2211 ; X64-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x65,0xc1]
2212 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2213 ; X64-NEXT: addl %edx, %eax # encoding: [0x01,0xd0]
2214 ; X64-NEXT: addl %edi, %eax # encoding: [0x01,0xf8]
2215 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2216 ; X64-NEXT: retq # encoding: [0xc3]
2217 %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask)
2218 %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 %mask)
2219 %ret1 = add i32 %res0, %res1
2220 %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 %mask)
2221 %ret2 = add i32 %ret1, %res2
2222 %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 %mask)
2223 %ret3 = add i32 %ret2, %res3
2224 %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 %mask)
2225 %ret4 = add i32 %ret3, %res4
2226 %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 %mask)
2227 %ret5 = add i32 %ret4, %res5
2228 %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 %mask)
2229 %ret6 = add i32 %ret5, %res6
2230 %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 %mask)
2231 %ret7 = add i32 %ret6, %res7
2235 declare i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16>, <32 x i16>, i32, i32) nounwind readnone
2237 define i32 @test_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1) {
2238 ; X86-LABEL: test_ucmp_w_512:
2240 ; X86-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x75,0xc1]
2241 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2242 ; X86-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x01]
2243 ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
2244 ; X86-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1]
2245 ; X86-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x02]
2246 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2247 ; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
2248 ; X86-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x04]
2249 ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
2250 ; X86-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1]
2251 ; X86-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x05]
2252 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2253 ; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
2254 ; X86-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x06]
2255 ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
2256 ; X86-NEXT: leal -1(%ecx,%eax), %eax # encoding: [0x8d,0x44,0x01,0xff]
2257 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2258 ; X86-NEXT: retl # encoding: [0xc3]
2260 ; X64-LABEL: test_ucmp_w_512:
2262 ; X64-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x75,0xc1]
2263 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2264 ; X64-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x01]
2265 ; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
2266 ; X64-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1]
2267 ; X64-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x02]
2268 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2269 ; X64-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
2270 ; X64-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x04]
2271 ; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
2272 ; X64-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1]
2273 ; X64-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x05]
2274 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2275 ; X64-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
2276 ; X64-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x06]
2277 ; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
2278 ; X64-NEXT: leal -1(%rcx,%rax), %eax # encoding: [0x8d,0x44,0x01,0xff]
2279 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2280 ; X64-NEXT: retq # encoding: [0xc3]
2281 %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1)
2282 %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 -1)
2283 %ret1 = add i32 %res0, %res1
2284 %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 -1)
2285 %ret2 = add i32 %ret1, %res2
2286 %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 -1)
2287 %ret3 = add i32 %ret2, %res3
2288 %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 -1)
2289 %ret4 = add i32 %ret3, %res4
2290 %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 -1)
2291 %ret5 = add i32 %ret4, %res5
2292 %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 -1)
2293 %ret6 = add i32 %ret5, %res6
2294 %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 -1)
2295 %ret7 = add i32 %ret6, %res7
2299 define i32 @test_mask_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) {
2300 ; X86-LABEL: test_mask_ucmp_w_512:
2302 ; X86-NEXT: pushl %esi # encoding: [0x56]
2303 ; X86-NEXT: .cfi_def_cfa_offset 8
2304 ; X86-NEXT: .cfi_offset %esi, -8
2305 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x08]
2306 ; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
2307 ; X86-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x75,0xc1]
2308 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2309 ; X86-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x01]
2310 ; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
2311 ; X86-NEXT: addl %eax, %edx # encoding: [0x01,0xc2]
2312 ; X86-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x02]
2313 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2314 ; X86-NEXT: addl %edx, %eax # encoding: [0x01,0xd0]
2315 ; X86-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x04]
2316 ; X86-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
2317 ; X86-NEXT: addl %eax, %edx # encoding: [0x01,0xc2]
2318 ; X86-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x05]
2319 ; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
2320 ; X86-NEXT: addl %edx, %esi # encoding: [0x01,0xd6]
2321 ; X86-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x06]
2322 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2323 ; X86-NEXT: addl %esi, %eax # encoding: [0x01,0xf0]
2324 ; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
2325 ; X86-NEXT: popl %esi # encoding: [0x5e]
2326 ; X86-NEXT: .cfi_def_cfa_offset 4
2327 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2328 ; X86-NEXT: retl # encoding: [0xc3]
2330 ; X64-LABEL: test_mask_ucmp_w_512:
2332 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2333 ; X64-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x75,0xc1]
2334 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2335 ; X64-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x01]
2336 ; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
2337 ; X64-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1]
2338 ; X64-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x02]
2339 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2340 ; X64-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
2341 ; X64-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x04]
2342 ; X64-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
2343 ; X64-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1]
2344 ; X64-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x05]
2345 ; X64-NEXT: kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
2346 ; X64-NEXT: addl %ecx, %edx # encoding: [0x01,0xca]
2347 ; X64-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x06]
2348 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2349 ; X64-NEXT: addl %edx, %eax # encoding: [0x01,0xd0]
2350 ; X64-NEXT: addl %edi, %eax # encoding: [0x01,0xf8]
2351 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2352 ; X64-NEXT: retq # encoding: [0xc3]
2353 %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask)
2354 %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 %mask)
2355 %ret1 = add i32 %res0, %res1
2356 %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 %mask)
2357 %ret2 = add i32 %ret1, %res2
2358 %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 %mask)
2359 %ret3 = add i32 %ret2, %res3
2360 %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 %mask)
2361 %ret4 = add i32 %ret3, %res4
2362 %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 %mask)
2363 %ret5 = add i32 %ret4, %res5
2364 %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 %mask)
2365 %ret6 = add i32 %ret5, %res6
2366 %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 %mask)
2367 %ret7 = add i32 %ret6, %res7
2371 declare i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16>, <32 x i16>, i32, i32) nounwind readnone
2374 declare <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
2376 define <64 x i8>@mm512_avg_epu8(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
2377 ; X86-LABEL: mm512_avg_epu8:
2379 ; X86-NEXT: vpavgb %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe0,0xd9]
2380 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
2381 ; X86-NEXT: vpavgb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe0,0xd1]
2382 ; X86-NEXT: vpaddb %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc3]
2383 ; X86-NEXT: retl # encoding: [0xc3]
2385 ; X64-LABEL: mm512_avg_epu8:
2387 ; X64-NEXT: vpavgb %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe0,0xd9]
2388 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
2389 ; X64-NEXT: vpavgb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe0,0xd1]
2390 ; X64-NEXT: vpaddb %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc3]
2391 ; X64-NEXT: retq # encoding: [0xc3]
2392 %res = call <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
2393 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
2394 %res2 = add <64 x i8> %res, %res1
2399 declare <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2401 define <32 x i16>@mm512_avg_epu16(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
2402 ; X86-LABEL: mm512_avg_epu16:
2404 ; X86-NEXT: vpavgw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe3,0xd9]
2405 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
2406 ; X86-NEXT: vpavgw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe3,0xd1]
2407 ; X86-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
2408 ; X86-NEXT: retl # encoding: [0xc3]
2410 ; X64-LABEL: mm512_avg_epu16:
2412 ; X64-NEXT: vpavgw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe3,0xd9]
2413 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2414 ; X64-NEXT: vpavgw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe3,0xd1]
2415 ; X64-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
2416 ; X64-NEXT: retq # encoding: [0xc3]
2417 %res = call <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
2418 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
2419 %res2 = add <32 x i16> %res, %res1
2420 ret <32 x i16> %res2
2423 declare <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16>, <32 x i16>, i32)
2425 define <32 x i16>@test_int_x86_avx512_mask_pabs_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) {
2426 ; X86-LABEL: test_int_x86_avx512_mask_pabs_w_512:
2428 ; X86-NEXT: vpabsw %zmm0, %zmm2 # encoding: [0x62,0xf2,0x7d,0x48,0x1d,0xd0]
2429 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
2430 ; X86-NEXT: vpabsw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x1d,0xc8]
2431 ; X86-NEXT: vpaddw %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc2]
2432 ; X86-NEXT: retl # encoding: [0xc3]
2434 ; X64-LABEL: test_int_x86_avx512_mask_pabs_w_512:
2436 ; X64-NEXT: vpabsw %zmm0, %zmm2 # encoding: [0x62,0xf2,0x7d,0x48,0x1d,0xd0]
2437 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2438 ; X64-NEXT: vpabsw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x1d,0xc8]
2439 ; X64-NEXT: vpaddw %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc2]
2440 ; X64-NEXT: retq # encoding: [0xc3]
2441 %res = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2)
2442 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 -1)
2443 %res2 = add <32 x i16> %res, %res1
2444 ret <32 x i16> %res2
2447 declare <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8>, <64 x i8>, i64)
2449 define <64 x i8>@test_int_x86_avx512_mask_pabs_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) {
2450 ; X86-LABEL: test_int_x86_avx512_mask_pabs_b_512:
2452 ; X86-NEXT: vpabsb %zmm0, %zmm2 # encoding: [0x62,0xf2,0x7d,0x48,0x1c,0xd0]
2453 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
2454 ; X86-NEXT: vpabsb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x1c,0xc8]
2455 ; X86-NEXT: vpaddb %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfc,0xc2]
2456 ; X86-NEXT: retl # encoding: [0xc3]
2458 ; X64-LABEL: test_int_x86_avx512_mask_pabs_b_512:
2460 ; X64-NEXT: vpabsb %zmm0, %zmm2 # encoding: [0x62,0xf2,0x7d,0x48,0x1c,0xd0]
2461 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
2462 ; X64-NEXT: vpabsb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x1c,0xc8]
2463 ; X64-NEXT: vpaddb %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfc,0xc2]
2464 ; X64-NEXT: retq # encoding: [0xc3]
2465 %res = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2)
2466 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 -1)
2467 %res2 = add <64 x i8> %res, %res1
2471 declare i64 @llvm.x86.avx512.ptestm.b.512(<64 x i8>, <64 x i8>, i64)
2473 define i64@test_int_x86_avx512_ptestm_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) {
2474 ; X86-LABEL: test_int_x86_avx512_ptestm_b_512:
2476 ; X86-NEXT: pushl %esi # encoding: [0x56]
2477 ; X86-NEXT: .cfi_def_cfa_offset 8
2478 ; X86-NEXT: .cfi_offset %esi, -8
2479 ; X86-NEXT: vptestmb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x48,0x26,0xc1]
2480 ; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
2481 ; X86-NEXT: kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
2482 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c]
2483 ; X86-NEXT: andl %ecx, %edx # encoding: [0x21,0xca]
2484 ; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
2485 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
2486 ; X86-NEXT: andl %esi, %eax # encoding: [0x21,0xf0]
2487 ; X86-NEXT: addl %esi, %eax # encoding: [0x01,0xf0]
2488 ; X86-NEXT: adcl %ecx, %edx # encoding: [0x11,0xca]
2489 ; X86-NEXT: popl %esi # encoding: [0x5e]
2490 ; X86-NEXT: .cfi_def_cfa_offset 4
2491 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2492 ; X86-NEXT: retl # encoding: [0xc3]
2494 ; X64-LABEL: test_int_x86_avx512_ptestm_b_512:
2496 ; X64-NEXT: vptestmb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x48,0x26,0xc1]
2497 ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
2498 ; X64-NEXT: andq %rax, %rdi # encoding: [0x48,0x21,0xc7]
2499 ; X64-NEXT: addq %rdi, %rax # encoding: [0x48,0x01,0xf8]
2500 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2501 ; X64-NEXT: retq # encoding: [0xc3]
2502 %res = call i64 @llvm.x86.avx512.ptestm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2)
2503 %res1 = call i64 @llvm.x86.avx512.ptestm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64-1)
2504 %res2 = add i64 %res, %res1
2508 declare i32 @llvm.x86.avx512.ptestm.w.512(<32 x i16>, <32 x i16>, i32)
2510 define i32@test_int_x86_avx512_ptestm_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) {
2511 ; X86-LABEL: test_int_x86_avx512_ptestm_w_512:
2513 ; X86-NEXT: vptestmw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x26,0xc1]
2514 ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
2515 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2516 ; X86-NEXT: andl %ecx, %eax # encoding: [0x21,0xc8]
2517 ; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
2518 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2519 ; X86-NEXT: retl # encoding: [0xc3]
2521 ; X64-LABEL: test_int_x86_avx512_ptestm_w_512:
2523 ; X64-NEXT: vptestmw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x26,0xc1]
2524 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2525 ; X64-NEXT: andl %eax, %edi # encoding: [0x21,0xc7]
2526 ; X64-NEXT: addl %edi, %eax # encoding: [0x01,0xf8]
2527 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2528 ; X64-NEXT: retq # encoding: [0xc3]
2529 %res = call i32 @llvm.x86.avx512.ptestm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2)
2530 %res1 = call i32 @llvm.x86.avx512.ptestm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32-1)
2531 %res2 = add i32 %res, %res1
2535 declare i64 @llvm.x86.avx512.ptestnm.b.512(<64 x i8>, <64 x i8>, i64 %x2)
2537 define i64@test_int_x86_avx512_ptestnm_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) {
2538 ; X86-LABEL: test_int_x86_avx512_ptestnm_b_512:
2540 ; X86-NEXT: pushl %esi # encoding: [0x56]
2541 ; X86-NEXT: .cfi_def_cfa_offset 8
2542 ; X86-NEXT: .cfi_offset %esi, -8
2543 ; X86-NEXT: vptestnmb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x48,0x26,0xc1]
2544 ; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
2545 ; X86-NEXT: kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
2546 ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c]
2547 ; X86-NEXT: andl %ecx, %edx # encoding: [0x21,0xca]
2548 ; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
2549 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
2550 ; X86-NEXT: andl %esi, %eax # encoding: [0x21,0xf0]
2551 ; X86-NEXT: addl %esi, %eax # encoding: [0x01,0xf0]
2552 ; X86-NEXT: adcl %ecx, %edx # encoding: [0x11,0xca]
2553 ; X86-NEXT: popl %esi # encoding: [0x5e]
2554 ; X86-NEXT: .cfi_def_cfa_offset 4
2555 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2556 ; X86-NEXT: retl # encoding: [0xc3]
2558 ; X64-LABEL: test_int_x86_avx512_ptestnm_b_512:
2560 ; X64-NEXT: vptestnmb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x48,0x26,0xc1]
2561 ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
2562 ; X64-NEXT: andq %rax, %rdi # encoding: [0x48,0x21,0xc7]
2563 ; X64-NEXT: addq %rdi, %rax # encoding: [0x48,0x01,0xf8]
2564 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2565 ; X64-NEXT: retq # encoding: [0xc3]
2566 %res = call i64 @llvm.x86.avx512.ptestnm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2)
2567 %res1 = call i64 @llvm.x86.avx512.ptestnm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64-1)
2568 %res2 = add i64 %res, %res1
2572 declare i32 @llvm.x86.avx512.ptestnm.w.512(<32 x i16>, <32 x i16>, i32 %x2)
2574 define i32@test_int_x86_avx512_ptestnm_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) {
2575 ; X86-LABEL: test_int_x86_avx512_ptestnm_w_512:
2577 ; X86-NEXT: vptestnmw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x48,0x26,0xc1]
2578 ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
2579 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2580 ; X86-NEXT: andl %ecx, %eax # encoding: [0x21,0xc8]
2581 ; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
2582 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2583 ; X86-NEXT: retl # encoding: [0xc3]
2585 ; X64-LABEL: test_int_x86_avx512_ptestnm_w_512:
2587 ; X64-NEXT: vptestnmw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x48,0x26,0xc1]
2588 ; X64-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2589 ; X64-NEXT: andl %eax, %edi # encoding: [0x21,0xc7]
2590 ; X64-NEXT: addl %edi, %eax # encoding: [0x01,0xf8]
2591 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2592 ; X64-NEXT: retq # encoding: [0xc3]
2593 %res = call i32 @llvm.x86.avx512.ptestnm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2)
2594 %res1 = call i32 @llvm.x86.avx512.ptestnm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32-1)
2595 %res2 = add i32 %res, %res1
2599 declare i64 @llvm.x86.avx512.cvtb2mask.512(<64 x i8>)
2601 define i64@test_int_x86_avx512_cvtb2mask_512(<64 x i8> %x0) {
2602 ; X86-LABEL: test_int_x86_avx512_cvtb2mask_512:
2604 ; X86-NEXT: vpmovb2m %zmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x48,0x29,0xc0]
2605 ; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
2606 ; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2607 ; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
2608 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2609 ; X86-NEXT: retl # encoding: [0xc3]
2611 ; X64-LABEL: test_int_x86_avx512_cvtb2mask_512:
2613 ; X64-NEXT: vpmovb2m %zmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x48,0x29,0xc0]
2614 ; X64-NEXT: kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
2615 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2616 ; X64-NEXT: retq # encoding: [0xc3]
2617 %res = call i64 @llvm.x86.avx512.cvtb2mask.512(<64 x i8> %x0)
2621 declare i32 @llvm.x86.avx512.cvtw2mask.512(<32 x i16>)
2623 define i32@test_int_x86_avx512_cvtw2mask_512(<32 x i16> %x0) {
2624 ; CHECK-LABEL: test_int_x86_avx512_cvtw2mask_512:
2626 ; CHECK-NEXT: vpmovw2m %zmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x48,0x29,0xc0]
2627 ; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
2628 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
2629 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2630 %res = call i32 @llvm.x86.avx512.cvtw2mask.512(<32 x i16> %x0)
2634 declare <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2636 define <32 x i16>@test_int_x86_avx512_mask_pmulhu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
2637 ; X86-LABEL: test_int_x86_avx512_mask_pmulhu_w_512:
2639 ; X86-NEXT: vpmulhuw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe4,0xd9]
2640 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
2641 ; X86-NEXT: vpmulhuw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe4,0xd1]
2642 ; X86-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
2643 ; X86-NEXT: retl # encoding: [0xc3]
2645 ; X64-LABEL: test_int_x86_avx512_mask_pmulhu_w_512:
2647 ; X64-NEXT: vpmulhuw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe4,0xd9]
2648 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2649 ; X64-NEXT: vpmulhuw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe4,0xd1]
2650 ; X64-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
2651 ; X64-NEXT: retq # encoding: [0xc3]
2652 %res = call <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
2653 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
2654 %res2 = add <32 x i16> %res, %res1
2655 ret <32 x i16> %res2
2658 declare <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2660 define <32 x i16>@test_int_x86_avx512_mask_pmulh_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
2661 ; X86-LABEL: test_int_x86_avx512_mask_pmulh_w_512:
2663 ; X86-NEXT: vpmulhw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe5,0xd9]
2664 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
2665 ; X86-NEXT: vpmulhw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe5,0xd1]
2666 ; X86-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
2667 ; X86-NEXT: retl # encoding: [0xc3]
2669 ; X64-LABEL: test_int_x86_avx512_mask_pmulh_w_512:
2671 ; X64-NEXT: vpmulhw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe5,0xd9]
2672 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2673 ; X64-NEXT: vpmulhw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe5,0xd1]
2674 ; X64-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
2675 ; X64-NEXT: retq # encoding: [0xc3]
2676 %res = call <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
2677 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
2678 %res2 = add <32 x i16> %res, %res1
2679 ret <32 x i16> %res2
2682 declare <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2684 define <32 x i16>@test_int_x86_avx512_mask_pmulhr_sw_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
2685 ; X86-LABEL: test_int_x86_avx512_mask_pmulhr_sw_512:
2687 ; X86-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x0b,0xd9]
2688 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
2689 ; X86-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x0b,0xd1]
2690 ; X86-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
2691 ; X86-NEXT: retl # encoding: [0xc3]
2693 ; X64-LABEL: test_int_x86_avx512_mask_pmulhr_sw_512:
2695 ; X64-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x0b,0xd9]
2696 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2697 ; X64-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x0b,0xd1]
2698 ; X64-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
2699 ; X64-NEXT: retq # encoding: [0xc3]
2700 %res = call <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
2701 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
2702 %res2 = add <32 x i16> %res, %res1
2703 ret <32 x i16> %res2
2706 declare <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8>, <64 x i8>, <32 x i16>, i32)
2708 define <32 x i16>@test_int_x86_avx512_mask_pmaddubs_w_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3) {
2709 ; X86-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512:
2711 ; X86-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x04,0xd9]
2712 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
2713 ; X86-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x04,0xd1]
2714 ; X86-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
2715 ; X86-NEXT: retl # encoding: [0xc3]
2717 ; X64-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512:
2719 ; X64-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x04,0xd9]
2720 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2721 ; X64-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x04,0xd1]
2722 ; X64-NEXT: vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
2723 ; X64-NEXT: retq # encoding: [0xc3]
2724 %res = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3)
2725 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 -1)
2726 %res2 = add <32 x i16> %res, %res1
2727 ret <32 x i16> %res2
2730 declare <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16>, <32 x i16>, <16 x i32>, i16)
2732 define <16 x i32>@test_int_x86_avx512_mask_pmaddw_d_512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3) {
2733 ; X86-LABEL: test_int_x86_avx512_mask_pmaddw_d_512:
2735 ; X86-NEXT: vpmaddwd %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xf5,0xd9]
2736 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
2737 ; X86-NEXT: vpmaddwd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf5,0xd1]
2738 ; X86-NEXT: vpaddd %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
2739 ; X86-NEXT: retl # encoding: [0xc3]
2741 ; X64-LABEL: test_int_x86_avx512_mask_pmaddw_d_512:
2743 ; X64-NEXT: vpmaddwd %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xf5,0xd9]
2744 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2745 ; X64-NEXT: vpmaddwd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf5,0xd1]
2746 ; X64-NEXT: vpaddd %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
2747 ; X64-NEXT: retq # encoding: [0xc3]
2748 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3)
2749 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 -1)
2750 %res2 = add <16 x i32> %res, %res1
2751 ret <16 x i32> %res2
2754 declare <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2756 define <32 x i16>@test_int_x86_avx512_mask_permvar_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
2757 ; X86-LABEL: test_int_x86_avx512_mask_permvar_hi_512:
2759 ; X86-NEXT: vpermw %zmm0, %zmm1, %zmm3 # encoding: [0x62,0xf2,0xf5,0x48,0x8d,0xd8]
2760 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
2761 ; X86-NEXT: vpermw %zmm0, %zmm1, %zmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x8d,0xd0]
2762 ; X86-NEXT: vpermw %zmm0, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x8d,0xc0]
2763 ; X86-NEXT: vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
2764 ; X86-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
2765 ; X86-NEXT: retl # encoding: [0xc3]
2767 ; X64-LABEL: test_int_x86_avx512_mask_permvar_hi_512:
2769 ; X64-NEXT: vpermw %zmm0, %zmm1, %zmm3 # encoding: [0x62,0xf2,0xf5,0x48,0x8d,0xd8]
2770 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2771 ; X64-NEXT: vpermw %zmm0, %zmm1, %zmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x8d,0xd0]
2772 ; X64-NEXT: vpermw %zmm0, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x8d,0xc0]
2773 ; X64-NEXT: vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
2774 ; X64-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
2775 ; X64-NEXT: retq # encoding: [0xc3]
2776 %res = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
2777 %res1 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
2778 %res2 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
2779 %res3 = add <32 x i16> %res, %res1
2780 %res4 = add <32 x i16> %res3, %res2
2781 ret <32 x i16> %res4
2784 declare <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2786 define <32 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
2787 ; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512:
2789 ; X86-NEXT: vmovdqa64 %zmm1, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9]
2790 ; X86-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x7d,0xda]
2791 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
2792 ; X86-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x7d,0xca]
2793 ; X86-NEXT: vpaddw %zmm3, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc3]
2794 ; X86-NEXT: retl # encoding: [0xc3]
2796 ; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512:
2798 ; X64-NEXT: vmovdqa64 %zmm1, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9]
2799 ; X64-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x7d,0xda]
2800 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2801 ; X64-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x7d,0xca]
2802 ; X64-NEXT: vpaddw %zmm3, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc3]
2803 ; X64-NEXT: retq # encoding: [0xc3]
2804 %res = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
2805 %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
2806 %res2 = add <32 x i16> %res, %res1
2807 ret <32 x i16> %res2
2810 declare <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2812 define <32 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
2813 ; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512:
2815 ; X86-NEXT: vmovdqa64 %zmm1, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9]
2816 ; X86-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x7d,0xda]
2817 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
2818 ; X86-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x7d,0xca]
2819 ; X86-NEXT: vpaddw %zmm3, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc3]
2820 ; X86-NEXT: retl # encoding: [0xc3]
2822 ; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512:
2824 ; X64-NEXT: vmovdqa64 %zmm1, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9]
2825 ; X64-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x7d,0xda]
2826 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2827 ; X64-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x7d,0xca]
2828 ; X64-NEXT: vpaddw %zmm3, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc3]
2829 ; X64-NEXT: retq # encoding: [0xc3]
2830 %res = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
2831 %res1 = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
2832 %res2 = add <32 x i16> %res, %res1
2833 ret <32 x i16> %res2
2836 declare <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2838 define <32 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
2839 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512:
2841 ; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
2842 ; X86-NEXT: vpermt2w %zmm2, %zmm1, %zmm3 # encoding: [0x62,0xf2,0xf5,0x48,0x7d,0xda]
2843 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
2844 ; X86-NEXT: vpermi2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x75,0xca]
2845 ; X86-NEXT: vpaddw %zmm3, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc3]
2846 ; X86-NEXT: retl # encoding: [0xc3]
2848 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512:
2850 ; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
2851 ; X64-NEXT: vpermt2w %zmm2, %zmm1, %zmm3 # encoding: [0x62,0xf2,0xf5,0x48,0x7d,0xda]
2852 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2853 ; X64-NEXT: vpermi2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x75,0xca]
2854 ; X64-NEXT: vpaddw %zmm3, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc3]
2855 ; X64-NEXT: retq # encoding: [0xc3]
2856 %res = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
2857 %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
2858 %res2 = add <32 x i16> %res, %res1
2859 ret <32 x i16> %res2
2862 declare <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8>, <64 x i8>, i32, <32 x i16>, i32)
2864 define <32 x i16>@test_int_x86_avx512_mask_dbpsadbw_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x3, i32 %x4) {
2865 ; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_512:
2867 ; X86-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xd9,0x02]
2868 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
2869 ; X86-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x42,0xd1,0x02]
2870 ; X86-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x42,0xc1,0x02]
2871 ; X86-NEXT: vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
2872 ; X86-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
2873 ; X86-NEXT: retl # encoding: [0xc3]
2875 ; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_512:
2877 ; X64-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xd9,0x02]
2878 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2879 ; X64-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x42,0xd1,0x02]
2880 ; X64-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x42,0xc1,0x02]
2881 ; X64-NEXT: vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
2882 ; X64-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
2883 ; X64-NEXT: retq # encoding: [0xc3]
2884 %res = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> %x3, i32 %x4)
2885 %res1 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> zeroinitializer, i32 %x4)
2886 %res2 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> %x3, i32 -1)
2887 %res3 = add <32 x i16> %res, %res1
2888 %res4 = add <32 x i16> %res3, %res2
2889 ret <32 x i16> %res4
2892 define <32 x i16> @test_mask_adds_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) {
2893 ; CHECK-LABEL: test_mask_adds_epu16_rr_512:
2895 ; CHECK-NEXT: vpaddusw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xdd,0xc1]
2896 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2897 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
2901 define <32 x i16> @test_mask_adds_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
2902 ; X86-LABEL: test_mask_adds_epu16_rrk_512:
2904 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
2905 ; X86-NEXT: vpaddusw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xdd,0xd1]
2906 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2907 ; X86-NEXT: retl # encoding: [0xc3]
2909 ; X64-LABEL: test_mask_adds_epu16_rrk_512:
2911 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2912 ; X64-NEXT: vpaddusw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xdd,0xd1]
2913 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
2914 ; X64-NEXT: retq # encoding: [0xc3]
2915 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
2919 define <32 x i16> @test_mask_adds_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
2920 ; X86-LABEL: test_mask_adds_epu16_rrkz_512:
2922 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
2923 ; X86-NEXT: vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xdd,0xc1]
2924 ; X86-NEXT: retl # encoding: [0xc3]
2926 ; X64-LABEL: test_mask_adds_epu16_rrkz_512:
2928 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
2929 ; X64-NEXT: vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xdd,0xc1]
2930 ; X64-NEXT: retq # encoding: [0xc3]
2931 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
2935 define <32 x i16> @test_mask_adds_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
2936 ; X86-LABEL: test_mask_adds_epu16_rm_512:
2938 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2939 ; X86-NEXT: vpaddusw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xdd,0x00]
2940 ; X86-NEXT: retl # encoding: [0xc3]
2942 ; X64-LABEL: test_mask_adds_epu16_rm_512:
2944 ; X64-NEXT: vpaddusw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xdd,0x07]
2945 ; X64-NEXT: retq # encoding: [0xc3]
2946 %b = load <32 x i16>, <32 x i16>* %ptr_b
2947 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
2951 define <32 x i16> @test_mask_adds_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
2952 ; X86-LABEL: test_mask_adds_epu16_rmk_512:
2954 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2955 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
2956 ; X86-NEXT: vpaddusw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xdd,0x08]
2957 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2958 ; X86-NEXT: retl # encoding: [0xc3]
2960 ; X64-LABEL: test_mask_adds_epu16_rmk_512:
2962 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
2963 ; X64-NEXT: vpaddusw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xdd,0x0f]
2964 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2965 ; X64-NEXT: retq # encoding: [0xc3]
2966 %b = load <32 x i16>, <32 x i16>* %ptr_b
2967 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
2971 define <32 x i16> @test_mask_adds_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
2972 ; X86-LABEL: test_mask_adds_epu16_rmkz_512:
2974 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
2975 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
2976 ; X86-NEXT: vpaddusw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xdd,0x00]
2977 ; X86-NEXT: retl # encoding: [0xc3]
2979 ; X64-LABEL: test_mask_adds_epu16_rmkz_512:
2981 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
2982 ; X64-NEXT: vpaddusw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xdd,0x07]
2983 ; X64-NEXT: retq # encoding: [0xc3]
2984 %b = load <32 x i16>, <32 x i16>* %ptr_b
2985 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
2989 declare <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
2991 define <32 x i16> @test_mask_subs_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) {
2992 ; CHECK-LABEL: test_mask_subs_epu16_rr_512:
2994 ; CHECK-NEXT: vpsubusw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd9,0xc1]
2995 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2996 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
3000 define <32 x i16> @test_mask_subs_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
3001 ; X86-LABEL: test_mask_subs_epu16_rrk_512:
3003 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
3004 ; X86-NEXT: vpsubusw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd9,0xd1]
3005 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3006 ; X86-NEXT: retl # encoding: [0xc3]
3008 ; X64-LABEL: test_mask_subs_epu16_rrk_512:
3010 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3011 ; X64-NEXT: vpsubusw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd9,0xd1]
3012 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3013 ; X64-NEXT: retq # encoding: [0xc3]
3014 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
3018 define <32 x i16> @test_mask_subs_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
3019 ; X86-LABEL: test_mask_subs_epu16_rrkz_512:
3021 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
3022 ; X86-NEXT: vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd9,0xc1]
3023 ; X86-NEXT: retl # encoding: [0xc3]
3025 ; X64-LABEL: test_mask_subs_epu16_rrkz_512:
3027 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3028 ; X64-NEXT: vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd9,0xc1]
3029 ; X64-NEXT: retq # encoding: [0xc3]
3030 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
3034 define <32 x i16> @test_mask_subs_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
3035 ; X86-LABEL: test_mask_subs_epu16_rm_512:
3037 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3038 ; X86-NEXT: vpsubusw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd9,0x00]
3039 ; X86-NEXT: retl # encoding: [0xc3]
3041 ; X64-LABEL: test_mask_subs_epu16_rm_512:
3043 ; X64-NEXT: vpsubusw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd9,0x07]
3044 ; X64-NEXT: retq # encoding: [0xc3]
3045 %b = load <32 x i16>, <32 x i16>* %ptr_b
3046 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
3050 define <32 x i16> @test_mask_subs_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
3051 ; X86-LABEL: test_mask_subs_epu16_rmk_512:
3053 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3054 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
3055 ; X86-NEXT: vpsubusw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd9,0x08]
3056 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3057 ; X86-NEXT: retl # encoding: [0xc3]
3059 ; X64-LABEL: test_mask_subs_epu16_rmk_512:
3061 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3062 ; X64-NEXT: vpsubusw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd9,0x0f]
3063 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3064 ; X64-NEXT: retq # encoding: [0xc3]
3065 %b = load <32 x i16>, <32 x i16>* %ptr_b
3066 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
3070 define <32 x i16> @test_mask_subs_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
3071 ; X86-LABEL: test_mask_subs_epu16_rmkz_512:
3073 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3074 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
3075 ; X86-NEXT: vpsubusw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd9,0x00]
3076 ; X86-NEXT: retl # encoding: [0xc3]
3078 ; X64-LABEL: test_mask_subs_epu16_rmkz_512:
3080 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3081 ; X64-NEXT: vpsubusw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd9,0x07]
3082 ; X64-NEXT: retq # encoding: [0xc3]
3083 %b = load <32 x i16>, <32 x i16>* %ptr_b
3084 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
3088 declare <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
3090 define <64 x i8> @test_mask_adds_epu8_rr_512(<64 x i8> %a, <64 x i8> %b) {
3091 ; CHECK-LABEL: test_mask_adds_epu8_rr_512:
3093 ; CHECK-NEXT: vpaddusb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xdc,0xc1]
3094 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3095 %res = call <64 x i8> @llvm.x86.avx512.mask.paddus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 -1)
3099 define <64 x i8> @test_mask_adds_epu8_rrk_512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask) {
3100 ; X86-LABEL: test_mask_adds_epu8_rrk_512:
3102 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
3103 ; X86-NEXT: vpaddusb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xdc,0xd1]
3104 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3105 ; X86-NEXT: retl # encoding: [0xc3]
3107 ; X64-LABEL: test_mask_adds_epu8_rrk_512:
3109 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
3110 ; X64-NEXT: vpaddusb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xdc,0xd1]
3111 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3112 ; X64-NEXT: retq # encoding: [0xc3]
3113 %res = call <64 x i8> @llvm.x86.avx512.mask.paddus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask)
3117 define <64 x i8> @test_mask_adds_epu8_rrkz_512(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
3118 ; X86-LABEL: test_mask_adds_epu8_rrkz_512:
3120 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
3121 ; X86-NEXT: vpaddusb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xdc,0xc1]
3122 ; X86-NEXT: retl # encoding: [0xc3]
3124 ; X64-LABEL: test_mask_adds_epu8_rrkz_512:
3126 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
3127 ; X64-NEXT: vpaddusb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xdc,0xc1]
3128 ; X64-NEXT: retq # encoding: [0xc3]
3129 %res = call <64 x i8> @llvm.x86.avx512.mask.paddus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 %mask)
3133 define <64 x i8> @test_mask_adds_epu8_rm_512(<64 x i8> %a, <64 x i8>* %ptr_b) {
3134 ; X86-LABEL: test_mask_adds_epu8_rm_512:
3136 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3137 ; X86-NEXT: vpaddusb (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xdc,0x00]
3138 ; X86-NEXT: retl # encoding: [0xc3]
3140 ; X64-LABEL: test_mask_adds_epu8_rm_512:
3142 ; X64-NEXT: vpaddusb (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xdc,0x07]
3143 ; X64-NEXT: retq # encoding: [0xc3]
3144 %b = load <64 x i8>, <64 x i8>* %ptr_b
3145 %res = call <64 x i8> @llvm.x86.avx512.mask.paddus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 -1)
3149 define <64 x i8> @test_mask_adds_epu8_rmk_512(<64 x i8> %a, <64 x i8>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
3150 ; X86-LABEL: test_mask_adds_epu8_rmk_512:
3152 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3153 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
3154 ; X86-NEXT: vpaddusb (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xdc,0x08]
3155 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3156 ; X86-NEXT: retl # encoding: [0xc3]
3158 ; X64-LABEL: test_mask_adds_epu8_rmk_512:
3160 ; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
3161 ; X64-NEXT: vpaddusb (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xdc,0x0f]
3162 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3163 ; X64-NEXT: retq # encoding: [0xc3]
3164 %b = load <64 x i8>, <64 x i8>* %ptr_b
3165 %res = call <64 x i8> @llvm.x86.avx512.mask.paddus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask)
3169 define <64 x i8> @test_mask_adds_epu8_rmkz_512(<64 x i8> %a, <64 x i8>* %ptr_b, i64 %mask) {
3170 ; X86-LABEL: test_mask_adds_epu8_rmkz_512:
3172 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3173 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
3174 ; X86-NEXT: vpaddusb (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xdc,0x00]
3175 ; X86-NEXT: retl # encoding: [0xc3]
3177 ; X64-LABEL: test_mask_adds_epu8_rmkz_512:
3179 ; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
3180 ; X64-NEXT: vpaddusb (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xdc,0x07]
3181 ; X64-NEXT: retq # encoding: [0xc3]
3182 %b = load <64 x i8>, <64 x i8>* %ptr_b
3183 %res = call <64 x i8> @llvm.x86.avx512.mask.paddus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 %mask)
3187 declare <64 x i8> @llvm.x86.avx512.mask.paddus.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
3189 define <64 x i8> @test_mask_subs_epu8_rr_512(<64 x i8> %a, <64 x i8> %b) {
3190 ; CHECK-LABEL: test_mask_subs_epu8_rr_512:
3192 ; CHECK-NEXT: vpsubusb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd8,0xc1]
3193 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3194 %res = call <64 x i8> @llvm.x86.avx512.mask.psubus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 -1)
3198 define <64 x i8> @test_mask_subs_epu8_rrk_512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask) {
3199 ; X86-LABEL: test_mask_subs_epu8_rrk_512:
3201 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
3202 ; X86-NEXT: vpsubusb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd8,0xd1]
3203 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3204 ; X86-NEXT: retl # encoding: [0xc3]
3206 ; X64-LABEL: test_mask_subs_epu8_rrk_512:
3208 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
3209 ; X64-NEXT: vpsubusb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd8,0xd1]
3210 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3211 ; X64-NEXT: retq # encoding: [0xc3]
3212 %res = call <64 x i8> @llvm.x86.avx512.mask.psubus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask)
3216 define <64 x i8> @test_mask_subs_epu8_rrkz_512(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
3217 ; X86-LABEL: test_mask_subs_epu8_rrkz_512:
3219 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
3220 ; X86-NEXT: vpsubusb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd8,0xc1]
3221 ; X86-NEXT: retl # encoding: [0xc3]
3223 ; X64-LABEL: test_mask_subs_epu8_rrkz_512:
3225 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
3226 ; X64-NEXT: vpsubusb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd8,0xc1]
3227 ; X64-NEXT: retq # encoding: [0xc3]
3228 %res = call <64 x i8> @llvm.x86.avx512.mask.psubus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 %mask)
3232 define <64 x i8> @test_mask_subs_epu8_rm_512(<64 x i8> %a, <64 x i8>* %ptr_b) {
3233 ; X86-LABEL: test_mask_subs_epu8_rm_512:
3235 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3236 ; X86-NEXT: vpsubusb (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd8,0x00]
3237 ; X86-NEXT: retl # encoding: [0xc3]
3239 ; X64-LABEL: test_mask_subs_epu8_rm_512:
3241 ; X64-NEXT: vpsubusb (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd8,0x07]
3242 ; X64-NEXT: retq # encoding: [0xc3]
3243 %b = load <64 x i8>, <64 x i8>* %ptr_b
3244 %res = call <64 x i8> @llvm.x86.avx512.mask.psubus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 -1)
3248 define <64 x i8> @test_mask_subs_epu8_rmk_512(<64 x i8> %a, <64 x i8>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
3249 ; X86-LABEL: test_mask_subs_epu8_rmk_512:
3251 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3252 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
3253 ; X86-NEXT: vpsubusb (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd8,0x08]
3254 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3255 ; X86-NEXT: retl # encoding: [0xc3]
3257 ; X64-LABEL: test_mask_subs_epu8_rmk_512:
3259 ; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
3260 ; X64-NEXT: vpsubusb (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd8,0x0f]
3261 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3262 ; X64-NEXT: retq # encoding: [0xc3]
3263 %b = load <64 x i8>, <64 x i8>* %ptr_b
3264 %res = call <64 x i8> @llvm.x86.avx512.mask.psubus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask)
3268 define <64 x i8> @test_mask_subs_epu8_rmkz_512(<64 x i8> %a, <64 x i8>* %ptr_b, i64 %mask) {
3269 ; X86-LABEL: test_mask_subs_epu8_rmkz_512:
3271 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3272 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
3273 ; X86-NEXT: vpsubusb (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd8,0x00]
3274 ; X86-NEXT: retl # encoding: [0xc3]
3276 ; X64-LABEL: test_mask_subs_epu8_rmkz_512:
3278 ; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
3279 ; X64-NEXT: vpsubusb (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd8,0x07]
3280 ; X64-NEXT: retq # encoding: [0xc3]
3281 %b = load <64 x i8>, <64 x i8>* %ptr_b
3282 %res = call <64 x i8> @llvm.x86.avx512.mask.psubus.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 %mask)
3286 declare <64 x i8> @llvm.x86.avx512.mask.psubus.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
3288 define <32 x i16> @test_adds_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
3289 ; CHECK-LABEL: test_adds_epi16_rr_512:
3291 ; CHECK-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xed,0xc1]
3292 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3293 %1 = call <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16> %a, <32 x i16> %b)
3297 define <32 x i16> @test_adds_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
3298 ; X86-LABEL: test_adds_epi16_rrk_512:
3300 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
3301 ; X86-NEXT: vpaddsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0xd1]
3302 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3303 ; X86-NEXT: retl # encoding: [0xc3]
3305 ; X64-LABEL: test_adds_epi16_rrk_512:
3307 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3308 ; X64-NEXT: vpaddsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0xd1]
3309 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3310 ; X64-NEXT: retq # encoding: [0xc3]
3311 %1 = call <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16> %a, <32 x i16> %b)
3312 %2 = bitcast i32 %mask to <32 x i1>
3313 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
3317 define <32 x i16> @test_adds_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
3318 ; X86-LABEL: test_adds_epi16_rrkz_512:
3320 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
3321 ; X86-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0xc1]
3322 ; X86-NEXT: retl # encoding: [0xc3]
3324 ; X64-LABEL: test_adds_epi16_rrkz_512:
3326 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3327 ; X64-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0xc1]
3328 ; X64-NEXT: retq # encoding: [0xc3]
3329 %1 = call <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16> %a, <32 x i16> %b)
3330 %2 = bitcast i32 %mask to <32 x i1>
3331 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
3335 define <32 x i16> @test_adds_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
3336 ; X86-LABEL: test_adds_epi16_rm_512:
3338 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3339 ; X86-NEXT: vpaddsw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xed,0x00]
3340 ; X86-NEXT: retl # encoding: [0xc3]
3342 ; X64-LABEL: test_adds_epi16_rm_512:
3344 ; X64-NEXT: vpaddsw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xed,0x07]
3345 ; X64-NEXT: retq # encoding: [0xc3]
3346 %b = load <32 x i16>, <32 x i16>* %ptr_b
3347 %1 = call <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16> %a, <32 x i16> %b)
3351 define <32 x i16> @test_adds_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
3352 ; X86-LABEL: test_adds_epi16_rmk_512:
3354 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3355 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
3356 ; X86-NEXT: vpaddsw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0x08]
3357 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3358 ; X86-NEXT: retl # encoding: [0xc3]
3360 ; X64-LABEL: test_adds_epi16_rmk_512:
3362 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3363 ; X64-NEXT: vpaddsw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0x0f]
3364 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3365 ; X64-NEXT: retq # encoding: [0xc3]
3366 %b = load <32 x i16>, <32 x i16>* %ptr_b
3367 %1 = call <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16> %a, <32 x i16> %b)
3368 %2 = bitcast i32 %mask to <32 x i1>
3369 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
3373 define <32 x i16> @test_adds_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
3374 ; X86-LABEL: test_adds_epi16_rmkz_512:
3376 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3377 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
3378 ; X86-NEXT: vpaddsw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0x00]
3379 ; X86-NEXT: retl # encoding: [0xc3]
3381 ; X64-LABEL: test_adds_epi16_rmkz_512:
3383 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3384 ; X64-NEXT: vpaddsw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0x07]
3385 ; X64-NEXT: retq # encoding: [0xc3]
3386 %b = load <32 x i16>, <32 x i16>* %ptr_b
3387 %1 = call <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16> %a, <32 x i16> %b)
3388 %2 = bitcast i32 %mask to <32 x i1>
3389 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
3393 declare <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16>, <32 x i16>)
3395 define <32 x i16> @test_mask_adds_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
3396 ; CHECK-LABEL: test_mask_adds_epi16_rr_512:
3398 ; CHECK-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xed,0xc1]
3399 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3400 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
3404 define <32 x i16> @test_mask_adds_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
3405 ; X86-LABEL: test_mask_adds_epi16_rrk_512:
3407 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
3408 ; X86-NEXT: vpaddsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0xd1]
3409 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3410 ; X86-NEXT: retl # encoding: [0xc3]
3412 ; X64-LABEL: test_mask_adds_epi16_rrk_512:
3414 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3415 ; X64-NEXT: vpaddsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0xd1]
3416 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3417 ; X64-NEXT: retq # encoding: [0xc3]
3418 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
3422 define <32 x i16> @test_mask_adds_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
3423 ; X86-LABEL: test_mask_adds_epi16_rrkz_512:
3425 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
3426 ; X86-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0xc1]
3427 ; X86-NEXT: retl # encoding: [0xc3]
3429 ; X64-LABEL: test_mask_adds_epi16_rrkz_512:
3431 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3432 ; X64-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0xc1]
3433 ; X64-NEXT: retq # encoding: [0xc3]
3434 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
3438 define <32 x i16> @test_mask_adds_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
3439 ; X86-LABEL: test_mask_adds_epi16_rm_512:
3441 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3442 ; X86-NEXT: vpaddsw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xed,0x00]
3443 ; X86-NEXT: retl # encoding: [0xc3]
3445 ; X64-LABEL: test_mask_adds_epi16_rm_512:
3447 ; X64-NEXT: vpaddsw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xed,0x07]
3448 ; X64-NEXT: retq # encoding: [0xc3]
3449 %b = load <32 x i16>, <32 x i16>* %ptr_b
3450 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
3454 define <32 x i16> @test_mask_adds_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
3455 ; X86-LABEL: test_mask_adds_epi16_rmk_512:
3457 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3458 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
3459 ; X86-NEXT: vpaddsw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0x08]
3460 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3461 ; X86-NEXT: retl # encoding: [0xc3]
3463 ; X64-LABEL: test_mask_adds_epi16_rmk_512:
3465 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3466 ; X64-NEXT: vpaddsw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0x0f]
3467 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3468 ; X64-NEXT: retq # encoding: [0xc3]
3469 %b = load <32 x i16>, <32 x i16>* %ptr_b
3470 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
3474 define <32 x i16> @test_mask_adds_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
3475 ; X86-LABEL: test_mask_adds_epi16_rmkz_512:
3477 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3478 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
3479 ; X86-NEXT: vpaddsw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0x00]
3480 ; X86-NEXT: retl # encoding: [0xc3]
3482 ; X64-LABEL: test_mask_adds_epi16_rmkz_512:
3484 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3485 ; X64-NEXT: vpaddsw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0x07]
3486 ; X64-NEXT: retq # encoding: [0xc3]
3487 %b = load <32 x i16>, <32 x i16>* %ptr_b
3488 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
3492 declare <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
3494 define <32 x i16> @test_subs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
3495 ; CHECK-LABEL: test_subs_epi16_rr_512:
3497 ; CHECK-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe9,0xc1]
3498 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3499 %1 = call <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16> %a, <32 x i16> %b)
3503 define <32 x i16> @test_subs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
3504 ; X86-LABEL: test_subs_epi16_rrk_512:
3506 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
3507 ; X86-NEXT: vpsubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0xd1]
3508 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3509 ; X86-NEXT: retl # encoding: [0xc3]
3511 ; X64-LABEL: test_subs_epi16_rrk_512:
3513 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3514 ; X64-NEXT: vpsubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0xd1]
3515 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3516 ; X64-NEXT: retq # encoding: [0xc3]
3517 %1 = call <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16> %a, <32 x i16> %b)
3518 %2 = bitcast i32 %mask to <32 x i1>
3519 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
3523 define <32 x i16> @test_subs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
3524 ; X86-LABEL: test_subs_epi16_rrkz_512:
3526 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
3527 ; X86-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0xc1]
3528 ; X86-NEXT: retl # encoding: [0xc3]
3530 ; X64-LABEL: test_subs_epi16_rrkz_512:
3532 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3533 ; X64-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0xc1]
3534 ; X64-NEXT: retq # encoding: [0xc3]
3535 %1 = call <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16> %a, <32 x i16> %b)
3536 %2 = bitcast i32 %mask to <32 x i1>
3537 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
3541 define <32 x i16> @test_subs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
3542 ; X86-LABEL: test_subs_epi16_rm_512:
3544 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3545 ; X86-NEXT: vpsubsw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe9,0x00]
3546 ; X86-NEXT: retl # encoding: [0xc3]
3548 ; X64-LABEL: test_subs_epi16_rm_512:
3550 ; X64-NEXT: vpsubsw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe9,0x07]
3551 ; X64-NEXT: retq # encoding: [0xc3]
3552 %b = load <32 x i16>, <32 x i16>* %ptr_b
3553 %1 = call <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16> %a, <32 x i16> %b)
3557 define <32 x i16> @test_subs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
3558 ; X86-LABEL: test_subs_epi16_rmk_512:
3560 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3561 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
3562 ; X86-NEXT: vpsubsw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0x08]
3563 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3564 ; X86-NEXT: retl # encoding: [0xc3]
3566 ; X64-LABEL: test_subs_epi16_rmk_512:
3568 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3569 ; X64-NEXT: vpsubsw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0x0f]
3570 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3571 ; X64-NEXT: retq # encoding: [0xc3]
3572 %b = load <32 x i16>, <32 x i16>* %ptr_b
3573 %1 = call <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16> %a, <32 x i16> %b)
3574 %2 = bitcast i32 %mask to <32 x i1>
3575 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
3579 define <32 x i16> @test_subs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
3580 ; X86-LABEL: test_subs_epi16_rmkz_512:
3582 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3583 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
3584 ; X86-NEXT: vpsubsw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0x00]
3585 ; X86-NEXT: retl # encoding: [0xc3]
3587 ; X64-LABEL: test_subs_epi16_rmkz_512:
3589 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3590 ; X64-NEXT: vpsubsw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0x07]
3591 ; X64-NEXT: retq # encoding: [0xc3]
3592 %b = load <32 x i16>, <32 x i16>* %ptr_b
3593 %1 = call <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16> %a, <32 x i16> %b)
3594 %2 = bitcast i32 %mask to <32 x i1>
3595 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
3599 declare <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16>, <32 x i16>)
3601 define <32 x i16> @test_mask_subs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
3602 ; CHECK-LABEL: test_mask_subs_epi16_rr_512:
3604 ; CHECK-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe9,0xc1]
3605 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3606 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
3610 define <32 x i16> @test_mask_subs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
3611 ; X86-LABEL: test_mask_subs_epi16_rrk_512:
3613 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
3614 ; X86-NEXT: vpsubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0xd1]
3615 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3616 ; X86-NEXT: retl # encoding: [0xc3]
3618 ; X64-LABEL: test_mask_subs_epi16_rrk_512:
3620 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3621 ; X64-NEXT: vpsubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0xd1]
3622 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3623 ; X64-NEXT: retq # encoding: [0xc3]
3624 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
3628 define <32 x i16> @test_mask_subs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
3629 ; X86-LABEL: test_mask_subs_epi16_rrkz_512:
3631 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
3632 ; X86-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0xc1]
3633 ; X86-NEXT: retl # encoding: [0xc3]
3635 ; X64-LABEL: test_mask_subs_epi16_rrkz_512:
3637 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3638 ; X64-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0xc1]
3639 ; X64-NEXT: retq # encoding: [0xc3]
3640 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
3644 define <32 x i16> @test_mask_subs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
3645 ; X86-LABEL: test_mask_subs_epi16_rm_512:
3647 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3648 ; X86-NEXT: vpsubsw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe9,0x00]
3649 ; X86-NEXT: retl # encoding: [0xc3]
3651 ; X64-LABEL: test_mask_subs_epi16_rm_512:
3653 ; X64-NEXT: vpsubsw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe9,0x07]
3654 ; X64-NEXT: retq # encoding: [0xc3]
3655 %b = load <32 x i16>, <32 x i16>* %ptr_b
3656 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
3660 define <32 x i16> @test_mask_subs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
3661 ; X86-LABEL: test_mask_subs_epi16_rmk_512:
3663 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3664 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
3665 ; X86-NEXT: vpsubsw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0x08]
3666 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3667 ; X86-NEXT: retl # encoding: [0xc3]
3669 ; X64-LABEL: test_mask_subs_epi16_rmk_512:
3671 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3672 ; X64-NEXT: vpsubsw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0x0f]
3673 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3674 ; X64-NEXT: retq # encoding: [0xc3]
3675 %b = load <32 x i16>, <32 x i16>* %ptr_b
3676 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
3680 define <32 x i16> @test_mask_subs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
3681 ; X86-LABEL: test_mask_subs_epi16_rmkz_512:
3683 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3684 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
3685 ; X86-NEXT: vpsubsw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0x00]
3686 ; X86-NEXT: retl # encoding: [0xc3]
3688 ; X64-LABEL: test_mask_subs_epi16_rmkz_512:
3690 ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
3691 ; X64-NEXT: vpsubsw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0x07]
3692 ; X64-NEXT: retq # encoding: [0xc3]
3693 %b = load <32 x i16>, <32 x i16>* %ptr_b
3694 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
3698 declare <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
3700 define <64 x i8> @test_mask_adds_epi8_rr_512(<64 x i8> %a, <64 x i8> %b) {
3701 ; CHECK-LABEL: test_mask_adds_epi8_rr_512:
3703 ; CHECK-NEXT: vpaddsb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xec,0xc1]
3704 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3705 %res = call <64 x i8> @llvm.x86.avx512.mask.padds.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 -1)
3709 define <64 x i8> @test_mask_adds_epi8_rrk_512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask) {
3710 ; X86-LABEL: test_mask_adds_epi8_rrk_512:
3712 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
3713 ; X86-NEXT: vpaddsb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xec,0xd1]
3714 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3715 ; X86-NEXT: retl # encoding: [0xc3]
3717 ; X64-LABEL: test_mask_adds_epi8_rrk_512:
3719 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
3720 ; X64-NEXT: vpaddsb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xec,0xd1]
3721 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3722 ; X64-NEXT: retq # encoding: [0xc3]
3723 %res = call <64 x i8> @llvm.x86.avx512.mask.padds.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask)
3727 define <64 x i8> @test_mask_adds_epi8_rrkz_512(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
3728 ; X86-LABEL: test_mask_adds_epi8_rrkz_512:
3730 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
3731 ; X86-NEXT: vpaddsb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xec,0xc1]
3732 ; X86-NEXT: retl # encoding: [0xc3]
3734 ; X64-LABEL: test_mask_adds_epi8_rrkz_512:
3736 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
3737 ; X64-NEXT: vpaddsb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xec,0xc1]
3738 ; X64-NEXT: retq # encoding: [0xc3]
3739 %res = call <64 x i8> @llvm.x86.avx512.mask.padds.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 %mask)
3743 define <64 x i8> @test_mask_adds_epi8_rm_512(<64 x i8> %a, <64 x i8>* %ptr_b) {
3744 ; X86-LABEL: test_mask_adds_epi8_rm_512:
3746 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3747 ; X86-NEXT: vpaddsb (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xec,0x00]
3748 ; X86-NEXT: retl # encoding: [0xc3]
3750 ; X64-LABEL: test_mask_adds_epi8_rm_512:
3752 ; X64-NEXT: vpaddsb (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xec,0x07]
3753 ; X64-NEXT: retq # encoding: [0xc3]
3754 %b = load <64 x i8>, <64 x i8>* %ptr_b
3755 %res = call <64 x i8> @llvm.x86.avx512.mask.padds.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 -1)
3759 define <64 x i8> @test_mask_adds_epi8_rmk_512(<64 x i8> %a, <64 x i8>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
3760 ; X86-LABEL: test_mask_adds_epi8_rmk_512:
3762 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3763 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
3764 ; X86-NEXT: vpaddsb (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xec,0x08]
3765 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3766 ; X86-NEXT: retl # encoding: [0xc3]
3768 ; X64-LABEL: test_mask_adds_epi8_rmk_512:
3770 ; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
3771 ; X64-NEXT: vpaddsb (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xec,0x0f]
3772 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3773 ; X64-NEXT: retq # encoding: [0xc3]
3774 %b = load <64 x i8>, <64 x i8>* %ptr_b
3775 %res = call <64 x i8> @llvm.x86.avx512.mask.padds.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask)
3779 define <64 x i8> @test_mask_adds_epi8_rmkz_512(<64 x i8> %a, <64 x i8>* %ptr_b, i64 %mask) {
3780 ; X86-LABEL: test_mask_adds_epi8_rmkz_512:
3782 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3783 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
3784 ; X86-NEXT: vpaddsb (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xec,0x00]
3785 ; X86-NEXT: retl # encoding: [0xc3]
3787 ; X64-LABEL: test_mask_adds_epi8_rmkz_512:
3789 ; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
3790 ; X64-NEXT: vpaddsb (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xec,0x07]
3791 ; X64-NEXT: retq # encoding: [0xc3]
3792 %b = load <64 x i8>, <64 x i8>* %ptr_b
3793 %res = call <64 x i8> @llvm.x86.avx512.mask.padds.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 %mask)
3797 declare <64 x i8> @llvm.x86.avx512.mask.padds.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
3799 define <64 x i8> @test_mask_subs_epi8_rr_512(<64 x i8> %a, <64 x i8> %b) {
3800 ; CHECK-LABEL: test_mask_subs_epi8_rr_512:
3802 ; CHECK-NEXT: vpsubsb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe8,0xc1]
3803 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
3804 %res = call <64 x i8> @llvm.x86.avx512.mask.psubs.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 -1)
3808 define <64 x i8> @test_mask_subs_epi8_rrk_512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask) {
3809 ; X86-LABEL: test_mask_subs_epi8_rrk_512:
3811 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
3812 ; X86-NEXT: vpsubsb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe8,0xd1]
3813 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3814 ; X86-NEXT: retl # encoding: [0xc3]
3816 ; X64-LABEL: test_mask_subs_epi8_rrk_512:
3818 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
3819 ; X64-NEXT: vpsubsb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe8,0xd1]
3820 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3821 ; X64-NEXT: retq # encoding: [0xc3]
3822 %res = call <64 x i8> @llvm.x86.avx512.mask.psubs.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask)
3826 define <64 x i8> @test_mask_subs_epi8_rrkz_512(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
3827 ; X86-LABEL: test_mask_subs_epi8_rrkz_512:
3829 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
3830 ; X86-NEXT: vpsubsb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe8,0xc1]
3831 ; X86-NEXT: retl # encoding: [0xc3]
3833 ; X64-LABEL: test_mask_subs_epi8_rrkz_512:
3835 ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
3836 ; X64-NEXT: vpsubsb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe8,0xc1]
3837 ; X64-NEXT: retq # encoding: [0xc3]
3838 %res = call <64 x i8> @llvm.x86.avx512.mask.psubs.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 %mask)
3842 define <64 x i8> @test_mask_subs_epi8_rm_512(<64 x i8> %a, <64 x i8>* %ptr_b) {
3843 ; X86-LABEL: test_mask_subs_epi8_rm_512:
3845 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3846 ; X86-NEXT: vpsubsb (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe8,0x00]
3847 ; X86-NEXT: retl # encoding: [0xc3]
3849 ; X64-LABEL: test_mask_subs_epi8_rm_512:
3851 ; X64-NEXT: vpsubsb (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe8,0x07]
3852 ; X64-NEXT: retq # encoding: [0xc3]
3853 %b = load <64 x i8>, <64 x i8>* %ptr_b
3854 %res = call <64 x i8> @llvm.x86.avx512.mask.psubs.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 -1)
3858 define <64 x i8> @test_mask_subs_epi8_rmk_512(<64 x i8> %a, <64 x i8>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
3859 ; X86-LABEL: test_mask_subs_epi8_rmk_512:
3861 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3862 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
3863 ; X86-NEXT: vpsubsb (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe8,0x08]
3864 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3865 ; X86-NEXT: retl # encoding: [0xc3]
3867 ; X64-LABEL: test_mask_subs_epi8_rmk_512:
3869 ; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
3870 ; X64-NEXT: vpsubsb (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe8,0x0f]
3871 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
3872 ; X64-NEXT: retq # encoding: [0xc3]
3873 %b = load <64 x i8>, <64 x i8>* %ptr_b
3874 %res = call <64 x i8> @llvm.x86.avx512.mask.psubs.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> %passThru, i64 %mask)
3878 define <64 x i8> @test_mask_subs_epi8_rmkz_512(<64 x i8> %a, <64 x i8>* %ptr_b, i64 %mask) {
3879 ; X86-LABEL: test_mask_subs_epi8_rmkz_512:
3881 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
3882 ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
3883 ; X86-NEXT: vpsubsb (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe8,0x00]
3884 ; X86-NEXT: retl # encoding: [0xc3]
3886 ; X64-LABEL: test_mask_subs_epi8_rmkz_512:
3888 ; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
3889 ; X64-NEXT: vpsubsb (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe8,0x07]
3890 ; X64-NEXT: retq # encoding: [0xc3]
3891 %b = load <64 x i8>, <64 x i8>* %ptr_b
3892 %res = call <64 x i8> @llvm.x86.avx512.mask.psubs.b.512(<64 x i8> %a, <64 x i8> %b, <64 x i8> zeroinitializer, i64 %mask)
3896 declare <64 x i8> @llvm.x86.avx512.mask.psubs.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
3898 declare <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16>, <32 x i16>, <32 x i16>, i32)
3900 define <32 x i16>@test_int_x86_avx512_mask_psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
3901 ; X86-LABEL: test_int_x86_avx512_mask_psrlv32hi:
3903 ; X86-NEXT: vpsrlvw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x10,0xd9]
3904 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
3905 ; X86-NEXT: vpsrlvw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x10,0xd1]
3906 ; X86-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x10,0xc1]
3907 ; X86-NEXT: vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
3908 ; X86-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
3909 ; X86-NEXT: retl # encoding: [0xc3]
3911 ; X64-LABEL: test_int_x86_avx512_mask_psrlv32hi:
3913 ; X64-NEXT: vpsrlvw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x10,0xd9]
3914 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3915 ; X64-NEXT: vpsrlvw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x10,0xd1]
3916 ; X64-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x10,0xc1]
3917 ; X64-NEXT: vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
3918 ; X64-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
3919 ; X64-NEXT: retq # encoding: [0xc3]
3920 %res = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
3921 %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
3922 %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
3923 %res3 = add <32 x i16> %res, %res1
3924 %res4 = add <32 x i16> %res3, %res2
3925 ret <32 x i16> %res4
3928 declare <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16>, <32 x i16>, <32 x i16>, i32)
3930 define <32 x i16>@test_int_x86_avx512_mask_psrav32_hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
3931 ; X86-LABEL: test_int_x86_avx512_mask_psrav32_hi:
3933 ; X86-NEXT: vpsravw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x11,0xd9]
3934 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
3935 ; X86-NEXT: vpsravw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x11,0xd1]
3936 ; X86-NEXT: vpsravw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x11,0xc1]
3937 ; X86-NEXT: vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
3938 ; X86-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
3939 ; X86-NEXT: retl # encoding: [0xc3]
3941 ; X64-LABEL: test_int_x86_avx512_mask_psrav32_hi:
3943 ; X64-NEXT: vpsravw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x11,0xd9]
3944 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3945 ; X64-NEXT: vpsravw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x11,0xd1]
3946 ; X64-NEXT: vpsravw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x11,0xc1]
3947 ; X64-NEXT: vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
3948 ; X64-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
3949 ; X64-NEXT: retq # encoding: [0xc3]
3950 %res = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
3951 %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
3952 %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
3953 %res3 = add <32 x i16> %res, %res1
3954 %res4 = add <32 x i16> %res3, %res2
3955 ret <32 x i16> %res4
3958 declare <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16>, <32 x i16>, <32 x i16>, i32)
3960 define <32 x i16>@test_int_x86_avx512_mask_psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
3961 ; X86-LABEL: test_int_x86_avx512_mask_psllv32hi:
3963 ; X86-NEXT: vpsllvw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x12,0xd9]
3964 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
3965 ; X86-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x12,0xd1]
3966 ; X86-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x12,0xc1]
3967 ; X86-NEXT: vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
3968 ; X86-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
3969 ; X86-NEXT: retl # encoding: [0xc3]
3971 ; X64-LABEL: test_int_x86_avx512_mask_psllv32hi:
3973 ; X64-NEXT: vpsllvw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x12,0xd9]
3974 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
3975 ; X64-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x12,0xd1]
3976 ; X64-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x12,0xc1]
3977 ; X64-NEXT: vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
3978 ; X64-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
3979 ; X64-NEXT: retq # encoding: [0xc3]
3980 %res = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
3981 %res1 = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
3982 %res2 = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
3983 %res3 = add <32 x i16> %res, %res1
3984 %res4 = add <32 x i16> %res3, %res2
3985 ret <32 x i16> %res4
3988 declare <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16>, <32 x i8>, i32)
3990 define <32 x i8>@test_int_x86_avx512_mask_pmov_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
3991 ; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_512:
3993 ; X86-NEXT: vpmovwb %zmm0, %ymm2 # encoding: [0x62,0xf2,0x7e,0x48,0x30,0xc2]
3994 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
3995 ; X86-NEXT: vpmovwb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x30,0xc1]
3996 ; X86-NEXT: vpmovwb %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x30,0xc0]
3997 ; X86-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # encoding: [0xc5,0xf5,0xfc,0xc0]
3998 ; X86-NEXT: vpaddb %ymm0, %ymm2, %ymm0 # encoding: [0xc5,0xed,0xfc,0xc0]
3999 ; X86-NEXT: retl # encoding: [0xc3]
4001 ; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_512:
4003 ; X64-NEXT: vpmovwb %zmm0, %ymm2 # encoding: [0x62,0xf2,0x7e,0x48,0x30,0xc2]
4004 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
4005 ; X64-NEXT: vpmovwb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x30,0xc1]
4006 ; X64-NEXT: vpmovwb %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x30,0xc0]
4007 ; X64-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # encoding: [0xc5,0xf5,0xfc,0xc0]
4008 ; X64-NEXT: vpaddb %ymm0, %ymm2, %ymm0 # encoding: [0xc5,0xed,0xfc,0xc0]
4009 ; X64-NEXT: retq # encoding: [0xc3]
4010 %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
4011 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
4012 %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
4013 %res3 = add <32 x i8> %res0, %res1
4014 %res4 = add <32 x i8> %res3, %res2