; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx512f --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64

declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16) nounwind readnone

define i16 @unpckbw_test(i16 %a0, i16 %a1) {
; X86-LABEL: unpckbw_test:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k0 ## encoding: [0xc5,0xf8,0x90,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    kunpckbw %k1, %k0, %k0 ## encoding: [0xc5,0xfd,0x4b,0xc1]
; X86-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: unpckbw_test:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k0 ## encoding: [0xc5,0xf8,0x92,0xc7]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    kunpckbw %k1, %k0, %k0 ## encoding: [0xc5,0xfd,0x4b,0xc1]
; X64-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call i16 @llvm.x86.avx512.kunpck.bw(i16 %a0, i16 %a1)
  ret i16 %res
}

define <16 x i32>@test_int_x86_avx512_mask_pbroadcastd_gpr_512(i32 %x0, <16 x i32> %x1, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcastd_gpr_512:
; X86:       ## %bb.0:
; X86-NEXT:    vpbroadcastd {{[0-9]+}}(%esp), %zmm1 ## encoding: [0x62,0xf2,0x7d,0x48,0x58,0x4c,0x24,0x01]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6f,0xc1]
; X86-NEXT:    vmovdqa32 %zmm1, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6f,0xd1]
; X86-NEXT:    vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X86-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcastd_gpr_512:
; X64:       ## %bb.0:
; X64-NEXT:    vpbroadcastd %edi, %zmm1 ## encoding: [0x62,0xf2,0x7d,0x48,0x7c,0xcf]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpbroadcastd %edi, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x7c,0xc7]
; X64-NEXT:    vpbroadcastd %edi, %zmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x7c,0xd7]
; X64-NEXT:    vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X64-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.512(i32 %x0, <16 x i32> %x1, i16 -1)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.512(i32 %x0, <16 x i32> %x1, i16 %mask)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.512(i32 %x0, <16 x i32> zeroinitializer, i16 %mask)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res2, %res3
  ret <16 x i32> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.512(i32, <16 x i32>, i16)

define <8 x i64>@test_int_x86_avx512_mask_pbroadcastq_gpr_512(i64 %x0, <8 x i64> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcastq_gpr_512:
; X86:       ## %bb.0:
; X86-NEXT:    vpbroadcastq {{[0-9]+}}(%esp), %zmm1 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0x8c,0x24,0x04,0x00,0x00,0x00]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x0c]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x6f,0xc1]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x6f,0xd1]
; X86-NEXT:    vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcastq_gpr_512:
; X64:       ## %bb.0:
; X64-NEXT:    vpbroadcastq %rdi, %zmm1 ## encoding: [0x62,0xf2,0xfd,0x48,0x7c,0xcf]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpbroadcastq %rdi, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x7c,0xc7]
; X64-NEXT:    vpbroadcastq %rdi, %zmm2 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x7c,0xd7]
; X64-NEXT:    vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.512(i64 %x0, <8 x i64> %x1,i8 -1)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.512(i64 %x0, <8 x i64> %x1,i8 %mask)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.512(i64 %x0, <8 x i64> zeroinitializer,i8 %mask)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res2, %res3
  ret <8 x i64> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.512(i64, <8 x i64>, i8)

declare <16 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.512(<4 x float>, <16 x float>, i16) nounwind readonly

define <16 x float> @test_x86_vbroadcast_ss_ps_512(<4 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_x86_vbroadcast_ss_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vbroadcastss %xmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x18,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]

  %res = call <16 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.512(<4 x float> %a0, <16 x float> undef, i16 -1)
  ret <16 x float> %res
}

define <16 x float> @test_x86_mask_vbroadcast_ss_ps_512(<4 x float> %a0, <16 x float> %a1, i16 %mask ) {
; X86-LABEL: test_x86_mask_vbroadcast_ss_ps_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vbroadcastss %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x18,0xc8]
; X86-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_mask_vbroadcast_ss_ps_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vbroadcastss %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x18,0xc8]
; X64-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]

  %res = call <16 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.512(<4 x float> %a0, <16 x float> %a1, i16 %mask)
  ret <16 x float> %res
}

define <16 x float> @test_x86_maskz_vbroadcast_ss_ps_512(<4 x float> %a0, i16 %mask ) {
; X86-LABEL: test_x86_maskz_vbroadcast_ss_ps_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vbroadcastss %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x18,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_maskz_vbroadcast_ss_ps_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vbroadcastss %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x18,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]

  %res = call <16 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.512(<4 x float> %a0, <16 x float> zeroinitializer, i16 %mask)
  ret <16 x float> %res
}

declare <8 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.512(<2 x double>, <8 x double>, i8) nounwind readonly

define <8 x double> @test_x86_vbroadcast_sd_pd_512(<2 x double> %a0, <8 x double> %a1) {
; CHECK-LABEL: test_x86_vbroadcast_sd_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vbroadcastsd %xmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x19,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]

  %res = call <8 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.512(<2 x double> %a0, <8 x double> undef, i8 -1)
  ret <8 x double> %res
}

define <8 x double> @test_x86_mask_vbroadcast_sd_pd_512(<2 x double> %a0, <8 x double> %a1, i8 %mask ) {
; X86-LABEL: test_x86_mask_vbroadcast_sd_pd_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vbroadcastsd %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x19,0xc8]
; X86-NEXT:    vmovapd %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_mask_vbroadcast_sd_pd_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vbroadcastsd %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x19,0xc8]
; X64-NEXT:    vmovapd %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]

  %res = call <8 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.512(<2 x double> %a0, <8 x double> %a1, i8 %mask)
  ret <8 x double> %res
}

define <8 x double> @test_x86_maskz_vbroadcast_sd_pd_512(<2 x double> %a0, i8 %mask ) {
; X86-LABEL: test_x86_maskz_vbroadcast_sd_pd_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vbroadcastsd %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x19,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_maskz_vbroadcast_sd_pd_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vbroadcastsd %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x19,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]

  %res = call <8 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.512(<2 x double> %a0, <8 x double> zeroinitializer, i8 %mask)
  ret <8 x double> %res
}

declare <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_pbroadcastd_512(<4 x i32> %x0, <16 x i32> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vbroadcastss %xmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x18,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> %x1, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32>@test_int_x86_avx512_mask_pbroadcastd_512(<4 x i32> %x0, <16 x i32> %x1, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcastd_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpbroadcastd %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x58,0xc8]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcastd_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpbroadcastd %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x58,0xc8]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> %x1, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32>@test_int_x86_avx512_maskz_pbroadcastd_512(<4 x i32> %x0, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_pbroadcastd_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpbroadcastd %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x58,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pbroadcastd_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpbroadcastd %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x58,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_pbroadcastq_512(<2 x i64> %x0, <8 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastq_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vbroadcastsd %xmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x19,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> %x1,i8 -1)
  ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_mask_pbroadcastq_512(<2 x i64> %x0, <8 x i64> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcastq_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpbroadcastq %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x59,0xc8]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcastq_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpbroadcastq %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x59,0xc8]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> %x1,i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_maskz_pbroadcastq_512(<2 x i64> %x0, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_pbroadcastq_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpbroadcastq %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x59,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pbroadcastq_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpbroadcastq %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x59,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> zeroinitializer,i8 %mask)
  ret <8 x i64> %res
}

declare <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float>, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_movsldup_512(<16 x float> %x0, <16 x float> %x1) {
; CHECK-LABEL: test_int_x86_avx512_movsldup_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovsldup %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7e,0x48,0x12,0xc0]
; CHECK-NEXT:    ## zmm0 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> %x1, i16 -1)
  ret <16 x float> %res
}

define <16 x float>@test_int_x86_avx512_mask_movsldup_512(<16 x float> %x0, <16 x float> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_movsldup_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vmovsldup %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x12,0xc8]
; X86-NEXT:    ## zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; X86-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_movsldup_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovsldup %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x12,0xc8]
; X64-NEXT:    ## zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; X64-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> %x1, i16 %x2)
  ret <16 x float> %res
}

define <16 x float>@test_int_x86_avx512_maskz_movsldup_512(<16 x float> %x0, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_movsldup_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vmovsldup %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xc9,0x12,0xc0]
; X86-NEXT:    ## zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_movsldup_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovsldup %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xc9,0x12,0xc0]
; X64-NEXT:    ## zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> zeroinitializer, i16 %x2)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float>, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_movshdup_512(<16 x float> %x0, <16 x float> %x1) {
; CHECK-LABEL: test_int_x86_avx512_movshdup_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovshdup %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7e,0x48,0x16,0xc0]
; CHECK-NEXT:    ## zmm0 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> %x1, i16 -1)
  ret <16 x float> %res
}

define <16 x float>@test_int_x86_avx512_mask_movshdup_512(<16 x float> %x0, <16 x float> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_movshdup_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vmovshdup %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x16,0xc8]
; X86-NEXT:    ## zmm1 {%k1} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; X86-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_movshdup_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovshdup %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x16,0xc8]
; X64-NEXT:    ## zmm1 {%k1} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; X64-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> %x1, i16 %x2)
  ret <16 x float> %res
}

define <16 x float>@test_int_x86_avx512_maskz_movshdup_512(<16 x float> %x0, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_movshdup_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vmovshdup %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xc9,0x16,0xc0]
; X86-NEXT:    ## zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_movshdup_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovshdup %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xc9,0x16,0xc0]
; X64-NEXT:    ## zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> zeroinitializer, i16 %x2)
  ret <16 x float> %res
}

declare <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double>, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_movddup_512(<8 x double> %x0, <8 x double> %x1) {
; CHECK-LABEL: test_int_x86_avx512_movddup_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovddup %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xff,0x48,0x12,0xc0]
; CHECK-NEXT:    ## zmm0 = zmm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double> %x0, <8 x double> %x1, i8 -1)
  ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_mask_movddup_512(<8 x double> %x0, <8 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_movddup_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovddup %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xff,0x49,0x12,0xc8]
; X86-NEXT:    ## zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6]
; X86-NEXT:    vmovapd %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_movddup_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovddup %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xff,0x49,0x12,0xc8]
; X64-NEXT:    ## zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6]
; X64-NEXT:    vmovapd %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double> %x0, <8 x double> %x1, i8 %x2)
  ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_maskz_movddup_512(<8 x double> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_movddup_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovddup %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0xc9,0x12,0xc0]
; X86-NEXT:    ## zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_movddup_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovddup %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0xc9,0x12,0xc0]
; X64-NEXT:    ## zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double> %x0, <8 x double> zeroinitializer, i8 %x2)
  ret <8 x double> %res
}

declare <8 x double> @llvm.x86.avx512.mask.perm.df.512(<8 x double>, i32, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_perm_df_512(<8 x double> %x0, <8 x double> %x2) {
; CHECK-LABEL: test_int_x86_avx512_perm_df_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermpd $3, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x01,0xc0,0x03]
; CHECK-NEXT:    ## zmm0 = zmm0[3,0,0,0,7,4,4,4]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.perm.df.512(<8 x double> %x0, i32 3, <8 x double> %x2, i8 -1)
  ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_mask_perm_df_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_perm_df_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermpd $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x01,0xc8,0x03]
; X86-NEXT:    ## zmm1 {%k1} = zmm0[3,0,0,0,7,4,4,4]
; X86-NEXT:    vmovapd %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_perm_df_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermpd $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x01,0xc8,0x03]
; X64-NEXT:    ## zmm1 {%k1} = zmm0[3,0,0,0,7,4,4,4]
; X64-NEXT:    vmovapd %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.perm.df.512(<8 x double> %x0, i32 3, <8 x double> %x2, i8 %x3)
  ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_maskz_perm_df_512(<8 x double> %x0, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_perm_df_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermpd $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x01,0xc0,0x03]
; X86-NEXT:    ## zmm0 {%k1} {z} = zmm0[3,0,0,0,7,4,4,4]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_perm_df_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermpd $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x01,0xc0,0x03]
; X64-NEXT:    ## zmm0 {%k1} {z} = zmm0[3,0,0,0,7,4,4,4]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.perm.df.512(<8 x double> %x0, i32 3, <8 x double> zeroinitializer, i8 %x3)
  ret <8 x double> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.perm.di.512(<8 x i64>, i32, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_perm_di_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_perm_di_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermpd $3, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x01,0xc0,0x03]
; CHECK-NEXT:    ## zmm0 = zmm0[3,0,0,0,7,4,4,4]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.perm.di.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_mask_perm_di_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_perm_di_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x00,0xc8,0x03]
; X86-NEXT:    ## zmm1 {%k1} = zmm0[3,0,0,0,7,4,4,4]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_perm_di_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpermq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x00,0xc8,0x03]
; X64-NEXT:    ## zmm1 {%k1} = zmm0[3,0,0,0,7,4,4,4]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.perm.di.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 %x3)
  ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_maskz_perm_di_512(<8 x i64> %x0, i32 %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_perm_di_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermq $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x00,0xc0,0x03]
; X86-NEXT:    ## zmm0 {%k1} {z} = zmm0[3,0,0,0,7,4,4,4]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_perm_di_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpermq $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x00,0xc0,0x03]
; X64-NEXT:    ## zmm0 {%k1} {z} = zmm0[3,0,0,0,7,4,4,4]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.perm.di.512(<8 x i64> %x0, i32 3, <8 x i64> zeroinitializer, i8 %x3)
  ret <8 x i64> %res
}

define void @test_store1(<16 x float> %data, i8* %ptr, i8* %ptr2, i16 %mask) {
; X86-LABEL: test_store1:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c]
; X86-NEXT:    vmovups %zmm0, (%ecx) {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x11,0x01]
; X86-NEXT:    vmovups %zmm0, (%eax) ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x00]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_store1:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovups %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x11,0x07]
; X64-NEXT:    vmovups %zmm0, (%rsi) ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x06]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
  call void @llvm.x86.avx512.mask.storeu.ps.512(i8* %ptr2, <16 x float> %data, i16 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.ps.512(i8*, <16 x float>, i16 )

define void @test_store2(<8 x double> %data, i8* %ptr, i8* %ptr2, i8 %mask) {
; X86-LABEL: test_store2:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx ## encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovupd %zmm0, (%ecx) {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x11,0x01]
; X86-NEXT:    vmovupd %zmm0, (%eax) ## encoding: [0x62,0xf1,0xfd,0x48,0x11,0x00]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_store2:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovupd %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x11,0x07]
; X64-NEXT:    vmovupd %zmm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x48,0x11,0x06]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
  call void @llvm.x86.avx512.mask.storeu.pd.512(i8* %ptr2, <8 x double> %data, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.pd.512(i8*, <8 x double>, i8)

define void @test_mask_store_aligned_ps(<16 x float> %data, i8* %ptr, i8* %ptr2, i16 %mask) {
; X86-LABEL: test_mask_store_aligned_ps:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c]
; X86-NEXT:    vmovaps %zmm0, (%ecx) {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x29,0x01]
; X86-NEXT:    vmovaps %zmm0, (%eax) ## encoding: [0x62,0xf1,0x7c,0x48,0x29,0x00]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_store_aligned_ps:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovaps %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x29,0x07]
; X64-NEXT:    vmovaps %zmm0, (%rsi) ## encoding: [0x62,0xf1,0x7c,0x48,0x29,0x06]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
  call void @llvm.x86.avx512.mask.store.ps.512(i8* %ptr2, <16 x float> %data, i16 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.ps.512(i8*, <16 x float>, i16 )

define void @test_mask_store_aligned_pd(<8 x double> %data, i8* %ptr, i8* %ptr2, i8 %mask) {
; X86-LABEL: test_mask_store_aligned_pd:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx ## encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovapd %zmm0, (%ecx) {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x29,0x01]
; X86-NEXT:    vmovapd %zmm0, (%eax) ## encoding: [0x62,0xf1,0xfd,0x48,0x29,0x00]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_store_aligned_pd:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovapd %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x29,0x07]
; X64-NEXT:    vmovapd %zmm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x48,0x29,0x06]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
  call void @llvm.x86.avx512.mask.store.pd.512(i8* %ptr2, <8 x double> %data, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.pd.512(i8*, <8 x double>, i8)

define void@test_int_x86_avx512_mask_storeu_q_512(i8* %ptr1, i8* %ptr2, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_storeu_q_512:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx ## encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovdqu64 %zmm0, (%ecx) {%k1} ## encoding: [0x62,0xf1,0xfe,0x49,0x7f,0x01]
; X86-NEXT:    vmovdqu64 %zmm0, (%eax) ## encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x00]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_storeu_q_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovdqu64 %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfe,0x49,0x7f,0x07]
; X64-NEXT:    vmovdqu64 %zmm0, (%rsi) ## encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x06]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.q.512(i8* %ptr1, <8 x i64> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.q.512(i8* %ptr2, <8 x i64> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.q.512(i8*, <8 x i64>, i8)

define void@test_int_x86_avx512_mask_storeu_d_512(i8* %ptr1, i8* %ptr2, <16 x i32> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_storeu_d_512:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c]
; X86-NEXT:    vmovdqu32 %zmm0, (%ecx) {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x7f,0x01]
; X86-NEXT:    vmovdqu64 %zmm0, (%eax) ## encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x00]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_storeu_d_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovdqu32 %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x7f,0x07]
; X64-NEXT:    vmovdqu64 %zmm0, (%rsi) ## encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x06]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.d.512(i8* %ptr1, <16 x i32> %x1, i16 %x2)
  call void @llvm.x86.avx512.mask.storeu.d.512(i8* %ptr2, <16 x i32> %x1, i16 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.d.512(i8*, <16 x i32>, i16)

define void@test_int_x86_avx512_mask_store_q_512(i8* %ptr1, i8* %ptr2, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_store_q_512:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx ## encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovdqa64 %zmm0, (%ecx) {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x7f,0x01]
; X86-NEXT:    vmovdqa64 %zmm0, (%eax) ## encoding: [0x62,0xf1,0xfd,0x48,0x7f,0x00]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_store_q_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovdqa64 %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x7f,0x07]
; X64-NEXT:    vmovdqa64 %zmm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x48,0x7f,0x06]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.q.512(i8* %ptr1, <8 x i64> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.q.512(i8* %ptr2, <8 x i64> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.q.512(i8*, <8 x i64>, i8)

define void@test_int_x86_avx512_mask_store_d_512(i8* %ptr1, i8* %ptr2, <16 x i32> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_store_d_512:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c]
; X86-NEXT:    vmovdqa32 %zmm0, (%ecx) {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x7f,0x01]
; X86-NEXT:    vmovdqa64 %zmm0, (%eax) ## encoding: [0x62,0xf1,0xfd,0x48,0x7f,0x00]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_store_d_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovdqa32 %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x7f,0x07]
; X64-NEXT:    vmovdqa64 %zmm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x48,0x7f,0x06]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.d.512(i8* %ptr1, <16 x i32> %x1, i16 %x2)
  call void @llvm.x86.avx512.mask.store.d.512(i8* %ptr2, <16 x i32> %x1, i16 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.d.512(i8*, <16 x i32>, i16)

define <16 x float> @test_mask_load_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
; X86-LABEL: test_mask_load_aligned_ps:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovaps (%eax), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0x00]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vmovaps (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x28,0x00]
; X86-NEXT:    vmovaps (%eax), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x28,0x08]
; X86-NEXT:    vaddps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_load_aligned_ps:
; X64:       ## %bb.0:
; X64-NEXT:    vmovaps (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0x07]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovaps (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x28,0x07]
; X64-NEXT:    vmovaps (%rdi), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x28,0x0f]
; X64-NEXT:    vaddps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 -1)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> %res, i16 %mask)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 %mask)
  %res4 = fadd <16 x float> %res2, %res1
  ret <16 x float> %res4
}

declare <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8*, <16 x float>, i16)

define <16 x float> @test_mask_load_unaligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
; X86-LABEL: test_mask_load_unaligned_ps:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovups (%eax), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x00]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vmovups (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x10,0x00]
; X86-NEXT:    vmovups (%eax), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x10,0x08]
; X86-NEXT:    vaddps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_load_unaligned_ps:
; X64:       ## %bb.0:
; X64-NEXT:    vmovups (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x07]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovups (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x10,0x07]
; X64-NEXT:    vmovups (%rdi), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x10,0x0f]
; X64-NEXT:    vaddps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.loadu.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 -1)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.loadu.ps.512(i8* %ptr, <16 x float> %res, i16 %mask)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.loadu.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 %mask)
  %res4 = fadd <16 x float> %res2, %res1
  ret <16 x float> %res4
}

declare <16 x float> @llvm.x86.avx512.mask.loadu.ps.512(i8*, <16 x float>, i16)

define <8 x double> @test_mask_load_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
; X86-LABEL: test_mask_load_aligned_pd:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovapd (%eax), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0x00]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vmovapd (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x28,0x00]
; X86-NEXT:    vmovapd (%eax), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x28,0x08]
; X86-NEXT:    vaddpd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_load_aligned_pd:
; X64:       ## %bb.0:
; X64-NEXT:    vmovapd (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0x07]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovapd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x28,0x07]
; X64-NEXT:    vmovapd (%rdi), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x28,0x0f]
; X64-NEXT:    vaddpd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 -1)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> %res, i8 %mask)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 %mask)
  %res4 = fadd <8 x double> %res2, %res1
  ret <8 x double> %res4
}

declare <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8*, <8 x double>, i8)

define <8 x double> @test_mask_load_unaligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
; X86-LABEL: test_mask_load_unaligned_pd:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovupd (%eax), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x10,0x00]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vmovupd (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x10,0x00]
; X86-NEXT:    vmovupd (%eax), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x10,0x08]
; X86-NEXT:    vaddpd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_load_unaligned_pd:
; X64:       ## %bb.0:
; X64-NEXT:    vmovupd (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x10,0x07]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovupd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x10,0x07]
; X64-NEXT:    vmovupd (%rdi), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x10,0x0f]
; X64-NEXT:    vaddpd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.loadu.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 -1)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.loadu.pd.512(i8* %ptr, <8 x double> %res, i8 %mask)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.loadu.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 %mask)
  %res4 = fadd <8 x double> %res2, %res1
  ret <8 x double> %res4
}

declare <8 x double> @llvm.x86.avx512.mask.loadu.pd.512(i8*, <8 x double>, i8)

declare <16 x i32> @llvm.x86.avx512.mask.loadu.d.512(i8*, <16 x i32>, i16)

define <16 x i32> @test_mask_load_unaligned_d(i8* %ptr, i8* %ptr2, <16 x i32> %data, i16 %mask) {
; X86-LABEL: test_mask_load_unaligned_d:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    vmovdqu64 (%ecx), %zmm0 ## encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x01]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c]
; X86-NEXT:    vmovdqu32 (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x6f,0x00]
; X86-NEXT:    vmovdqu32 (%ecx), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xc9,0x6f,0x09]
; X86-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_load_unaligned_d:
; X64:       ## %bb.0:
; X64-NEXT:    vmovdqu64 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x07]
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovdqu32 (%rsi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x6f,0x06]
; X64-NEXT:    vmovdqu32 (%rdi), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xc9,0x6f,0x0f]
; X64-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.loadu.d.512(i8* %ptr, <16 x i32> zeroinitializer, i16 -1)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.loadu.d.512(i8* %ptr2, <16 x i32> %res, i16 %mask)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.loadu.d.512(i8* %ptr, <16 x i32> zeroinitializer, i16 %mask)
  %res4 = add <16 x i32> %res2, %res1
  ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.loadu.q.512(i8*, <8 x i64>, i8)

define <8 x i64> @test_mask_load_unaligned_q(i8* %ptr, i8* %ptr2, <8 x i64> %data, i8 %mask) {
; X86-LABEL: test_mask_load_unaligned_q:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    vmovdqu64 (%ecx), %zmm0 ## encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x01]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx ## encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovdqu64 (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfe,0x49,0x6f,0x00]
; X86-NEXT:    vmovdqu64 (%ecx), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0xc9,0x6f,0x09]
; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_load_unaligned_q:
; X64:       ## %bb.0:
; X64-NEXT:    vmovdqu64 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x07]
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovdqu64 (%rsi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfe,0x49,0x6f,0x06]
; X64-NEXT:    vmovdqu64 (%rdi), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0xc9,0x6f,0x0f]
; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.loadu.q.512(i8* %ptr, <8 x i64> zeroinitializer, i8 -1)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.loadu.q.512(i8* %ptr2, <8 x i64> %res, i8 %mask)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.loadu.q.512(i8* %ptr, <8 x i64> zeroinitializer, i8 %mask)
  %res4 = add <8 x i64> %res2, %res1
  ret <8 x i64> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.load.d.512(i8*, <16 x i32>, i16)

define <16 x i32> @test_mask_load_aligned_d(<16 x i32> %data, i8* %ptr, i16 %mask) {
; X86-LABEL: test_mask_load_aligned_d:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovdqa64 (%eax), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x00]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vmovdqa32 (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6f,0x00]
; X86-NEXT:    vmovdqa32 (%eax), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6f,0x08]
; X86-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_load_aligned_d:
; X64:       ## %bb.0:
; X64-NEXT:    vmovdqa64 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x07]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovdqa32 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6f,0x07]
; X64-NEXT:    vmovdqa32 (%rdi), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6f,0x0f]
; X64-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.load.d.512(i8* %ptr, <16 x i32> zeroinitializer, i16 -1)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.load.d.512(i8* %ptr, <16 x i32> %res, i16 %mask)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.load.d.512(i8* %ptr, <16 x i32> zeroinitializer, i16 %mask)
  %res4 = add <16 x i32> %res2, %res1
  ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.load.q.512(i8*, <8 x i64>, i8)

define <8 x i64> @test_mask_load_aligned_q(<8 x i64> %data, i8* %ptr, i8 %mask) {
; X86-LABEL: test_mask_load_aligned_q:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovdqa64 (%eax), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x00]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vmovdqa64 (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x6f,0x00]
; X86-NEXT:    vmovdqa64 (%eax), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x6f,0x08]
; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_load_aligned_q:
; X64:       ## %bb.0:
; X64-NEXT:    vmovdqa64 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x07]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovdqa64 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x6f,0x07]
; X64-NEXT:    vmovdqa64 (%rdi), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x6f,0x0f]
; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.load.q.512(i8* %ptr, <8 x i64> zeroinitializer, i8 -1)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.load.q.512(i8* %ptr, <8 x i64> %res, i8 %mask)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.load.q.512(i8* %ptr, <8 x i64> zeroinitializer, i8 %mask)
  %res4 = add <8 x i64> %res2, %res1
  ret <8 x i64> %res4
}

declare <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double>, i32, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_vpermil_pd_512(<8 x double> %x0, <8 x double> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermil_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermilpd $22, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x05,0xc0,0x16]
; CHECK-NEXT:    ## zmm0 = zmm0[0,1,3,2,5,4,6,6]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> %x2, i8 -1)
  ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_mask_vpermil_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermil_pd_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermilpd $22, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x05,0xc8,0x16]
; X86-NEXT:    ## zmm1 {%k1} = zmm0[0,1,3,2,5,4,6,6]
; X86-NEXT:    vmovapd %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermil_pd_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermilpd $22, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x05,0xc8,0x16]
; X64-NEXT:    ## zmm1 {%k1} = zmm0[0,1,3,2,5,4,6,6]
; X64-NEXT:    vmovapd %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> %x2, i8 %x3)
  ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_maskz_vpermil_pd_512(<8 x double> %x0, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermil_pd_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermilpd $22, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x05,0xc0,0x16]
; X86-NEXT:    ## zmm0 {%k1} {z} = zmm0[0,1,3,2,5,4,6,6]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpermil_pd_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermilpd $22, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x05,0xc0,0x16]
; X64-NEXT:    ## zmm0 {%k1} {z} = zmm0[0,1,3,2,5,4,6,6]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> zeroinitializer, i8 %x3)
  ret <8 x double> %res
}

declare <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float>, i32, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_vpermil_ps_512(<16 x float> %x0, <16 x float> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermil_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermilps $22, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0x7d,0x48,0x04,0xc0,0x16]
; CHECK-NEXT:    ## zmm0 = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 -1)
  ret <16 x float> %res
}

define <16 x float>@test_int_x86_avx512_mask_vpermil_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermil_ps_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermilps $22, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x04,0xc8,0x16]
; X86-NEXT:    ## zmm1 {%k1} = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
; X86-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermil_ps_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermilps $22, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x04,0xc8,0x16]
; X64-NEXT:    ## zmm1 {%k1} = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
; X64-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 %x3)
  ret <16 x float> %res
}

define <16 x float>@test_int_x86_avx512_maskz_vpermil_ps_512(<16 x float> %x0, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermil_ps_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermilps $22, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x04,0xc0,0x16]
; X86-NEXT:    ## zmm0 {%k1} {z} = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpermil_ps_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermilps $22, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x04,0xc0,0x16]
; X64-NEXT:    ## zmm0 {%k1} {z} = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> zeroinitializer, i16 %x3)
  ret <16 x float> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.pshuf.d.512(<16 x i32>, i32, <16 x i32>, i16)

define <16 x i32> @test_int_x86_avx512_pshuf_d_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pshuf_d_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpermilps $3, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0x7d,0x48,0x04,0xc0,0x03]
; CHECK-NEXT: ## zmm0 = zmm0[3,0,0,0,7,4,4,4,11,8,8,8,15,12,12,12]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.pshuf.d.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 -1)
ret <16 x i32> %res
}

define <16 x i32> @test_int_x86_avx512_mask_pshuf_d_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pshuf_d_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpshufd $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x70,0xc8,0x03]
; X86-NEXT: ## zmm1 {%k1} = zmm0[3,0,0,0,7,4,4,4,11,8,8,8,15,12,12,12]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pshuf_d_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpshufd $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x70,0xc8,0x03]
; X64-NEXT: ## zmm1 {%k1} = zmm0[3,0,0,0,7,4,4,4,11,8,8,8,15,12,12,12]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.pshuf.d.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 %x3)
ret <16 x i32> %res
}

define <16 x i32> @test_int_x86_avx512_maskz_pshuf_d_512(<16 x i32> %x0, i32 %x1, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_pshuf_d_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpshufd $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x70,0xc0,0x03]
; X86-NEXT: ## zmm0 {%k1} {z} = zmm0[3,0,0,0,7,4,4,4,11,8,8,8,15,12,12,12]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pshuf_d_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpshufd $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x70,0xc0,0x03]
; X64-NEXT: ## zmm0 {%k1} {z} = zmm0[3,0,0,0,7,4,4,4,11,8,8,8,15,12,12,12]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.pshuf.d.512(<16 x i32> %x0, i32 3, <16 x i32> zeroinitializer, i16 %x3)
ret <16 x i32> %res
}

define i16 @test_pcmpeq_d(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_pcmpeq_d:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
ret i16 %res
}

define i16 @test_mask_pcmpeq_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
; X86-LABEL: test_mask_pcmpeq_d:
; X86: ## %bb.0:
; X86-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc1]
; X86-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT: andw {{[0-9]+}}(%esp), %ax ## encoding: [0x66,0x23,0x44,0x24,0x04]
; X86-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_pcmpeq_d:
; X64: ## %bb.0:
; X64-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc1]
; X64-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT: andl %edi, %eax ## encoding: [0x21,0xf8]
; X64-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
ret i16 %res
}

declare i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32>, <16 x i32>, i16)

define i8 @test_pcmpeq_q(<8 x i64> %a, <8 x i64> %b) {
; CHECK-LABEL: test_pcmpeq_q:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: def $al killed $al killed $eax
; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
ret i8 %res
}

define i8 @test_mask_pcmpeq_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
; X86-LABEL: test_mask_pcmpeq_q:
; X86: ## %bb.0:
; X86-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc1]
; X86-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT: andb {{[0-9]+}}(%esp), %al ## encoding: [0x22,0x44,0x24,0x04]
; X86-NEXT: ## kill: def $al killed $al killed $eax
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_pcmpeq_q:
; X64: ## %bb.0:
; X64-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc1]
; X64-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT: andb %dil, %al ## encoding: [0x40,0x20,0xf8]
; X64-NEXT: ## kill: def $al killed $al killed $eax
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64>, <8 x i64>, i8)

define i16 @test_pcmpgt_d(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_pcmpgt_d:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x66,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax
; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
ret i16 %res
}

define i16 @test_mask_pcmpgt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
; X86-LABEL: test_mask_pcmpgt_d:
; X86: ## %bb.0:
; X86-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x66,0xc1]
; X86-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT: andw {{[0-9]+}}(%esp), %ax ## encoding: [0x66,0x23,0x44,0x24,0x04]
; X86-NEXT: ## kill: def $ax killed $ax killed $eax
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_pcmpgt_d:
; X64: ## %bb.0:
; X64-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x66,0xc1]
; X64-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT: andl %edi, %eax ## encoding: [0x21,0xf8]
; X64-NEXT: ## kill: def $ax killed $ax killed $eax
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
ret i16 %res
}

declare i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32>, <16 x i32>, i16)

define i8 @test_pcmpgt_q(<8 x i64> %a, <8 x i64> %b) {
; CHECK-LABEL: test_pcmpgt_q:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x37,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: def $al killed $al killed $eax
; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
ret i8 %res
}

define i8 @test_mask_pcmpgt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
; X86-LABEL: test_mask_pcmpgt_q:
; X86: ## %bb.0:
; X86-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x37,0xc1]
; X86-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT: andb {{[0-9]+}}(%esp), %al ## encoding: [0x22,0x44,0x24,0x04]
; X86-NEXT: ## kill: def $al killed $al killed $eax
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_pcmpgt_q:
; X64: ## %bb.0:
; X64-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x37,0xc1]
; X64-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT: andb %dil, %al ## encoding: [0x40,0x20,0xf8]
; X64-NEXT: ## kill: def $al killed $al killed $eax
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64>, <8 x i64>, i8)

declare <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double>, <8 x double>, <8 x double>, i8)

define <8 x double> @test_int_x86_avx512_unpckh_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2) {
; CHECK-LABEL: test_int_x86_avx512_unpckh_pd_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vunpckhpd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x15,0xc1]
; CHECK-NEXT: ## zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1)
ret <8 x double> %res
}

define <8 x double> @test_int_x86_avx512_mask_unpckh_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_unpckh_pd_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vunpckhpd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x15,0xd1]
; X86-NEXT: ## zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; X86-NEXT: vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_unpckh_pd_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vunpckhpd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x15,0xd1]
; X64-NEXT: ## zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; X64-NEXT: vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
ret <8 x double> %res
}

declare <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)

define <16 x float> @test_int_x86_avx512_unpckh_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2) {
; CHECK-LABEL: test_int_x86_avx512_unpckh_ps_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vunpckhps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x15,0xc1]
; CHECK-NEXT: ## zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1)
ret <16 x float> %res
}

define <16 x float> @test_int_x86_avx512_mask_unpckh_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_unpckh_ps_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vunpckhps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x15,0xd1]
; X86-NEXT: ## zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_unpckh_ps_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vunpckhps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x15,0xd1]
; X64-NEXT: ## zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
ret <16 x float> %res
}

declare <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double>, <8 x double>, <8 x double>, i8)

define <8 x double> @test_int_x86_avx512_unpckl_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2) {
; CHECK-LABEL: test_int_x86_avx512_unpckl_pd_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vunpcklpd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x14,0xc1]
; CHECK-NEXT: ## zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1)
ret <8 x double> %res
}

define <8 x double> @test_int_x86_avx512_mask_unpckl_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_unpckl_pd_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vunpcklpd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x14,0xd1]
; X86-NEXT: ## zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; X86-NEXT: vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_unpckl_pd_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vunpcklpd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x14,0xd1]
; X64-NEXT: ## zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; X64-NEXT: vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
ret <8 x double> %res
}

declare <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)

define <16 x float> @test_int_x86_avx512_unpckl_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2) {
; CHECK-LABEL: test_int_x86_avx512_unpckl_ps_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vunpcklps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x14,0xc1]
; CHECK-NEXT: ## zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1)
ret <16 x float> %res
}

define <16 x float> @test_int_x86_avx512_mask_unpckl_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_unpckl_ps_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vunpcklps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x14,0xd1]
; X86-NEXT: ## zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_unpckl_ps_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vunpcklps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x14,0xd1]
; X64-NEXT: ## zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
ret <16 x float> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

define <8 x i64> @test_int_x86_avx512_punpcklqd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_punpcklqd_q_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vunpcklpd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x14,0xc1]
; CHECK-NEXT: ## zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
ret <8 x i64> %res
}

define <8 x i64> @test_int_x86_avx512_mask_punpcklqd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_punpcklqd_q_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpunpcklqdq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x6c,0xd1]
; X86-NEXT: ## zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_punpcklqd_q_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpunpcklqdq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x6c,0xd1]
; X64-NEXT: ## zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
ret <8 x i64> %res
}

define <8 x i64> @test_int_x86_avx512_maskz_punpcklqd_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_punpcklqd_q_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpunpcklqdq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x6c,0xc1]
; X86-NEXT: ## zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_punpcklqd_q_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpunpcklqdq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x6c,0xc1]
; X64-NEXT: ## zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer, i8 %x3)
ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

define <8 x i64> @test_int_x86_avx512_punpckhqd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_punpckhqd_q_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vunpckhpd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x15,0xc1]
; CHECK-NEXT: ## zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
ret <8 x i64> %res
}

define <8 x i64> @test_int_x86_avx512_mask_punpckhqd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_punpckhqd_q_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpunpckhqdq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x6d,0xd1]
; X86-NEXT: ## zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_punpckhqd_q_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpunpckhqdq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x6d,0xd1]
; X64-NEXT: ## zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
ret <8 x i64> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <16 x i32> @test_int_x86_avx512_punpckhd_q_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_punpckhd_q_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vunpckhps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x15,0xc1]
; CHECK-NEXT: ## zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
ret <16 x i32> %res
}

define <16 x i32> @test_int_x86_avx512_mask_punpckhd_q_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_punpckhd_q_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpunpckhdq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6a,0xd1]
; X86-NEXT: ## zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_punpckhd_q_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpunpckhdq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6a,0xd1]
; X64-NEXT: ## zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <16 x i32> @test_int_x86_avx512_punpckld_q_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_punpckld_q_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vunpcklps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x14,0xc1]
; CHECK-NEXT: ## zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
ret <16 x i32> %res
}

define <16 x i32> @test_int_x86_avx512_mask_punpckld_q_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_punpckld_q_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpunpckldq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x62,0xd1]
; X86-NEXT: ## zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_punpckld_q_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpunpckldq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x62,0xd1]
; X64-NEXT: ## zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
ret <16 x i32> %res
}

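; The immediate-shift tests below (pslli/psrli/psrai, d and q) all follow the
; same pattern: an all-ones mask exercises the plain instruction, a
; pass-through vector plus a mask exercises merge masking {%k1}, and a
; zeroinitializer pass-through exercises zero masking {%k1} {z}.
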
define <16 x i32> @test_x86_avx512_pslli_d(<16 x i32> %a0) {
; CHECK-LABEL: test_x86_avx512_pslli_d:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpslld $7, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xf0,0x07]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_pslli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
; X86-LABEL: test_x86_avx512_mask_pslli_d:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpslld $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xf0,0x07]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_pslli_d:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpslld $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xf0,0x07]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_pslli_d(<16 x i32> %a0, i16 %mask) {
; X86-LABEL: test_x86_avx512_maskz_pslli_d:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpslld $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xf0,0x07]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_pslli_d:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpslld $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xf0,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_pslli_q(<8 x i64> %a0) {
; CHECK-LABEL: test_x86_avx512_pslli_q:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsllq $7, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x73,0xf0,0x07]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_pslli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
; X86-LABEL: test_x86_avx512_mask_pslli_q:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsllq $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x73,0xf0,0x07]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_pslli_q:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsllq $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x73,0xf0,0x07]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_pslli_q(<8 x i64> %a0, i8 %mask) {
; X86-LABEL: test_x86_avx512_maskz_pslli_q:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsllq $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x73,0xf0,0x07]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_pslli_q:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsllq $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x73,0xf0,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone

define <16 x i32> @test_x86_avx512_psrli_d(<16 x i32> %a0) {
; CHECK-LABEL: test_x86_avx512_psrli_d:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsrld $7, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xd0,0x07]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psrli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
; X86-LABEL: test_x86_avx512_mask_psrli_d:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsrld $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xd0,0x07]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psrli_d:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsrld $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xd0,0x07]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_psrli_d(<16 x i32> %a0, i16 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psrli_d:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsrld $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xd0,0x07]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psrli_d:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsrld $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xd0,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_psrli_q(<8 x i64> %a0) {
; CHECK-LABEL: test_x86_avx512_psrli_q:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsrlq $7, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x73,0xd0,0x07]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psrli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
; X86-LABEL: test_x86_avx512_mask_psrli_q:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsrlq $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x73,0xd0,0x07]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psrli_q:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsrlq $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x73,0xd0,0x07]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_psrli_q(<8 x i64> %a0, i8 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psrli_q:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsrlq $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x73,0xd0,0x07]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psrli_q:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsrlq $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x73,0xd0,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone

define <16 x i32> @test_x86_avx512_psrai_d(<16 x i32> %a0) {
; CHECK-LABEL: test_x86_avx512_psrai_d:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsrad $7, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xe0,0x07]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psrai_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
; X86-LABEL: test_x86_avx512_mask_psrai_d:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsrad $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xe0,0x07]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psrai_d:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsrad $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xe0,0x07]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_psrai_d(<16 x i32> %a0, i16 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psrai_d:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsrad $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xe0,0x07]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psrai_d:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsrad $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xe0,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_psrai_q(<8 x i64> %a0) {
; CHECK-LABEL: test_x86_avx512_psrai_q:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsraq $7, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x72,0xe0,0x07]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psrai_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
; X86-LABEL: test_x86_avx512_mask_psrai_q:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsraq $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xe0,0x07]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psrai_q:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsraq $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xe0,0x07]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_psrai_q(<8 x i64> %a0, i8 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psrai_q:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsraq $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x72,0xe0,0x07]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psrai_q:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsraq $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x72,0xe0,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone

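; Non-temporal store tests: as the checks below show, the legacy storent
; intrinsics are emitted as a full-width vmovntps regardless of whether the
; stored type is q.512, pd.512 or ps.512.
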
declare void @llvm.x86.avx512.storent.q.512(i8*, <8 x i64>)

define void @test_storent_q_512(<8 x i64> %data, i8* %ptr) {
; X86-LABEL: test_storent_q_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovntps %zmm0, (%eax) ## encoding: [0x62,0xf1,0x7c,0x48,0x2b,0x00]
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_storent_q_512:
; X64: ## %bb.0:
; X64-NEXT: vmovntps %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x48,0x2b,0x07]
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.storent.q.512(i8* %ptr, <8 x i64> %data)
ret void
}

declare void @llvm.x86.avx512.storent.pd.512(i8*, <8 x double>)

define void @test_storent_pd_512(<8 x double> %data, i8* %ptr) {
; X86-LABEL: test_storent_pd_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovntps %zmm0, (%eax) ## encoding: [0x62,0xf1,0x7c,0x48,0x2b,0x00]
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_storent_pd_512:
; X64: ## %bb.0:
; X64-NEXT: vmovntps %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x48,0x2b,0x07]
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.storent.pd.512(i8* %ptr, <8 x double> %data)
ret void
}

declare void @llvm.x86.avx512.storent.ps.512(i8*, <16 x float>)

define void @test_storent_ps_512(<16 x float> %data, i8* %ptr) {
; X86-LABEL: test_storent_ps_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovntps %zmm0, (%eax) ## encoding: [0x62,0xf1,0x7c,0x48,0x2b,0x00]
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_storent_ps_512:
; X64: ## %bb.0:
; X64-NEXT: vmovntps %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x48,0x2b,0x07]
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.storent.ps.512(i8* %ptr, <16 x float> %data)
ret void
}

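; Bitwise-logic tests: the d and q intrinsic forms compute the same bit
; pattern, so these checks mainly pin down the element-width-specific EVEX
; encodings (vpxord vs. vpxorq, etc.).
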
define <16 x i32> @test_xor_epi32(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_xor_epi32:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpxord %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xef,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %res
}

define <16 x i32> @test_mask_xor_epi32(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
; X86-LABEL: test_mask_xor_epi32:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpxord %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xef,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_xor_epi32:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpxord %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xef,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <16 x i32> @test_or_epi32(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_or_epi32:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpord %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xeb,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %res
}

define <16 x i32> @test_mask_or_epi32(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
; X86-LABEL: test_mask_or_epi32:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpord %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xeb,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpord %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xeb,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <16 x i32> @test_and_epi32(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_and_epi32:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpandd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xdb,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %res
}

define <16 x i32> @test_mask_and_epi32(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
; X86-LABEL: test_mask_and_epi32:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpandd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xdb,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpandd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xdb,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <8 x i64> @test_xor_epi64(<8 x i64> %a, <8 x i64> %b) {
; CHECK-LABEL: test_xor_epi64:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpxorq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xef,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_xor_epi64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_xor_epi64:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpxorq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xef,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_xor_epi64:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpxorq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xef,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

define <8 x i64> @test_or_epi64(<8 x i64> %a, <8 x i64> %b) {
; CHECK-LABEL: test_or_epi64:
; CHECK: ## %bb.0:
; CHECK-NEXT: vporq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xeb,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_or_epi64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_or_epi64:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vporq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xeb,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi64:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vporq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xeb,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

define <8 x i64> @test_and_epi64(<8 x i64> %a, <8 x i64> %b) {
; CHECK-LABEL: test_and_epi64:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpandq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xdb,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_and_epi64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_and_epi64:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpandq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xdb,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi64:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpandq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xdb,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

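; The add/sub tests below encode their variants in the suffix: rr = reg/reg,
; rm = reg/mem, rmb = reg/mem broadcast ({1to16}), with a trailing k for merge
; masking and kz for zero masking.
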
define <16 x i32> @test_mask_add_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_mask_add_epi32_rr:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %res
}

define <16 x i32> @test_mask_add_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
; X86-LABEL: test_mask_add_epi32_rrk:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpaddd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfe,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi32_rrk:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpaddd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfe,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret <16 x i32> %res
}

define <16 x i32> @test_mask_add_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
; X86-LABEL: test_mask_add_epi32_rrkz:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfe,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi32_rrkz:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfe,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}

define <16 x i32> @test_mask_add_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
; X86-LABEL: test_mask_add_epi32_rm:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpaddd (%eax), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi32_rm:
; X64: ## %bb.0:
; X64-NEXT: vpaddd (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %res
}

define <16 x i32> @test_mask_add_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
; X86-LABEL: test_mask_add_epi32_rmk:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpaddd (%eax), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfe,0x08]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi32_rmk:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpaddd (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfe,0x0f]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret <16 x i32> %res
}

define <16 x i32> @test_mask_add_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
; X86-LABEL: test_mask_add_epi32_rmkz:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpaddd (%eax), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfe,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi32_rmkz:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfe,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}

2152 define <16 x i32> @test_mask_add_epi32_rmb(<16 x i32> %a, i32* %ptr_b) {
2153 ; X86-LABEL: test_mask_add_epi32_rmb:
2155 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2156 ; X86-NEXT: vpaddd (%eax){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0xfe,0x00]
2157 ; X86-NEXT: retl ## encoding: [0xc3]
2159 ; X64-LABEL: test_mask_add_epi32_rmb:
2161 ; X64-NEXT: vpaddd (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0xfe,0x07]
2162 ; X64-NEXT: retq ## encoding: [0xc3]
2163 %q = load i32, i32* %ptr_b
2164 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2165 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2166 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
2167 ret < 16 x i32> %res
2170 define <16 x i32> @test_mask_add_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
2171 ; X86-LABEL: test_mask_add_epi32_rmbk:
2173 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2174 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
2175 ; X86-NEXT: vpaddd (%eax){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0xfe,0x08]
2176 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2177 ; X86-NEXT: retl ## encoding: [0xc3]
2179 ; X64-LABEL: test_mask_add_epi32_rmbk:
2181 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
2182 ; X64-NEXT: vpaddd (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0xfe,0x0f]
2183 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
2184 ; X64-NEXT: retq ## encoding: [0xc3]
2185 %q = load i32, i32* %ptr_b
2186 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2187 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2188 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
2189 ret < 16 x i32> %res
2192 define <16 x i32> @test_mask_add_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
2193 ; X86-LABEL: test_mask_add_epi32_rmbkz:
2195 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
2196 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
2197 ; X86-NEXT: vpaddd (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0xfe,0x00]
2198 ; X86-NEXT: retl ## encoding: [0xc3]
2200 ; X64-LABEL: test_mask_add_epi32_rmbkz:
2202 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
2203 ; X64-NEXT: vpaddd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0xfe,0x07]
2204 ; X64-NEXT: retq ## encoding: [0xc3]
2205 %q = load i32, i32* %ptr_b
2206 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
2207 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
2208 %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
2209 ret < 16 x i32> %res
2212 declare <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
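
; The same rr/rrk/rrkz, rm/rmk/rmkz, and broadcast rmb/rmbk/rmbkz matrix,
; repeated below for the masked 512-bit 32-bit-element subtract intrinsic.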
define <16 x i32> @test_mask_sub_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_mask_sub_epi32_rr:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %res
}

define <16 x i32> @test_mask_sub_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
; X86-LABEL: test_mask_sub_epi32_rrk:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsubd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfa,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi32_rrk:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsubd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfa,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret <16 x i32> %res
}

define <16 x i32> @test_mask_sub_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
; X86-LABEL: test_mask_sub_epi32_rrkz:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsubd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfa,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi32_rrkz:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsubd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfa,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}

define <16 x i32> @test_mask_sub_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
; X86-LABEL: test_mask_sub_epi32_rm:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpsubd (%eax), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi32_rm:
; X64: ## %bb.0:
; X64-NEXT: vpsubd (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %res
}

define <16 x i32> @test_mask_sub_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
; X86-LABEL: test_mask_sub_epi32_rmk:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpsubd (%eax), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfa,0x08]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi32_rmk:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpsubd (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfa,0x0f]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret <16 x i32> %res
}

define <16 x i32> @test_mask_sub_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
; X86-LABEL: test_mask_sub_epi32_rmkz:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpsubd (%eax), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfa,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi32_rmkz:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpsubd (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfa,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}

define <16 x i32> @test_mask_sub_epi32_rmb(<16 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_sub_epi32_rmb:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpsubd (%eax){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0xfa,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi32_rmb:
; X64: ## %bb.0:
; X64-NEXT: vpsubd (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0xfa,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %res
}

define <16 x i32> @test_mask_sub_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
; X86-LABEL: test_mask_sub_epi32_rmbk:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpsubd (%eax){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0xfa,0x08]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi32_rmbk:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpsubd (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0xfa,0x0f]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret <16 x i32> %res
}

define <16 x i32> @test_mask_sub_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
; X86-LABEL: test_mask_sub_epi32_rmbkz:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpsubd (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0xfa,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi32_rmbkz:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpsubd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0xfa,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
%res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
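
; The matrix repeated for the 64-bit-element add intrinsic; the i8 mask is
; zero-extended with movzbl on X86 before the kmovw, and the broadcast forms
; use {1to8} instead of {1to16}.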
define <8 x i64> @test_mask_add_epi64_rr(<8 x i64> %a, <8 x i64> %b) {
; CHECK-LABEL: test_mask_add_epi64_rr:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_add_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_add_epi64_rrk:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpaddq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi64_rrk:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpaddq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_add_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
; X86-LABEL: test_mask_add_epi64_rrkz:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpaddq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd4,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi64_rrkz:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpaddq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd4,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_add_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) {
; X86-LABEL: test_mask_add_epi64_rm:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpaddq (%eax), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi64_rm:
; X64: ## %bb.0:
; X64-NEXT: vpaddq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%b = load <8 x i64>, <8 x i64>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_add_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_add_epi64_rmk:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpaddq (%eax), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0x08]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi64_rmk:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpaddq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0x0f]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%b = load <8 x i64>, <8 x i64>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_add_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_add_epi64_rmkz:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpaddq (%eax), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd4,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi64_rmkz:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpaddq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd4,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%b = load <8 x i64>, <8 x i64>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_add_epi64_rmb(<8 x i64> %a, i64* %ptr_b) {
; X86-LABEL: test_mask_add_epi64_rmb:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpaddq (%eax){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xd4,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi64_rmb:
; X64: ## %bb.0:
; X64-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xd4,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
%b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_add_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_add_epi64_rmbk:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpaddq (%eax){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xd4,0x08]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi64_rmbk:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xd4,0x0f]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
%b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_add_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_add_epi64_rmbkz:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpaddq (%eax){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xd4,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi64_rmbkz:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xd4,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
%b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
%res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
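
; 64-bit-element subtract intrinsic, same operand and masking matrix.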
define <8 x i64> @test_mask_sub_epi64_rr(<8 x i64> %a, <8 x i64> %b) {
; CHECK-LABEL: test_mask_sub_epi64_rr:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_sub_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_sub_epi64_rrk:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsubq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xfb,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi64_rrk:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsubq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xfb,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_sub_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
; X86-LABEL: test_mask_sub_epi64_rrkz:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsubq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xfb,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi64_rrkz:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsubq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xfb,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_sub_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) {
; X86-LABEL: test_mask_sub_epi64_rm:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpsubq (%eax), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi64_rm:
; X64: ## %bb.0:
; X64-NEXT: vpsubq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%b = load <8 x i64>, <8 x i64>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_sub_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_sub_epi64_rmk:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpsubq (%eax), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xfb,0x08]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi64_rmk:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpsubq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xfb,0x0f]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%b = load <8 x i64>, <8 x i64>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_sub_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_sub_epi64_rmkz:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpsubq (%eax), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xfb,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi64_rmkz:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpsubq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xfb,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%b = load <8 x i64>, <8 x i64>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_sub_epi64_rmb(<8 x i64> %a, i64* %ptr_b) {
; X86-LABEL: test_mask_sub_epi64_rmb:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpsubq (%eax){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xfb,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi64_rmb:
; X64: ## %bb.0:
; X64-NEXT: vpsubq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xfb,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
%b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_sub_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_sub_epi64_rmbk:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpsubq (%eax){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xfb,0x08]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi64_rmbk:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpsubq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xfb,0x0f]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
%b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_sub_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_sub_epi64_rmbkz:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpsubq (%eax){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xfb,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi64_rmbkz:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpsubq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xfb,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%q = load i64, i64* %ptr_b
%vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
%b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
%res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
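
; Low-half 32-bit multiply (mullo, vpmulld), same operand and masking matrix.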
define <16 x i32> @test_mask_mullo_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_mask_mullo_epi32_rr_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpmulld %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x40,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %res
}

define <16 x i32> @test_mask_mullo_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
; X86-LABEL: test_mask_mullo_epi32_rrk_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpmulld %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x40,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mullo_epi32_rrk_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmulld %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x40,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret <16 x i32> %res
}

define <16 x i32> @test_mask_mullo_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
; X86-LABEL: test_mask_mullo_epi32_rrkz_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpmulld %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x40,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mullo_epi32_rrkz_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmulld %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x40,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}

define <16 x i32> @test_mask_mullo_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
; X86-LABEL: test_mask_mullo_epi32_rm_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpmulld (%eax), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x40,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mullo_epi32_rm_512:
; X64: ## %bb.0:
; X64-NEXT: vpmulld (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x40,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %res
}

define <16 x i32> @test_mask_mullo_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
; X86-LABEL: test_mask_mullo_epi32_rmk_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpmulld (%eax), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x40,0x08]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mullo_epi32_rmk_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpmulld (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x40,0x0f]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret <16 x i32> %res
}

define <16 x i32> @test_mask_mullo_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
; X86-LABEL: test_mask_mullo_epi32_rmkz_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpmulld (%eax), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x40,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mullo_epi32_rmkz_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpmulld (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x40,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}

define <16 x i32> @test_mask_mullo_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_mullo_epi32_rmb_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpmulld (%eax){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x58,0x40,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mullo_epi32_rmb_512:
; X64: ## %bb.0:
; X64-NEXT: vpmulld (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x58,0x40,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %res
}

define <16 x i32> @test_mask_mullo_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
; X86-LABEL: test_mask_mullo_epi32_rmbk_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpmulld (%eax){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x59,0x40,0x08]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mullo_epi32_rmbk_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpmulld (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x59,0x40,0x0f]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
ret <16 x i32> %res
}

define <16 x i32> @test_mask_mullo_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
; X86-LABEL: test_mask_mullo_epi32_rmbkz_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpmulld (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xd9,0x40,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mullo_epi32_rmbkz_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpmulld (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xd9,0x40,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%q = load i32, i32* %ptr_b
%vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
%b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
%res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
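
; Shuffle intrinsics: vshuff32x4/vshuff64x2/vshufi32x4/vshufi64x2 select whole
; 128-bit blocks from the two sources, while vshufpd/vshufps shuffle at element
; granularity; the expected lane selections are spelled out in the ## zmm
; comments attached to the check lines.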
declare <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float>, <16 x float>, i32, <16 x float>, i16)

define <16 x float> @test_int_x86_avx512_shuf_f32x4(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3) {
; CHECK-LABEL: test_int_x86_avx512_shuf_f32x4:
; CHECK: ## %bb.0:
; CHECK-NEXT: vshuff64x2 $22, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x23,0xc1,0x16]
; CHECK-NEXT: ## zmm0 = zmm0[4,5,2,3],zmm1[2,3,0,1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 -1)
ret <16 x float> %res
}

define <16 x float> @test_int_x86_avx512_mask_shuf_f32x4(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_shuf_f32x4:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vshuff32x4 $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x23,0xd1,0x16]
; X86-NEXT: ## zmm2 {%k1} = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_shuf_f32x4:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vshuff32x4 $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x23,0xd1,0x16]
; X64-NEXT: ## zmm2 {%k1} = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 %x4)
ret <16 x float> %res
}

declare <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double>, <8 x double>, i32, <8 x double>, i8)

define <8 x double> @test_int_x86_avx512_shuf_f64x2(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3) {
; CHECK-LABEL: test_int_x86_avx512_shuf_f64x2:
; CHECK: ## %bb.0:
; CHECK-NEXT: vshuff64x2 $22, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x23,0xc1,0x16]
; CHECK-NEXT: ## zmm0 = zmm0[4,5,2,3],zmm1[2,3,0,1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 -1)
ret <8 x double> %res
}

define <8 x double> @test_int_x86_avx512_mask_shuf_f64x2(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_shuf_f64x2:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vshuff64x2 $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x23,0xd1,0x16]
; X86-NEXT: ## zmm2 {%k1} = zmm0[4,5,2,3],zmm1[2,3,0,1]
; X86-NEXT: vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_shuf_f64x2:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vshuff64x2 $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x23,0xd1,0x16]
; X64-NEXT: ## zmm2 {%k1} = zmm0[4,5,2,3],zmm1[2,3,0,1]
; X64-NEXT: vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 %x4)
ret <8 x double> %res
}

define <8 x double> @test_int_x86_avx512_maskz_shuf_f64x2(<8 x double> %x0, <8 x double> %x1, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_maskz_shuf_f64x2:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vshuff64x2 $22, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x23,0xc1,0x16]
; X86-NEXT: ## zmm0 {%k1} {z} = zmm0[4,5,2,3],zmm1[2,3,0,1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_shuf_f64x2:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vshuff64x2 $22, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x23,0xc1,0x16]
; X64-NEXT: ## zmm0 {%k1} {z} = zmm0[4,5,2,3],zmm1[2,3,0,1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> zeroinitializer, i8 %x4)
ret <8 x double> %res
}
declare <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16)

define <16 x i32> @test_int_x86_avx512_shuf_i32x4(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3) {
; CHECK-LABEL: test_int_x86_avx512_shuf_i32x4:
; CHECK: ## %bb.0:
; CHECK-NEXT: vshufi64x2 $22, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x43,0xc1,0x16]
; CHECK-NEXT: ## zmm0 = zmm0[4,5,2,3],zmm1[2,3,0,1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 -1)
ret <16 x i32> %res
}

define <16 x i32> @test_int_x86_avx512_mask_shuf_i32x4(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_shuf_i32x4:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vshufi32x4 $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x43,0xd1,0x16]
; X86-NEXT: ## zmm2 {%k1} = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_shuf_i32x4:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vshufi32x4 $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x43,0xd1,0x16]
; X64-NEXT: ## zmm2 {%k1} = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 %x4)
ret <16 x i32> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8)

define <8 x i64> @test_int_x86_avx512_shuf_i64x2(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3) {
; CHECK-LABEL: test_int_x86_avx512_shuf_i64x2:
; CHECK: ## %bb.0:
; CHECK-NEXT: vshufi64x2 $22, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x43,0xc1,0x16]
; CHECK-NEXT: ## zmm0 = zmm0[4,5,2,3],zmm1[2,3,0,1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 -1)
ret <8 x i64> %res
}

define <8 x i64> @test_int_x86_avx512_mask_shuf_i64x2(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_shuf_i64x2:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vshufi64x2 $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x43,0xd1,0x16]
; X86-NEXT: ## zmm2 {%k1} = zmm0[4,5,2,3],zmm1[2,3,0,1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_shuf_i64x2:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vshufi64x2 $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x43,0xd1,0x16]
; X64-NEXT: ## zmm2 {%k1} = zmm0[4,5,2,3],zmm1[2,3,0,1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 %x4)
ret <8 x i64> %res
}
declare <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double>, <8 x double>, i32, <8 x double>, i8)

define <8 x double> @test_int_x86_avx512_shuf_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3) {
; CHECK-LABEL: test_int_x86_avx512_shuf_pd_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vshufpd $22, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xc6,0xc1,0x16]
; CHECK-NEXT: ## zmm0 = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 -1)
ret <8 x double> %res
}

define <8 x double> @test_int_x86_avx512_mask_shuf_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_shuf_pd_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vshufpd $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xc6,0xd1,0x16]
; X86-NEXT: ## zmm2 {%k1} = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
; X86-NEXT: vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_shuf_pd_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vshufpd $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xc6,0xd1,0x16]
; X64-NEXT: ## zmm2 {%k1} = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
; X64-NEXT: vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 %x4)
ret <8 x double> %res
}

define <8 x double> @test_int_x86_avx512_maskz_shuf_pd_512(<8 x double> %x0, <8 x double> %x1, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_maskz_shuf_pd_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vshufpd $22, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xc6,0xc1,0x16]
; X86-NEXT: ## zmm0 {%k1} {z} = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_shuf_pd_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vshufpd $22, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xc6,0xc1,0x16]
; X64-NEXT: ## zmm0 {%k1} {z} = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> zeroinitializer, i8 %x4)
ret <8 x double> %res
}

declare <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float>, <16 x float>, i32, <16 x float>, i16)

define <16 x float> @test_int_x86_avx512_shuf_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3) {
; CHECK-LABEL: test_int_x86_avx512_shuf_ps_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vshufps $22, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0xc6,0xc1,0x16]
; CHECK-NEXT: ## zmm0 = zmm0[2,1],zmm1[1,0],zmm0[6,5],zmm1[5,4],zmm0[10,9],zmm1[9,8],zmm0[14,13],zmm1[13,12]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 -1)
ret <16 x float> %res
}

define <16 x float> @test_int_x86_avx512_mask_shuf_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_shuf_ps_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vshufps $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0xc6,0xd1,0x16]
; X86-NEXT: ## zmm2 {%k1} = zmm0[2,1],zmm1[1,0],zmm0[6,5],zmm1[5,4],zmm0[10,9],zmm1[9,8],zmm0[14,13],zmm1[13,12]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_shuf_ps_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vshufps $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0xc6,0xd1,0x16]
; X64-NEXT: ## zmm2 {%k1} = zmm0[2,1],zmm1[1,0],zmm0[6,5],zmm1[5,4],zmm0[10,9],zmm1[9,8],zmm0[14,13],zmm1[13,12]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 %x4)
ret <16 x float> %res
}
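
; Signed and unsigned integer min/max intrinsics (pmaxs/pmaxu/pmins/pminu, for
; both d and q element sizes), each tested unmasked and merge-masked.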
declare <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <16 x i32> @test_int_x86_avx512_pmaxs_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pmaxs_d_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x3d,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
ret <16 x i32> %res
}

define <16 x i32> @test_int_x86_avx512_mask_pmaxs_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmaxs_d_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpmaxsd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x3d,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmaxs_d_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmaxsd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x3d,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
ret <16 x i32> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

define <8 x i64> @test_int_x86_avx512_pmaxs_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pmaxs_q_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x3d,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
ret <8 x i64> %res
}

define <8 x i64> @test_int_x86_avx512_mask_pmaxs_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmaxs_q_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmaxsq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x3d,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmaxs_q_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmaxsq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x3d,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
ret <8 x i64> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <16 x i32> @test_int_x86_avx512_pmaxu_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pmaxu_d_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpmaxud %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x3f,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
ret <16 x i32> %res
}

define <16 x i32> @test_int_x86_avx512_mask_pmaxu_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmaxu_d_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpmaxud %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x3f,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmaxu_d_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmaxud %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x3f,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
ret <16 x i32> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

define <8 x i64> @test_int_x86_avx512_pmaxu_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pmaxu_q_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x3f,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
ret <8 x i64> %res
}

define <8 x i64> @test_int_x86_avx512_mask_pmaxu_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmaxu_q_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmaxuq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x3f,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmaxu_q_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmaxuq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x3f,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
ret <8 x i64> %res
}
3214 declare <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
3216 define <16 x i32>@test_int_x86_avx512_pmins_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
3217 ; CHECK-LABEL: test_int_x86_avx512_pmins_d_512:
3219 ; CHECK-NEXT: vpminsd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x39,0xc1]
3220 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
3221 %res = call <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
3225 define <16 x i32>@test_int_x86_avx512_mask_pmins_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
3226 ; X86-LABEL: test_int_x86_avx512_mask_pmins_d_512:
3228 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
3229 ; X86-NEXT: vpminsd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x39,0xd1]
3230 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3231 ; X86-NEXT: retl ## encoding: [0xc3]
3233 ; X64-LABEL: test_int_x86_avx512_mask_pmins_d_512:
3235 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3236 ; X64-NEXT: vpminsd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x39,0xd1]
3237 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3238 ; X64-NEXT: retq ## encoding: [0xc3]
3239 %res = call <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
3243 declare <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
3245 define <8 x i64>@test_int_x86_avx512_pmins_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) {
3246 ; CHECK-LABEL: test_int_x86_avx512_pmins_q_512:
3248 ; CHECK-NEXT: vpminsq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x39,0xc1]
3249 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
3250 %res = call <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
3254 define <8 x i64>@test_int_x86_avx512_mask_pmins_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
3255 ; X86-LABEL: test_int_x86_avx512_mask_pmins_q_512:
3257 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
3258 ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
3259 ; X86-NEXT: vpminsq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x39,0xd1]
3260 ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3261 ; X86-NEXT: retl ## encoding: [0xc3]
3263 ; X64-LABEL: test_int_x86_avx512_mask_pmins_q_512:
3265 ; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
3266 ; X64-NEXT: vpminsq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x39,0xd1]
3267 ; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
3268 ; X64-NEXT: retq ## encoding: [0xc3]
3269 %res = call <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
declare <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_pminu_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pminu_d_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpminud %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x3b,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
ret <16 x i32> %res
}

define <16 x i32>@test_int_x86_avx512_mask_pminu_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pminu_d_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpminud %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x3b,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pminu_d_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpminud %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x3b,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
ret <16 x i32> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_pminu_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pminu_q_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpminuq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x3b,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_mask_pminu_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pminu_q_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpminuq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x3b,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pminu_q_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpminuq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x3b,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
ret <8 x i64> %res
}

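; Masked scalar moves: vmovss/vmovsd with {k1} select element 0 under bit 0 of %__U,
; merging into %__W (or into zero for the maskz variants); the upper elements always
; come from the first source operand.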
define <4 x float> @test_mm_mask_move_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_mask_move_ss:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x76,0x09,0x10,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask_move_ss:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x76,0x09,0x10,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
%res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> %__W, i8 %__U)
ret <4 x float> %res
}

define <4 x float> @test_mm_maskz_move_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_maskz_move_ss:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x10,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_maskz_move_ss:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x10,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
%res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> zeroinitializer, i8 %__U)
ret <4 x float> %res
}

define <2 x double> @test_mm_mask_move_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_mask_move_sd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovsd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf7,0x09,0x10,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask_move_sd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovsd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf7,0x09,0x10,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
%res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> %__W, i8 %__U)
ret <2 x double> %res
}

define <2 x double> @test_mm_maskz_move_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_maskz_move_sd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x10,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_maskz_move_sd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x10,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
%res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> zeroinitializer, i8 %__U)
ret <2 x double> %res
}

declare <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float>, <4 x float>, <4 x float>, i8)
declare <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double>, <2 x double>, <2 x double>, i8)

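; Zero-extend intrinsics (vpmovzx*): the autogenerated shuffle-decode comments below
; spell out where each source element lands and which lanes are zero-filled.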
declare <16 x i32> @llvm.x86.avx512.mask.pmovzxb.d.512(<16 x i8>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_pmovzxb_d_512(<16 x i8> %x0, <16 x i32> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pmovzxb_d_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpmovzxbd %xmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x31,0xc0]
; CHECK-NEXT: ## zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.pmovzxb.d.512(<16 x i8> %x0, <16 x i32> %x1, i16 -1)
ret <16 x i32> %res
}

define <16 x i32>@test_int_x86_avx512_mask_pmovzxb_d_512(<16 x i8> %x0, <16 x i32> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_d_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpmovzxbd %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x31,0xc8]
; X86-NEXT: ## zmm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_d_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovzxbd %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x31,0xc8]
; X64-NEXT: ## zmm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.pmovzxb.d.512(<16 x i8> %x0, <16 x i32> %x1, i16 %x2)
ret <16 x i32> %res
}

define <16 x i32>@test_int_x86_avx512_maskz_pmovzxb_d_512(<16 x i8> %x0, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_pmovzxb_d_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpmovzxbd %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x31,0xc0]
; X86-NEXT: ## zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmovzxb_d_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovzxbd %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x31,0xc0]
; X64-NEXT: ## zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.pmovzxb.d.512(<16 x i8> %x0, <16 x i32> zeroinitializer, i16 %x2)
ret <16 x i32> %res
}

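; With only +avx512f, i8 masks on X86 are loaded with movzbl and moved via kmovw
; (presumably because the byte-sized kmovb requires AVX512DQ).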
declare <8 x i64> @llvm.x86.avx512.mask.pmovzxb.q.512(<16 x i8>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_pmovzxb_q_512(<16 x i8> %x0, <8 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pmovzxb_q_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpmovzxbq %xmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x32,0xc0]
; CHECK-NEXT: ## zmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.pmovzxb.q.512(<16 x i8> %x0, <8 x i64> %x1, i8 -1)
ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_mask_pmovzxb_q_512(<16 x i8> %x0, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_q_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovzxbq %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x32,0xc8]
; X86-NEXT: ## zmm1 {%k1} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_q_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovzxbq %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x32,0xc8]
; X64-NEXT: ## zmm1 {%k1} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.pmovzxb.q.512(<16 x i8> %x0, <8 x i64> %x1, i8 %x2)
ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_maskz_pmovzxb_q_512(<16 x i8> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_pmovzxb_q_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovzxbq %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x32,0xc0]
; X86-NEXT: ## zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmovzxb_q_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovzxbq %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x32,0xc0]
; X64-NEXT: ## zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.pmovzxb.q.512(<16 x i8> %x0, <8 x i64> zeroinitializer, i8 %x2)
ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.pmovzxd.q.512(<8 x i32>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_pmovzxd_q_512(<8 x i32> %x0, <8 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pmovzxd_q_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpmovzxdq %ymm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x35,0xc0]
; CHECK-NEXT: ## zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.pmovzxd.q.512(<8 x i32> %x0, <8 x i64> %x1, i8 -1)
ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_mask_pmovzxd_q_512(<8 x i32> %x0, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovzxd_q_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovzxdq %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x35,0xc8]
; X86-NEXT: ## zmm1 {%k1} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovzxd_q_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovzxdq %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x35,0xc8]
; X64-NEXT: ## zmm1 {%k1} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.pmovzxd.q.512(<8 x i32> %x0, <8 x i64> %x1, i8 %x2)
ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_maskz_pmovzxd_q_512(<8 x i32> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_pmovzxd_q_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovzxdq %ymm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x35,0xc0]
; X86-NEXT: ## zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmovzxd_q_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovzxdq %ymm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x35,0xc0]
; X64-NEXT: ## zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.pmovzxd.q.512(<8 x i32> %x0, <8 x i64> zeroinitializer, i8 %x2)
ret <8 x i64> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.pmovzxw.d.512(<16 x i16>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_pmovzxw_d_512(<16 x i16> %x0, <16 x i32> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pmovzxw_d_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpmovzxwd %ymm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x33,0xc0]
; CHECK-NEXT: ## zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.pmovzxw.d.512(<16 x i16> %x0, <16 x i32> %x1, i16 -1)
ret <16 x i32> %res
}

define <16 x i32>@test_int_x86_avx512_mask_pmovzxw_d_512(<16 x i16> %x0, <16 x i32> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovzxw_d_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpmovzxwd %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x33,0xc8]
; X86-NEXT: ## zmm1 {%k1} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovzxw_d_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovzxwd %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x33,0xc8]
; X64-NEXT: ## zmm1 {%k1} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.pmovzxw.d.512(<16 x i16> %x0, <16 x i32> %x1, i16 %x2)
ret <16 x i32> %res
}

define <16 x i32>@test_int_x86_avx512_maskz_pmovzxw_d_512(<16 x i16> %x0, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_pmovzxw_d_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpmovzxwd %ymm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x33,0xc0]
; X86-NEXT: ## zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmovzxw_d_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovzxwd %ymm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x33,0xc0]
; X64-NEXT: ## zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.pmovzxw.d.512(<16 x i16> %x0, <16 x i32> zeroinitializer, i16 %x2)
ret <16 x i32> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.pmovzxw.q.512(<8 x i16>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_pmovzxw_q_512(<8 x i16> %x0, <8 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pmovzxw_q_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpmovzxwq %xmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x34,0xc0]
; CHECK-NEXT: ## zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.pmovzxw.q.512(<8 x i16> %x0, <8 x i64> %x1, i8 -1)
ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_mask_pmovzxw_q_512(<8 x i16> %x0, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovzxw_q_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovzxwq %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x34,0xc8]
; X86-NEXT: ## zmm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovzxw_q_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovzxwq %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x34,0xc8]
; X64-NEXT: ## zmm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.pmovzxw.q.512(<8 x i16> %x0, <8 x i64> %x1, i8 %x2)
ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_maskz_pmovzxw_q_512(<8 x i16> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_pmovzxw_q_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovzxwq %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x34,0xc0]
; X86-NEXT: ## zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmovzxw_q_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovzxwq %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x34,0xc0]
; X64-NEXT: ## zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.pmovzxw.q.512(<8 x i16> %x0, <8 x i64> zeroinitializer, i8 %x2)
ret <8 x i64> %res
}

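; Sign-extend intrinsics (vpmovsx*) mirror the vpmovzx* tests above; they carry no
; shuffle-decode comments since sign extension is not expressible as a shuffle with
; zero fill.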
declare <16 x i32> @llvm.x86.avx512.mask.pmovsxb.d.512(<16 x i8>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_pmovsxb_d_512(<16 x i8> %x0, <16 x i32> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pmovsxb_d_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpmovsxbd %xmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x21,0xc0]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.pmovsxb.d.512(<16 x i8> %x0, <16 x i32> %x1, i16 -1)
ret <16 x i32> %res
}

define <16 x i32>@test_int_x86_avx512_mask_pmovsxb_d_512(<16 x i8> %x0, <16 x i32> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_d_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpmovsxbd %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x21,0xc8]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_d_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovsxbd %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x21,0xc8]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.pmovsxb.d.512(<16 x i8> %x0, <16 x i32> %x1, i16 %x2)
ret <16 x i32> %res
}

define <16 x i32>@test_int_x86_avx512_maskz_pmovsxb_d_512(<16 x i8> %x0, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_pmovsxb_d_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x21,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmovsxb_d_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x21,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.pmovsxb.d.512(<16 x i8> %x0, <16 x i32> zeroinitializer, i16 %x2)
ret <16 x i32> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.pmovsxb.q.512(<16 x i8>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_pmovsxb_q_512(<16 x i8> %x0, <8 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pmovsxb_q_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpmovsxbq %xmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x22,0xc0]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.pmovsxb.q.512(<16 x i8> %x0, <8 x i64> %x1, i8 -1)
ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_mask_pmovsxb_q_512(<16 x i8> %x0, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_q_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovsxbq %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x22,0xc8]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_q_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovsxbq %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x22,0xc8]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.pmovsxb.q.512(<16 x i8> %x0, <8 x i64> %x1, i8 %x2)
ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_maskz_pmovsxb_q_512(<16 x i8> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_pmovsxb_q_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovsxbq %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x22,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmovsxb_q_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovsxbq %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x22,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.pmovsxb.q.512(<16 x i8> %x0, <8 x i64> zeroinitializer, i8 %x2)
ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.pmovsxd.q.512(<8 x i32>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_pmovsxd_q_512(<8 x i32> %x0, <8 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pmovsxd_q_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpmovsxdq %ymm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x25,0xc0]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.pmovsxd.q.512(<8 x i32> %x0, <8 x i64> %x1, i8 -1)
ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_mask_pmovsxd_q_512(<8 x i32> %x0, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovsxd_q_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovsxdq %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x25,0xc8]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovsxd_q_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovsxdq %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x25,0xc8]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.pmovsxd.q.512(<8 x i32> %x0, <8 x i64> %x1, i8 %x2)
ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_maskz_pmovsxd_q_512(<8 x i32> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_pmovsxd_q_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovsxdq %ymm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x25,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmovsxd_q_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovsxdq %ymm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x25,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.pmovsxd.q.512(<8 x i32> %x0, <8 x i64> zeroinitializer, i8 %x2)
ret <8 x i64> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.pmovsxw.d.512(<16 x i16>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_pmovsxw_d_512(<16 x i16> %x0, <16 x i32> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pmovsxw_d_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpmovsxwd %ymm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x23,0xc0]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.pmovsxw.d.512(<16 x i16> %x0, <16 x i32> %x1, i16 -1)
ret <16 x i32> %res
}

define <16 x i32>@test_int_x86_avx512_mask_pmovsxw_d_512(<16 x i16> %x0, <16 x i32> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovsxw_d_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpmovsxwd %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x23,0xc8]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovsxw_d_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovsxwd %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x23,0xc8]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.pmovsxw.d.512(<16 x i16> %x0, <16 x i32> %x1, i16 %x2)
ret <16 x i32> %res
}

define <16 x i32>@test_int_x86_avx512_maskz_pmovsxw_d_512(<16 x i16> %x0, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_pmovsxw_d_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpmovsxwd %ymm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x23,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmovsxw_d_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovsxwd %ymm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x23,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.pmovsxw.d.512(<16 x i16> %x0, <16 x i32> zeroinitializer, i16 %x2)
ret <16 x i32> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.pmovsxw.q.512(<8 x i16>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_pmovsxw_q_512(<8 x i16> %x0, <8 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pmovsxw_q_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpmovsxwq %xmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x24,0xc0]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.pmovsxw.q.512(<8 x i16> %x0, <8 x i64> %x1, i8 -1)
ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_mask_pmovsxw_q_512(<8 x i16> %x0, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovsxw_q_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovsxwq %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x24,0xc8]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovsxw_q_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovsxwq %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x24,0xc8]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.pmovsxw.q.512(<8 x i16> %x0, <8 x i64> %x1, i8 %x2)
ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_maskz_pmovsxw_q_512(<8 x i16> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_pmovsxw_q_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovsxwq %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x24,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmovsxw_q_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovsxwq %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x24,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.pmovsxw.q.512(<8 x i16> %x0, <8 x i64> zeroinitializer, i8 %x2)
ret <8 x i64> %res
}

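; Variable rotates (vprolv*/vprorv*): these upgraded tests express masking in IR via
; bitcast-to-<N x i1> plus select instead of a mask operand on the intrinsic.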
declare <16 x i32> @llvm.x86.avx512.prolv.d.512(<16 x i32>, <16 x i32>)

define <16 x i32>@test_int_x86_avx512_prolv_d_512(<16 x i32> %x0, <16 x i32> %x1) {
; CHECK-LABEL: test_int_x86_avx512_prolv_d_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vprolvd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x15,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%1 = call <16 x i32> @llvm.x86.avx512.prolv.d.512(<16 x i32> %x0, <16 x i32> %x1)
ret <16 x i32> %1
}

define <16 x i32>@test_int_x86_avx512_mask_prolv_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prolv_d_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vprolvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x15,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prolv_d_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vprolvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x15,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%1 = call <16 x i32> @llvm.x86.avx512.prolv.d.512(<16 x i32> %x0, <16 x i32> %x1)
%2 = bitcast i16 %x3 to <16 x i1>
%3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2
ret <16 x i32> %3
}

define <16 x i32>@test_int_x86_avx512_maskz_prolv_d_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_prolv_d_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vprolvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x15,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_prolv_d_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vprolvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x15,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%1 = call <16 x i32> @llvm.x86.avx512.prolv.d.512(<16 x i32> %x0, <16 x i32> %x1)
%2 = bitcast i16 %x3 to <16 x i1>
%3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
ret <16 x i32> %3
}

declare <8 x i64> @llvm.x86.avx512.prolv.q.512(<8 x i64>, <8 x i64>)

define <8 x i64>@test_int_x86_avx512_prolv_q_512(<8 x i64> %x0, <8 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_prolv_q_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vprolvq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x15,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%1 = call <8 x i64> @llvm.x86.avx512.prolv.q.512(<8 x i64> %x0, <8 x i64> %x1)
ret <8 x i64> %1
}

define <8 x i64>@test_int_x86_avx512_mask_prolv_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prolv_q_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vprolvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x15,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prolv_q_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vprolvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x15,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%1 = call <8 x i64> @llvm.x86.avx512.prolv.q.512(<8 x i64> %x0, <8 x i64> %x1)
%2 = bitcast i8 %x3 to <8 x i1>
%3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x2
ret <8 x i64> %3
}

define <8 x i64>@test_int_x86_avx512_maskz_prolv_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_prolv_q_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vprolvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x15,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_prolv_q_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vprolvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x15,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%1 = call <8 x i64> @llvm.x86.avx512.prolv.q.512(<8 x i64> %x0, <8 x i64> %x1)
%2 = bitcast i8 %x3 to <8 x i1>
%3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
ret <8 x i64> %3
}

declare <16 x i32> @llvm.x86.avx512.prorv.d.512(<16 x i32>, <16 x i32>)

define <16 x i32>@test_int_x86_avx512_prorv_d_512(<16 x i32> %x0, <16 x i32> %x1) {
; CHECK-LABEL: test_int_x86_avx512_prorv_d_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vprorvd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x14,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%1 = call <16 x i32> @llvm.x86.avx512.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1)
ret <16 x i32> %1
}

define <16 x i32>@test_int_x86_avx512_mask_prorv_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prorv_d_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vprorvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x14,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prorv_d_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vprorvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x14,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%1 = call <16 x i32> @llvm.x86.avx512.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1)
%2 = bitcast i16 %x3 to <16 x i1>
%3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2
ret <16 x i32> %3
}

define <16 x i32>@test_int_x86_avx512_maskz_prorv_d_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_prorv_d_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vprorvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x14,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_prorv_d_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vprorvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x14,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%1 = call <16 x i32> @llvm.x86.avx512.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1)
%2 = bitcast i16 %x3 to <16 x i1>
%3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
ret <16 x i32> %3
}

declare <8 x i64> @llvm.x86.avx512.prorv.q.512(<8 x i64>, <8 x i64>)

define <8 x i64>@test_int_x86_avx512_prorv_q_512(<8 x i64> %x0, <8 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_prorv_q_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vprorvq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x14,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%1 = call <8 x i64> @llvm.x86.avx512.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1)
ret <8 x i64> %1
}

define <8 x i64>@test_int_x86_avx512_mask_prorv_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prorv_q_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vprorvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x14,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prorv_q_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vprorvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x14,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%1 = call <8 x i64> @llvm.x86.avx512.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1)
%2 = bitcast i8 %x3 to <8 x i1>
%3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x2
ret <8 x i64> %3
}

define <8 x i64>@test_int_x86_avx512_maskz_prorv_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_prorv_q_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vprorvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x14,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_prorv_q_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vprorvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x14,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%1 = call <8 x i64> @llvm.x86.avx512.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1)
%2 = bitcast i8 %x3 to <8 x i1>
%3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
ret <8 x i64> %3
}

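; Rotate-by-immediate tests issue three rotates (counts 3, 4, 5) as merge-masked,
; zero-masked, and unmasked, then add the results so a single check body covers all
; three forms.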
declare <16 x i32> @llvm.x86.avx512.prol.d.512(<16 x i32>, i32)

define <16 x i32>@test_int_x86_avx512_prol_d_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_prol_d_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vprold $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xc8,0x03]
; X86-NEXT: vprold $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xc9,0x72,0xc8,0x04]
; X86-NEXT: vprold $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xc8,0x05]
; X86-NEXT: vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X86-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_prol_d_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vprold $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xc8,0x03]
; X64-NEXT: vprold $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xc9,0x72,0xc8,0x04]
; X64-NEXT: vprold $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xc8,0x05]
; X64-NEXT: vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X64-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%1 = call <16 x i32> @llvm.x86.avx512.prol.d.512(<16 x i32> %x0, i32 3)
%2 = bitcast i16 %x3 to <16 x i1>
%3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2
%4 = call <16 x i32> @llvm.x86.avx512.prol.d.512(<16 x i32> %x0, i32 4)
%5 = bitcast i16 %x3 to <16 x i1>
%6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer
%7 = call <16 x i32> @llvm.x86.avx512.prol.d.512(<16 x i32> %x0, i32 5)
%res3 = add <16 x i32> %3, %6
%res4 = add <16 x i32> %res3, %7
ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.prol.q.512(<8 x i64>, i32)

define <8 x i64>@test_int_x86_avx512_prol_q_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_prol_q_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vprolq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xc8,0x03]
; X86-NEXT: vprolq $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0xc9,0x72,0xc8,0x04]
; X86-NEXT: vprolq $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x72,0xc8,0x05]
; X86-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_prol_q_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vprolq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xc8,0x03]
; X64-NEXT: vprolq $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0xc9,0x72,0xc8,0x04]
; X64-NEXT: vprolq $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x72,0xc8,0x05]
; X64-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%1 = call <8 x i64> @llvm.x86.avx512.prol.q.512(<8 x i64> %x0, i32 3)
%2 = bitcast i8 %x3 to <8 x i1>
%3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x2
%4 = call <8 x i64> @llvm.x86.avx512.prol.q.512(<8 x i64> %x0, i32 4)
%5 = bitcast i8 %x3 to <8 x i1>
%6 = select <8 x i1> %5, <8 x i64> %4, <8 x i64> zeroinitializer
%7 = call <8 x i64> @llvm.x86.avx512.prol.q.512(<8 x i64> %x0, i32 5)
%res3 = add <8 x i64> %3, %6
%res4 = add <8 x i64> %res3, %7
ret <8 x i64> %res4
}

declare <16 x i32> @llvm.x86.avx512.pror.d.512(<16 x i32>, i32)

define <16 x i32>@test_int_x86_avx512_pror_d_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_pror_d_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vprord $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xc0,0x03]
; X86-NEXT: vprord $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xc9,0x72,0xc0,0x04]
; X86-NEXT: vprord $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xc0,0x05]
; X86-NEXT: vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X86-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_pror_d_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vprord $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xc0,0x03]
; X64-NEXT: vprord $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xc9,0x72,0xc0,0x04]
; X64-NEXT: vprord $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xc0,0x05]
; X64-NEXT: vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X64-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%1 = call <16 x i32> @llvm.x86.avx512.pror.d.512(<16 x i32> %x0, i32 3)
%2 = bitcast i16 %x3 to <16 x i1>
%3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2
%4 = call <16 x i32> @llvm.x86.avx512.pror.d.512(<16 x i32> %x0, i32 4)
%5 = bitcast i16 %x3 to <16 x i1>
%6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer
%7 = call <16 x i32> @llvm.x86.avx512.pror.d.512(<16 x i32> %x0, i32 5)
%res3 = add <16 x i32> %3, %6
%res4 = add <16 x i32> %res3, %7
ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.pror.q.512(<8 x i64>, i32)

define <8 x i64>@test_int_x86_avx512_pror_q_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_pror_q_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vprorq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xc0,0x03]
; X86-NEXT: vprorq $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0xc9,0x72,0xc0,0x04]
; X86-NEXT: vprorq $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x72,0xc0,0x05]
; X86-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_pror_q_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vprorq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xc0,0x03]
; X64-NEXT: vprorq $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0xc9,0x72,0xc0,0x04]
; X64-NEXT: vprorq $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x72,0xc0,0x05]
; X64-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%1 = call <8 x i64> @llvm.x86.avx512.pror.q.512(<8 x i64> %x0, i32 3)
%2 = bitcast i8 %x3 to <8 x i1>
%3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x2
%4 = call <8 x i64> @llvm.x86.avx512.pror.q.512(<8 x i64> %x0, i32 4)
%5 = bitcast i8 %x3 to <8 x i1>
%6 = select <8 x i1> %5, <8 x i64> %4, <8 x i64> zeroinitializer
%7 = call <8 x i64> @llvm.x86.avx512.pror.q.512(<8 x i64> %x0, i32 5)
%res3 = add <8 x i64> %3, %6
%res4 = add <8 x i64> %res3, %7
ret <8 x i64> %res4
}

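; Shift-by-immediate tests use the same three-way pattern, but the mask and passthru
; are operands of the intrinsic itself (passthru %x2, zeroinitializer, or an all-ones
; mask).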
declare <8 x i64> @llvm.x86.avx512.mask.psrl.qi.512(<8 x i64>, i32, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_psrl_qi_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrl_qi_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsrlq $4, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x73,0xd0,0x04]
; X86-NEXT: vpsrlq $5, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x73,0xd0,0x05]
; X86-NEXT: vpsrlq $6, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x73,0xd0,0x06]
; X86-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrl_qi_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpsrlq $4, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x73,0xd0,0x04]
; X64-NEXT: vpsrlq $5, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x73,0xd0,0x05]
; X64-NEXT: vpsrlq $6, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x73,0xd0,0x06]
; X64-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psrl.qi.512(<8 x i64> %x0, i32 4, <8 x i64> %x2, i8 %x3)
%res1 = call <8 x i64> @llvm.x86.avx512.mask.psrl.qi.512(<8 x i64> %x0, i32 5, <8 x i64> %x2, i8 -1)
%res2 = call <8 x i64> @llvm.x86.avx512.mask.psrl.qi.512(<8 x i64> %x0, i32 6, <8 x i64> zeroinitializer, i8 %x3)
%res3 = add <8 x i64> %res, %res1
%res4 = add <8 x i64> %res3, %res2
ret <8 x i64> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32>, i32, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_psrl_di_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrl_di_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpsrld $4, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xd0,0x04]
; X86-NEXT: vpsrld $5, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xd0,0x05]
; X86-NEXT: vpsrld $6, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xd0,0x06]
; X86-NEXT: vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X86-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrl_di_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpsrld $4, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xd0,0x04]
; X64-NEXT: vpsrld $5, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xd0,0x05]
; X64-NEXT: vpsrld $6, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xd0,0x06]
; X64-NEXT: vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X64-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32> %x0, i32 4, <16 x i32> %x2, i16 %x3)
%res1 = call <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32> %x0, i32 5, <16 x i32> %x2, i16 -1)
%res2 = call <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32> %x0, i32 6, <16 x i32> zeroinitializer, i16 %x3)
%res3 = add <16 x i32> %res, %res1
%res4 = add <16 x i32> %res3, %res2
ret <16 x i32> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.psra.di.512(<16 x i32>, i32, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_psra_di_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psra_di_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpsrad $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xe0,0x03]
; X86-NEXT: vpsrad $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xc9,0x72,0xe0,0x04]
; X86-NEXT: vpsrad $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xe0,0x05]
; X86-NEXT: vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X86-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psra_di_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpsrad $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xe0,0x03]
; X64-NEXT: vpsrad $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xc9,0x72,0xe0,0x04]
; X64-NEXT: vpsrad $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xe0,0x05]
; X64-NEXT: vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X64-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psra.di.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 %x3)
%res1 = call <16 x i32> @llvm.x86.avx512.mask.psra.di.512(<16 x i32> %x0, i32 4, <16 x i32> zeroinitializer, i16 %x3)
%res2 = call <16 x i32> @llvm.x86.avx512.mask.psra.di.512(<16 x i32> %x0, i32 5, <16 x i32> %x2, i16 -1)
%res3 = add <16 x i32> %res, %res1
%res4 = add <16 x i32> %res3, %res2
ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.psra.qi.512(<8 x i64>, i32, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_psra_qi_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psra_qi_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsraq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xe0,0x03]
; X86-NEXT: vpsraq $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0xc9,0x72,0xe0,0x04]
; X86-NEXT: vpsraq $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x72,0xe0,0x05]
; X86-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psra_qi_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpsraq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xe0,0x03]
; X64-NEXT: vpsraq $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0xc9,0x72,0xe0,0x04]
; X64-NEXT: vpsraq $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x72,0xe0,0x05]
; X64-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psra.qi.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 %x3)
%res1 = call <8 x i64> @llvm.x86.avx512.mask.psra.qi.512(<8 x i64> %x0, i32 4, <8 x i64> zeroinitializer, i8 %x3)
%res2 = call <8 x i64> @llvm.x86.avx512.mask.psra.qi.512(<8 x i64> %x0, i32 5, <8 x i64> %x2, i8 -1)
%res3 = add <8 x i64> %res, %res1
%res4 = add <8 x i64> %res3, %res2
ret <8 x i64> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.psll.di.512(<16 x i32>, i32, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_psll_di_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psll_di_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpslld $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xf0,0x03]
; X86-NEXT: vpslld $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xc9,0x72,0xf0,0x04]
; X86-NEXT: vpslld $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xf0,0x05]
; X86-NEXT: vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X86-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psll_di_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpslld $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xf0,0x03]
; X64-NEXT: vpslld $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xc9,0x72,0xf0,0x04]
; X64-NEXT: vpslld $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xf0,0x05]
; X64-NEXT: vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X64-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psll.di.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 %x3)
%res1 = call <16 x i32> @llvm.x86.avx512.mask.psll.di.512(<16 x i32> %x0, i32 4, <16 x i32> zeroinitializer, i16 %x3)
%res2 = call <16 x i32> @llvm.x86.avx512.mask.psll.di.512(<16 x i32> %x0, i32 5, <16 x i32> %x2, i16 -1)
%res3 = add <16 x i32> %res, %res1
%res4 = add <16 x i32> %res3, %res2
ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.psll.qi.512(<8 x i64>, i32, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_psll_qi_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psll_qi_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsllq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x73,0xf0,0x03]
; X86-NEXT: vpsllq $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0xc9,0x73,0xf0,0x04]
; X86-NEXT: vpsllq $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x73,0xf0,0x05]
; X86-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psll_qi_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpsllq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x73,0xf0,0x03]
; X64-NEXT: vpsllq $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0xc9,0x73,0xf0,0x04]
; X64-NEXT: vpsllq $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x73,0xf0,0x05]
; X64-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psll.qi.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 %x3)
%res1 = call <8 x i64> @llvm.x86.avx512.mask.psll.qi.512(<8 x i64> %x0, i32 4, <8 x i64> zeroinitializer, i8 %x3)
%res2 = call <8 x i64> @llvm.x86.avx512.mask.psll.qi.512(<8 x i64> %x0, i32 5, <8 x i64> %x2, i8 -1)
%res3 = add <8 x i64> %res, %res1
%res4 = add <8 x i64> %res3, %res2
ret <8 x i64> %res4
}

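; The tests below switch from immediate shift counts to the intrinsics that
; take the count in the low element of an XMM register; each intrinsic is
; exercised in its unmasked, merge-masked, and zero-masked form.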
define <16 x i32> @test_x86_avx512_psll_d(<16 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_avx512_psll_d:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpslld %xmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xf2,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psll_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
; X86-LABEL: test_x86_avx512_mask_psll_d:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpslld %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xf2,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psll_d:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpslld %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xf2,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_psll_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psll_d:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpslld %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xf2,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psll_d:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpslld %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xf2,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_psll_q(<8 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_avx512_psll_q:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsllq %xmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf3,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psll_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
; X86-LABEL: test_x86_avx512_mask_psll_q:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsllq %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf3,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psll_q:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsllq %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf3,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_psll_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psll_q:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsllq %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf3,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psll_q:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsllq %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf3,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone

define <16 x i32> @test_x86_avx512_psrl_d(<16 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_avx512_psrl_d:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsrld %xmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xd2,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psrl_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
; X86-LABEL: test_x86_avx512_mask_psrl_d:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsrld %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xd2,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psrl_d:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsrld %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xd2,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_psrl_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psrl_d:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsrld %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xd2,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psrl_d:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsrld %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xd2,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_psrl_q(<8 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_avx512_psrl_q:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsrlq %xmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd3,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psrl_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
; X86-LABEL: test_x86_avx512_mask_psrl_q:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsrlq %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd3,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psrl_q:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsrlq %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd3,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_psrl_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psrl_q:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsrlq %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd3,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psrl_q:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsrlq %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd3,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone

define <16 x i32> @test_x86_avx512_psra_d(<16 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_avx512_psra_d:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsrad %xmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xe2,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psra_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
; X86-LABEL: test_x86_avx512_mask_psra_d:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsrad %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xe2,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psra_d:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsrad %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xe2,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_psra_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psra_d:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsrad %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xe2,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psra_d:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsrad %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xe2,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_psra_q(<8 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_avx512_psra_q:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsraq %xmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xe2,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psra_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
; X86-LABEL: test_x86_avx512_mask_psra_q:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsraq %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xe2,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psra_q:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsraq %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xe2,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_psra_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psra_q:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsraq %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xe2,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psra_q:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsraq %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xe2,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone

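; Variable per-element shifts (vpsllv*, vpsrav*, vpsrlv*) take their counts
; from a full-width second vector operand; the same unmasked/merge/zeroing
; triple is checked for each intrinsic below.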
define <16 x i32> @test_x86_avx512_psllv_d(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: test_x86_avx512_psllv_d:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x47,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psllv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
; X86-LABEL: test_x86_avx512_mask_psllv_d:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsllvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x47,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psllv_d:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsllvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x47,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_psllv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psllv_d:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x47,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psllv_d:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x47,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_psllv_q(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK-LABEL: test_x86_avx512_psllv_q:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsllvq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x47,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psllv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
; X86-LABEL: test_x86_avx512_mask_psllv_q:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsllvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x47,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psllv_q:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsllvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x47,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_psllv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psllv_q:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsllvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x47,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psllv_q:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsllvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x47,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone

define <16 x i32> @test_x86_avx512_psrav_d(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: test_x86_avx512_psrav_d:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsravd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x46,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psrav_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
; X86-LABEL: test_x86_avx512_mask_psrav_d:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsravd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x46,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psrav_d:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsravd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x46,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_psrav_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psrav_d:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsravd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x46,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psrav_d:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsravd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x46,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_psrav_q(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK-LABEL: test_x86_avx512_psrav_q:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsravq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x46,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psrav_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
; X86-LABEL: test_x86_avx512_mask_psrav_q:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsravq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x46,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psrav_q:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsravq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x46,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_psrav_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psrav_q:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsravq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x46,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psrav_q:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsravq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x46,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone

define <16 x i32> @test_x86_avx512_psrlv_d(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: test_x86_avx512_psrlv_d:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x45,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
; X86-LABEL: test_x86_avx512_mask_psrlv_d:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsrlvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x45,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psrlv_d:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsrlvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x45,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psrlv_d:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x45,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psrlv_d:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x45,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_psrlv_q(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK-LABEL: test_x86_avx512_psrlv_q:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpsrlvq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x45,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
; X86-LABEL: test_x86_avx512_mask_psrlv_q:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsrlvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x45,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psrlv_q:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsrlvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x45,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psrlv_q:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpsrlvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x45,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psrlv_q:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpsrlvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x45,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone

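; Check that the load of the shift counts is folded into the memory form of
; vpsrlvq rather than being loaded into a register first.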
define <8 x i64> @test_x86_avx512_psrlv_q_memop(<8 x i64> %a0, <8 x i64>* %ptr) {
; X86-LABEL: test_x86_avx512_psrlv_q_memop:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpsrlvq (%eax), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x45,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_psrlv_q_memop:
; X64: ## %bb.0:
; X64-NEXT: vpsrlvq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x45,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%b = load <8 x i64>, <8 x i64>* %ptr
%res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}

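; Signed and unsigned dword-to-double conversions (vcvtdq2pd/vcvtudq2pd),
; each checked unmasked and merge-masked.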
declare <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32>, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_cvt_dq2pd_512(<8 x i32> %x0, <8 x double> %x1) {
; CHECK-LABEL: test_int_x86_avx512_cvt_dq2pd_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0 ## encoding: [0x62,0xf1,0x7e,0x48,0xe6,0xc0]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 -1)
ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_mask_cvt_dq2pd_512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vcvtdq2pd %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0xe6,0xc8]
; X86-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vcvtdq2pd %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0xe6,0xc8]
; X64-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 %x2)
ret <8 x double> %res
}

declare <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32>, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_cvt_udq2pd_512(<8 x i32> %x0, <8 x double> %x1) {
; CHECK-LABEL: test_int_x86_avx512_cvt_udq2pd_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm0 ## encoding: [0x62,0xf1,0x7e,0x48,0x7a,0xc0]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 -1)
ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_mask_cvt_udq2pd_512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vcvtudq2pd %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x7a,0xc8]
; X86-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vcvtudq2pd %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x7a,0xc8]
; X64-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 %x2)
ret <8 x double> %res
}

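; Half-to-single conversions (vcvtph2ps): the {sae} form corresponds to a
; rounding argument of 8 and the default form to 4 in the intrinsic calls.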
define <16 x float> @test_x86_vcvtph2ps_512(<16 x i16> %a0) {
; CHECK-LABEL: test_x86_vcvtph2ps_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcvtph2ps %ymm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x13,0xc0]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
ret <16 x float> %res
}

define <16 x float> @test_x86_vcvtph2ps_512_sae(<16 x i16> %a0) {
; CHECK-LABEL: test_x86_vcvtph2ps_512_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vcvtph2ps {sae}, %ymm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x18,0x13,0xc0]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
ret <16 x float> %res
}

define <16 x float> @test_x86_vcvtph2ps_512_rrk(<16 x i16> %a0,<16 x float> %a1, i16 %mask) {
; X86-LABEL: test_x86_vcvtph2ps_512_rrk:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvtph2ps %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x13,0xc8]
; X86-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_vcvtph2ps_512_rrk:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vcvtph2ps %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x13,0xc8]
; X64-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> %a1, i16 %mask, i32 4)
ret <16 x float> %res
}

define <16 x float> @test_x86_vcvtph2ps_512_sae_rrkz(<16 x i16> %a0, i16 %mask) {
; X86-LABEL: test_x86_vcvtph2ps_512_sae_rrkz:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvtph2ps {sae}, %ymm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x99,0x13,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_vcvtph2ps_512_sae_rrkz:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vcvtph2ps {sae}, %ymm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x99,0x13,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 %mask, i32 8)
ret <16 x float> %res
}

define <16 x float> @test_x86_vcvtph2ps_512_rrkz(<16 x i16> %a0, i16 %mask) {
; X86-LABEL: test_x86_vcvtph2ps_512_rrkz:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvtph2ps %ymm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x13,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_vcvtph2ps_512_rrkz:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vcvtph2ps %ymm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x13,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 %mask, i32 4)
ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16>, <16 x float>, i16, i32) nounwind readonly

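; valignd/valignq concatenate the two sources and extract an element-aligned
; window; the shuffle decode comment after each instruction spells out the
; resulting lane mapping.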
define <8 x i64> @test_valign_q(<8 x i64> %a, <8 x i64> %b) {
; CHECK-LABEL: test_valign_q:
; CHECK: ## %bb.0:
; CHECK-NEXT: valignq $2, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x03,0xc1,0x02]
; CHECK-NEXT: ## zmm0 = zmm1[2,3,4,5,6,7],zmm0[0,1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i32 2, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_valign_q(<8 x i64> %a, <8 x i64> %b, <8 x i64> %src, i8 %mask) {
; X86-LABEL: test_mask_valign_q:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: valignq $2, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x03,0xd1,0x02]
; X86-NEXT: ## zmm2 {%k1} = zmm1[2,3,4,5,6,7],zmm0[0,1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_valign_q:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: valignq $2, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x03,0xd1,0x02]
; X64-NEXT: ## zmm2 {%k1} = zmm1[2,3,4,5,6,7],zmm0[0,1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i32 2, <8 x i64> %src, i8 %mask)
ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8)

define <16 x i32> @test_maskz_valign_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
; X86-LABEL: test_maskz_valign_d:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: valignd $5, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x03,0xc1,0x05]
; X86-NEXT: ## zmm0 {%k1} {z} = zmm1[5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1,2,3,4]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_maskz_valign_d:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: valignd $5, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x03,0xc1,0x05]
; X64-NEXT: ## zmm0 {%k1} {z} = zmm1[5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1,2,3,4]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32> %a, <16 x i32> %b, i32 5, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16)

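; vpermilvar tests: the permute controls come from a vector operand, so these
; lower to vpermilpd/vpermilps with register (not immediate) controls.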
declare <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_vpermilvar_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermilvar_pd_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpermilpd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x0d,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_mask_vpermilvar_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpermilpd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x0d,0xd1]
; X86-NEXT: vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermilpd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x0d,0xd1]
; X64-NEXT: vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3)
ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_maskz_vpermilvar_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermilvar_pd_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpermilpd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x0d,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpermilvar_pd_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermilpd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x0d,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> zeroinitializer, i8 %x3)
ret <8 x double> %res
}

declare <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermilvar_ps_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpermilps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x0c,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
ret <16 x float> %res
}

define <16 x float>@test_int_x86_avx512_mask_vpermilvar_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpermilps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x0c,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermilps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x0c,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3)
ret <16 x float> %res
}

define <16 x float>@test_int_x86_avx512_maskz_vpermilvar_ps_512(<16 x float> %x0, <16 x i32> %x1, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermilvar_ps_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpermilps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x0c,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpermilvar_ps_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermilps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x0c,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> zeroinitializer, i16 %x3)
ret <16 x float> %res
}

; Test case to make sure we can print shuffle decode comments for constant pool loads.
define <16 x float>@test_int_x86_avx512_mask_vpermilvar_ps_512_constant_pool(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_512_constant_pool:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpermilps {{.*#+}} zmm2 {%k1} = zmm0[2,3,0,1,7,6,5,4,9,8,11,10,12,13,14,15]
; X86-NEXT: ## encoding: [0x62,0xf2,0x7d,0x49,0x0c,0x15,A,A,A,A]
; X86-NEXT: ## fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-NEXT: vpermilps {{.*#+}} zmm1 {%k1} {z} = zmm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15]
; X86-NEXT: ## encoding: [0x62,0xf2,0x7d,0xc9,0x0c,0x0d,A,A,A,A]
; X86-NEXT: ## fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-NEXT: vaddps %zmm1, %zmm2, %zmm1 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc9]
; X86-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[1,0,3,2,4,5,6,7,10,11,8,9,14,15,13,12]
; X86-NEXT: ## encoding: [0x62,0xf2,0x7d,0x48,0x0c,0x05,A,A,A,A]
; X86-NEXT: ## fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-NEXT: vaddps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_512_constant_pool:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermilps {{.*#+}} zmm2 {%k1} = zmm0[2,3,0,1,7,6,5,4,9,8,11,10,12,13,14,15]
; X64-NEXT: ## encoding: [0x62,0xf2,0x7d,0x49,0x0c,0x15,A,A,A,A]
; X64-NEXT: ## fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-NEXT: vpermilps {{.*#+}} zmm1 {%k1} {z} = zmm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15]
; X64-NEXT: ## encoding: [0x62,0xf2,0x7d,0xc9,0x0c,0x0d,A,A,A,A]
; X64-NEXT: ## fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-NEXT: vaddps %zmm1, %zmm2, %zmm1 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc9]
; X64-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[1,0,3,2,4,5,6,7,10,11,8,9,14,15,13,12]
; X64-NEXT: ## encoding: [0x62,0xf2,0x7d,0x48,0x0c,0x05,A,A,A,A]
; X64-NEXT: ## fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-NEXT: vaddps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 3, i32 2, i32 1, i32 0, i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3>, <16 x float> %x2, i16 %x3)
%res1 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3, i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3>, <16 x float> zeroinitializer, i16 %x3)
%res2 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 1, i32 0>, <16 x float> %x2, i16 -1)
%res3 = fadd <16 x float> %res, %res1
%res4 = fadd <16 x float> %res2, %res3
ret <16 x float> %res4
}

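; vpmuldq (mask.pmul.dq.512) tests; the rr/rm/rmb suffixes denote
; register-register, register-memory, and embedded-broadcast ({1to8}) operand
; forms, each with merge-masked (k) and zero-masked (kz) variants.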
define <8 x i64> @test_mask_mul_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_mask_mul_epi32_rr:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpmuldq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_mul_epi32_rrk:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmuldq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rrk:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmuldq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_mul_epi32_rrkz:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmuldq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rrkz:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmuldq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
; X86-LABEL: test_mask_mul_epi32_rm:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpmuldq (%eax), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rm:
; X64: ## %bb.0:
; X64-NEXT: vpmuldq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_mul_epi32_rmk:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpmuldq (%eax), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0x08]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rmk:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpmuldq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0x0f]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_mul_epi32_rmkz:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpmuldq (%eax), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rmkz:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpmuldq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%b = load <16 x i32>, <16 x i32>* %ptr_b
%res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}

5403 define <8 x i64> @test_mask_mul_epi32_rmb(<16 x i32> %a, i64* %ptr_b) {
5404 ; X86-LABEL: test_mask_mul_epi32_rmb:
5406 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
5407 ; X86-NEXT: vpmuldq (%eax){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x58,0x28,0x00]
5408 ; X86-NEXT: retl ## encoding: [0xc3]
5410 ; X64-LABEL: test_mask_mul_epi32_rmb:
5412 ; X64-NEXT: vpmuldq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x58,0x28,0x07]
5413 ; X64-NEXT: retq ## encoding: [0xc3]
5414 %q = load i64, i64* %ptr_b
5415 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
5416 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
5417 %b = bitcast <8 x i64> %b64 to <16 x i32>
5418 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
5422 define <8 x i64> @test_mask_mul_epi32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
5423 ; X86-LABEL: test_mask_mul_epi32_rmbk:
5425 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
5426 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
5427 ; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
5428 ; X86-NEXT: vpmuldq (%eax){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x59,0x28,0x08]
5429 ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
5430 ; X86-NEXT: retl ## encoding: [0xc3]
5432 ; X64-LABEL: test_mask_mul_epi32_rmbk:
5434 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
5435 ; X64-NEXT: vpmuldq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x59,0x28,0x0f]
5436 ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
5437 ; X64-NEXT: retq ## encoding: [0xc3]
5438 %q = load i64, i64* %ptr_b
5439 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
5440 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
5441 %b = bitcast <8 x i64> %b64 to <16 x i32>
5442 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
5446 define <8 x i64> @test_mask_mul_epi32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
5447 ; X86-LABEL: test_mask_mul_epi32_rmbkz:
5449 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
5450 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
5451 ; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
5452 ; X86-NEXT: vpmuldq (%eax){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xd9,0x28,0x00]
5453 ; X86-NEXT: retl ## encoding: [0xc3]
5455 ; X64-LABEL: test_mask_mul_epi32_rmbkz:
5457 ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
5458 ; X64-NEXT: vpmuldq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xd9,0x28,0x07]
5459 ; X64-NEXT: retq ## encoding: [0xc3]
5460 %q = load i64, i64* %ptr_b
5461 %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
5462 %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
5463 %b = bitcast <8 x i64> %b64 to <16 x i32>
5464 %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
5468 declare <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)
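
; Unsigned variants of the multiplies above: llvm.x86.avx512.mask.pmulu.dq.512
; lowers to vpmuludq, which zero-extends the even 32-bit elements before the
; 64-bit multiply, where vpmuldq sign-extends them. The rr/rm/rmb suffixes
; cover register, memory, and broadcast operands; the k/kz suffixes add
; merge-masking and zero-masking.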
define <8 x i64> @test_mask_mul_epu32_rr(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_mask_mul_epu32_rr:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpmuludq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_mul_epu32_rrk:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmuludq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epu32_rrk:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmuludq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_mul_epu32_rrkz:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmuludq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epu32_rrkz:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmuludq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
; X86-LABEL: test_mask_mul_epu32_rm:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpmuludq (%eax), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epu32_rm:
; X64: ## %bb.0:
; X64-NEXT: vpmuludq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_mul_epu32_rmk:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpmuludq (%eax), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0x08]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epu32_rmk:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpmuludq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0x0f]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_mul_epu32_rmkz:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpmuludq (%eax), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epu32_rmkz:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpmuludq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rmb(<16 x i32> %a, i64* %ptr_b) {
; X86-LABEL: test_mask_mul_epu32_rmb:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpmuludq (%eax){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xf4,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epu32_rmb:
; X64: ## %bb.0:
; X64-NEXT: vpmuludq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xf4,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %b = bitcast <8 x i64> %b64 to <16 x i32>
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_mul_epu32_rmbk:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpmuludq (%eax){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xf4,0x08]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epu32_rmbk:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpmuludq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xf4,0x0f]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %b = bitcast <8 x i64> %b64 to <16 x i32>
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_mul_epu32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_mul_epu32_rmbkz:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpmuludq (%eax){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xf4,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epu32_rmbkz:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpmuludq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xf4,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %b = bitcast <8 x i64> %b64 to <16 x i32>
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)
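
; Subvector extraction: pull a 128-bit or 256-bit lane out of a 512-bit
; register, either merge-masked into the pass-through operand or zero-masked.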
define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8 %mask) {
; X86-LABEL: test_mask_vextractf32x4:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vextractf32x4 $2, %zmm1, %xmm0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x19,0xc8,0x02]
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_vextractf32x4:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vextractf32x4 $2, %zmm1, %xmm0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x19,0xc8,0x02]
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float> %a, i32 2, <4 x float> %b, i8 %mask)
  ret <4 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float>, i32, <4 x float>, i8)

define <4 x i64> @test_mask_vextracti64x4(<4 x i64> %b, <8 x i64> %a, i8 %mask) {
; X86-LABEL: test_mask_vextracti64x4:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vextracti64x4 $1, %zmm1, %ymm0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x3b,0xc8,0x01]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_vextracti64x4:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vextracti64x4 $1, %zmm1, %ymm0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x3b,0xc8,0x01]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64> %a, i32 1, <4 x i64> %b, i8 %mask)
  ret <4 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64>, i32, <4 x i64>, i8)

define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) {
; X86-LABEL: test_maskz_vextracti32x4:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vextracti32x4 $2, %zmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x39,0xc0,0x02]
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_maskz_vextracti32x4:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vextracti32x4 $2, %zmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x39,0xc0,0x02]
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32> %a, i32 2, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32>, i32, <4 x i32>, i8)

define <4 x double> @test_vextractf64x4(<8 x double> %a) {
; CHECK-LABEL: test_vextractf64x4:
; CHECK: ## %bb.0:
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x1b,0xc0,0x01]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double> %a, i32 1, <4 x double> zeroinitializer, i8 -1)
  ret <4 x double> %res
}

declare <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double>, i32, <4 x double>, i8)
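
; Subvector insertion: place a 128-bit or 256-bit value into lane 1 of a
; 512-bit register, in unmasked, merge-masked, and zero-masked forms.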
declare <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float>, <4 x float>, i32, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_insertf32x4_512(<16 x float> %x0, <4 x float> %x1, <16 x float> %x3) {
; CHECK-LABEL: test_int_x86_avx512_insertf32x4_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0x7d,0x48,0x18,0xc1,0x01]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> %x3, i16 -1)
  ret <16 x float> %res
}

define <16 x float>@test_int_x86_avx512_mask_insertf32x4_512(<16 x float> %x0, <4 x float> %x1, <16 x float> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_insertf32x4_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x18,0xd1,0x01]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_insertf32x4_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x18,0xd1,0x01]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> %x3, i16 %x4)
  ret <16 x float> %res
}

define <16 x float>@test_int_x86_avx512_maskz_insertf32x4_512(<16 x float> %x0, <4 x float> %x1, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_maskz_insertf32x4_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x18,0xc1,0x01]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_insertf32x4_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x18,0xc1,0x01]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> zeroinitializer, i16 %x4)
  ret <16 x float> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32>, <4 x i32>, i32, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_inserti32x4_512(<16 x i32> %x0, <4 x i32> %x1, <16 x i32> %x3, i16 %x4) {
; CHECK-LABEL: test_int_x86_avx512_inserti32x4_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0x7d,0x48,0x18,0xc1,0x01]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> %x3, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32>@test_int_x86_avx512_mask_inserti32x4_512(<16 x i32> %x0, <4 x i32> %x1, <16 x i32> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_inserti32x4_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x38,0xd1,0x01]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_inserti32x4_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x38,0xd1,0x01]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> %x3, i16 %x4)
  ret <16 x i32> %res
}

define <16 x i32>@test_int_x86_avx512_maskz_inserti32x4_512(<16 x i32> %x0, <4 x i32> %x1, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_maskz_inserti32x4_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x38,0xc1,0x01]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_inserti32x4_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x38,0xc1,0x01]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> zeroinitializer, i16 %x4)
  ret <16 x i32> %res
}

declare <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double>, <4 x double>, i32, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_insertf64x4_512(<8 x double> %x0, <4 x double> %x1, <8 x double> %x3) {
; CHECK-LABEL: test_int_x86_avx512_insertf64x4_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x1a,0xc1,0x01]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> %x3, i8 -1)
  ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_mask_insertf64x4_512(<8 x double> %x0, <4 x double> %x1, <8 x double> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_insertf64x4_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1a,0xd1,0x01]
; X86-NEXT: vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_insertf64x4_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1a,0xd1,0x01]
; X64-NEXT: vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> %x3, i8 %x4)
  ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_maskz_insertf64x4_512(<8 x double> %x0, <4 x double> %x1, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_maskz_insertf64x4_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x1a,0xc1,0x01]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_insertf64x4_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x1a,0xc1,0x01]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> zeroinitializer, i8 %x4)
  ret <8 x double> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64>, <4 x i64>, i32, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_inserti64x4_512(<8 x i64> %x0, <4 x i64> %x1, <8 x i64> %x3) {
; CHECK-LABEL: test_int_x86_avx512_inserti64x4_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x1a,0xc1,0x01]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> %x3, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_mask_inserti64x4_512(<8 x i64> %x0, <4 x i64> %x1, <8 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_inserti64x4_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x3a,0xd1,0x01]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_inserti64x4_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x3a,0xd1,0x01]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> %x3, i8 %x4)
  ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_maskz_inserti64x4_512(<8 x i64> %x0, <4 x i64> %x1, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_maskz_inserti64x4_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x3a,0xc1,0x01]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_inserti64x4_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x3a,0xc1,0x01]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> zeroinitializer, i8 %x4)
  ret <8 x i64> %res
}

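; vmovntdqa performs a non-temporal (streaming) 512-bit load; only the
; memory-operand form of this intrinsic exists.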
define <8 x i64> @test_x86_avx512_movntdqa(i8* %a0) {
; X86-LABEL: test_x86_avx512_movntdqa:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vmovntdqa (%eax), %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x2a,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_movntdqa:
; X64: ## %bb.0:
; X64-NEXT: vmovntdqa (%rdi), %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x2a,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.movntdqa(i8* %a0)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.movntdqa(i8*) nounwind readonly
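
; Integer comparison tests. The third operand of llvm.x86.avx512.mask.cmp.d.512
; selects the predicate (0=eq, 1=lt, 2=le, 3=false, 4=neq, 5=nlt, 6=nle,
; 7=true); the result is an i16 bitmask. Predicates 3 and 7 fold to constants,
; so the lowering emits only six compares: element 3 of the result vector
; stays zero and element 7 is materialized as all-ones (or as the mask value
; in the masked tests).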
define <8 x i16> @test_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: test_cmp_d_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc1]
; CHECK-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 ## encoding: [0x62,0xf1,0x75,0x48,0x66,0xc8]
; CHECK-NEXT: vpcmpled %zmm1, %zmm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xd1,0x02]
; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k3 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xd9,0x04]
; CHECK-NEXT: vpcmpnltd %zmm1, %zmm0, %k4 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xe1,0x05]
; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k5 ## encoding: [0x62,0xf1,0x7d,0x48,0x66,0xe9]
; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x76,0xc9]
; CHECK-NEXT: vpblendw $128, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0e,0xc1,0x80]
; CHECK-NEXT: ## xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
  %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1)
  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
  %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1)
  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
  %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1)
  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
  %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1)
  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
  %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1)
  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
  %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1)
  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
  %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1)
  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
  ret <8 x i16> %vec7
}

define <8 x i16> @test_mask_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
; X86-LABEL: test_mask_cmp_d_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x76,0xc1]
; X86-NEXT: vpcmpgtd %zmm0, %zmm1, %k2 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x66,0xd0]
; X86-NEXT: vpcmpled %zmm1, %zmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1f,0xd9,0x02]
; X86-NEXT: vpcmpneqd %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1f,0xe1,0x04]
; X86-NEXT: vpcmpnltd %zmm1, %zmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1f,0xe9,0x05]
; X86-NEXT: vpcmpgtd %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x66,0xc9]
; X86-NEXT: kmovw %k2, %ecx ## encoding: [0xc5,0xf8,0x93,0xca]
; X86-NEXT: kmovw %k0, %edx ## encoding: [0xc5,0xf8,0x93,0xd0]
; X86-NEXT: vmovd %edx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc2]
; X86-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
; X86-NEXT: kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb]
; X86-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
; X86-NEXT: kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc]
; X86-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
; X86-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
; X86-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
; X86-NEXT: kmovw %k1, %ecx ## encoding: [0xc5,0xf8,0x93,0xc9]
; X86-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_cmp_d_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x76,0xc1]
; X64-NEXT: vpcmpgtd %zmm0, %zmm1, %k2 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x66,0xd0]
; X64-NEXT: vpcmpled %zmm1, %zmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1f,0xd9,0x02]
; X64-NEXT: vpcmpneqd %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1f,0xe1,0x04]
; X64-NEXT: vpcmpnltd %zmm1, %zmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1f,0xe9,0x05]
; X64-NEXT: vpcmpgtd %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x66,0xc9]
; X64-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
; X64-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; X64-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; X64-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; X64-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; X64-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; X64-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
; X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; X64-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
  %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
  %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask)
  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
  %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask)
  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
  %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask)
  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
  %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask)
  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
  %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask)
  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
  %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask)
  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
  %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask)
  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
  ret <8 x i16> %vec7
}

declare i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone
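
; Same matrix as test_cmp_d_512, but with the unsigned predicates of vpcmpud
; (vpcmpltud, vpcmpleud, vpcmpnltud, vpcmpnleud) in place of the signed forms.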
define <8 x i16> @test_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: test_ucmp_d_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc1]
; CHECK-NEXT: vpcmpltud %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1e,0xc9,0x01]
; CHECK-NEXT: vpcmpleud %zmm1, %zmm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x48,0x1e,0xd1,0x02]
; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k3 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xd9,0x04]
; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k4 ## encoding: [0x62,0xf3,0x7d,0x48,0x1e,0xe1,0x05]
; CHECK-NEXT: vpcmpnleud %zmm1, %zmm0, %k5 ## encoding: [0x62,0xf3,0x7d,0x48,0x1e,0xe9,0x06]
; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x76,0xc9]
; CHECK-NEXT: vpblendw $128, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0e,0xc1,0x80]
; CHECK-NEXT: ## xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
  %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1)
  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
  %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1)
  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
  %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1)
  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
  %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1)
  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
  %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1)
  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
  %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1)
  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
  %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1)
  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
  ret <8 x i16> %vec7
}

define <8 x i16> @test_mask_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
; X86-LABEL: test_mask_ucmp_d_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x76,0xc1]
; X86-NEXT: vpcmpltud %zmm1, %zmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0xd1,0x01]
; X86-NEXT: vpcmpleud %zmm1, %zmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0xd9,0x02]
; X86-NEXT: vpcmpneqd %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1f,0xe1,0x04]
; X86-NEXT: vpcmpnltud %zmm1, %zmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0xe9,0x05]
; X86-NEXT: vpcmpnleud %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0xc9,0x06]
; X86-NEXT: kmovw %k2, %ecx ## encoding: [0xc5,0xf8,0x93,0xca]
; X86-NEXT: kmovw %k0, %edx ## encoding: [0xc5,0xf8,0x93,0xd0]
; X86-NEXT: vmovd %edx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc2]
; X86-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
; X86-NEXT: kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb]
; X86-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
; X86-NEXT: kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc]
; X86-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
; X86-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
; X86-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
; X86-NEXT: kmovw %k1, %ecx ## encoding: [0xc5,0xf8,0x93,0xc9]
; X86-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_ucmp_d_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x76,0xc1]
; X64-NEXT: vpcmpltud %zmm1, %zmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0xd1,0x01]
; X64-NEXT: vpcmpleud %zmm1, %zmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0xd9,0x02]
; X64-NEXT: vpcmpneqd %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1f,0xe1,0x04]
; X64-NEXT: vpcmpnltud %zmm1, %zmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0xe9,0x05]
; X64-NEXT: vpcmpnleud %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0xc9,0x06]
; X64-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
; X64-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; X64-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; X64-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; X64-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
; X64-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; X64-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
; X64-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; X64-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
; X64-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; X64-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
; X64-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; X64-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
  %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
  %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask)
  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
  %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask)
  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
  %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask)
  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
  %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask)
  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
  %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask)
  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
  %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask)
  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
  %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask)
  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
  ret <8 x i16> %vec7
}

declare i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone
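
; 64-bit element comparisons: <8 x i64> inputs, an i8 mask, and the eight i8
; results collected into an <8 x i8> vector.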
define <8 x i8> @test_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK-LABEL: test_cmp_q_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: vpcmpgtq %zmm0, %zmm1, %k0 ## encoding: [0x62,0xf2,0xf5,0x48,0x37,0xc0]
; CHECK-NEXT: vpcmpleq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xc9,0x02]
; CHECK-NEXT: vpcmpneqq %zmm1, %zmm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xd1,0x04]
; CHECK-NEXT: vpcmpnltq %zmm1, %zmm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xd9,0x05]
; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k4 ## encoding: [0x62,0xf2,0xfd,0x48,0x37,0xe1]
; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; CHECK-NEXT: movzbl %al, %eax ## encoding: [0x0f,0xb6,0xc0]
; CHECK-NEXT: vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
; CHECK-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01]
; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
; CHECK-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; CHECK-NEXT: movl $255, %eax ## encoding: [0xb8,0xff,0x00,0x00,0x00]
; CHECK-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

define <8 x i8> @test_mask_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
; X86-LABEL: test_mask_cmp_q_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x29,0xc1]
; X86-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; X86-NEXT: vpcmpgtq %zmm0, %zmm1, %k0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x37,0xc0]
; X86-NEXT: vpcmpleq %zmm1, %zmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xd1,0x02]
; X86-NEXT: vpcmpneqq %zmm1, %zmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xd9,0x04]
; X86-NEXT: vpcmpnltq %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xe1,0x05]
; X86-NEXT: vpcmpgtq %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x37,0xc9]
; X86-NEXT: kmovw %k0, %edx ## encoding: [0xc5,0xf8,0x93,0xd0]
; X86-NEXT: movzbl %cl, %ecx ## encoding: [0x0f,0xb6,0xc9]
; X86-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; X86-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x01]
; X86-NEXT: kmovw %k2, %ecx ## encoding: [0xc5,0xf8,0x93,0xca]
; X86-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x02]
; X86-NEXT: kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb]
; X86-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x04]
; X86-NEXT: kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc]
; X86-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x05]
; X86-NEXT: kmovw %k1, %ecx ## encoding: [0xc5,0xf8,0x93,0xc9]
; X86-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x06]
; X86-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_cmp_q_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x29,0xc1]
; X64-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT: vpcmpgtq %zmm0, %zmm1, %k0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x37,0xc0]
; X64-NEXT: vpcmpleq %zmm1, %zmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xd1,0x02]
; X64-NEXT: vpcmpneqq %zmm1, %zmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xd9,0x04]
; X64-NEXT: vpcmpnltq %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xe1,0x05]
; X64-NEXT: vpcmpgtq %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x37,0xc9]
; X64-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; X64-NEXT: movzbl %al, %eax ## encoding: [0x0f,0xb6,0xc0]
; X64-NEXT: vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
; X64-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01]
; X64-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
; X64-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; X64-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
; X64-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; X64-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
; X64-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; X64-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
; X64-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; X64-NEXT: vpinsrb $7, %edi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x07]
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

declare i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone
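
; Unsigned 64-bit comparisons; same structure as the cmp.q tests above with
; the unsigned predicates of vpcmpuq.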
6264 define <8 x i8> @test_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
6265 ; CHECK-LABEL: test_ucmp_q_512:
6267 ; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc1]
6268 ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
6269 ; CHECK-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xc1,0x01]
6270 ; CHECK-NEXT: vpcmpleuq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xc9,0x02]
6271 ; CHECK-NEXT: vpcmpneqq %zmm1, %zmm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xd1,0x04]
6272 ; CHECK-NEXT: vpcmpnltuq %zmm1, %zmm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xd9,0x05]
6273 ; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xe1,0x06]
6274 ; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
6275 ; CHECK-NEXT: movzbl %al, %eax ## encoding: [0x0f,0xb6,0xc0]
6276 ; CHECK-NEXT: vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
6277 ; CHECK-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01]
6278 ; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
6279 ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
6280 ; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
6281 ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
6282 ; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
6283 ; CHECK-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
6284 ; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
6285 ; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
6286 ; CHECK-NEXT: movl $255, %eax ## encoding: [0xb8,0xff,0x00,0x00,0x00]
6287 ; CHECK-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
6288 ; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
6289 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
6290 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
6291 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
6292 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1)
6293 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
6294 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1)
6295 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
6296 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1)
6297 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
6298 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1)
6299 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
6300 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1)
6301 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
6302 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1)
6303 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
6304 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1)
6305 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7

define <8 x i8> @test_mask_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
; X86-LABEL: test_mask_ucmp_q_512:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x29,0xc1]
; X86-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; X86-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xc1,0x01]
; X86-NEXT:    vpcmpleuq %zmm1, %zmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xd1,0x02]
; X86-NEXT:    vpcmpneqq %zmm1, %zmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xd9,0x04]
; X86-NEXT:    vpcmpnltuq %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xe1,0x05]
; X86-NEXT:    vpcmpnleuq %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xc9,0x06]
; X86-NEXT:    kmovw %k0, %edx ## encoding: [0xc5,0xf8,0x93,0xd0]
; X86-NEXT:    movzbl %cl, %ecx ## encoding: [0x0f,0xb6,0xc9]
; X86-NEXT:    vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; X86-NEXT:    vpinsrb $1, %edx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x01]
; X86-NEXT:    kmovw %k2, %ecx ## encoding: [0xc5,0xf8,0x93,0xca]
; X86-NEXT:    vpinsrb $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x02]
; X86-NEXT:    kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb]
; X86-NEXT:    vpinsrb $4, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x04]
; X86-NEXT:    kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc]
; X86-NEXT:    vpinsrb $5, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x05]
; X86-NEXT:    kmovw %k1, %ecx ## encoding: [0xc5,0xf8,0x93,0xc9]
; X86-NEXT:    vpinsrb $6, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x06]
; X86-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_ucmp_q_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x29,0xc1]
; X64-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xc1,0x01]
; X64-NEXT:    vpcmpleuq %zmm1, %zmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xd1,0x02]
; X64-NEXT:    vpcmpneqq %zmm1, %zmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xd9,0x04]
; X64-NEXT:    vpcmpnltuq %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xe1,0x05]
; X64-NEXT:    vpcmpnleuq %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xc9,0x06]
; X64-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; X64-NEXT:    movzbl %al, %eax ## encoding: [0x0f,0xb6,0xc0]
; X64-NEXT:    vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
; X64-NEXT:    vpinsrb $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01]
; X64-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
; X64-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; X64-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
; X64-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; X64-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
; X64-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; X64-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
; X64-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; X64-NEXT:    vpinsrb $7, %edi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x07]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

declare i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone
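
; The broadcast{f,i}32x4 and broadcast{f,i}64x4 tests below exercise the
; legacy subvector-broadcast intrinsics: the register forms lower to vinsert
; shuffle sequences, while the load forms select the dedicated vbroadcast
; instructions. As a rough sketch (not part of the test), the unmasked f32x4
; form behaves like the shuffle:
;   %bc = shufflevector <4 x float> %x0, <4 x float> undef,
;           <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3,
;                       i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>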

declare <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float>, <16 x float>, i16)

define <16 x float> @test_int_x86_avx512_mask_broadcastf32x4_512(<4 x float> %x0, <16 x float> %x2, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcastf32x4_512:
; X86:       ## %bb.0:
; X86-NEXT:    ## kill: def $xmm0 killed $xmm0 def $ymm0
; X86-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x18,0xc0,0x01]
; X86-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x1a,0xc0,0x01]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vmovaps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x28,0xc8]
; X86-NEXT:    vmovaps %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x28,0xd0]
; X86-NEXT:    vaddps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1]
; X86-NEXT:    vaddps %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcastf32x4_512:
; X64:       ## %bb.0:
; X64-NEXT:    ## kill: def $xmm0 killed $xmm0 def $ymm0
; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x18,0xc0,0x01]
; X64-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x1a,0xc0,0x01]
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovaps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x28,0xc8]
; X64-NEXT:    vmovaps %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x28,0xd0]
; X64-NEXT:    vaddps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1]
; X64-NEXT:    vaddps %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]

  %res1 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float> %x0, <16 x float> %x2, i16 -1)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float> %x0, <16 x float> %x2, i16 %mask)
  %res3 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float> %x0, <16 x float> zeroinitializer, i16 %mask)
  %res4 = fadd <16 x float> %res1, %res2
  %res5 = fadd <16 x float> %res3, %res4
  ret <16 x float> %res5
}

define <16 x float> @test_int_x86_avx512_mask_broadcastf32x4_512_load(<4 x float>* %x0ptr, <16 x float> %x2, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcastf32x4_512_load:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vbroadcastf32x4 (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x1a,0x00]
; X86-NEXT:    ## zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcastf32x4_512_load:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vbroadcastf32x4 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x1a,0x07]
; X64-NEXT:    ## zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X64-NEXT:    retq ## encoding: [0xc3]
  %x0 = load <4 x float>, <4 x float>* %x0ptr
  %res = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float> %x0, <16 x float> %x2, i16 %mask)
  ret <16 x float> %res
}

declare <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double>, <8 x double>, i8)

define <8 x double> @test_int_x86_avx512_broadcastf64x4_512(<4 x double> %x0, <8 x double> %x2) {
; CHECK-LABEL: test_int_x86_avx512_broadcastf64x4_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
; CHECK-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x1a,0xc0,0x01]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]

  %res = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double> %x0, <8 x double> %x2, i8 -1)
  ret <8 x double> %res
}

define <8 x double> @test_int_x86_avx512_mask_broadcastf64x4_512(<4 x double> %x0, <8 x double> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcastf64x4_512:
; X86:       ## %bb.0:
; X86-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1a,0xc8,0x01]
; X86-NEXT:    vmovapd %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcastf64x4_512:
; X64:       ## %bb.0:
; X64-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1a,0xc8,0x01]
; X64-NEXT:    vmovapd %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]

  %res = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double> %x0, <8 x double> %x2, i8 %mask)
  ret <8 x double> %res
}

define <8 x double> @test_int_x86_avx512_maskz_broadcastf64x4_512(<4 x double> %x0, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_broadcastf64x4_512:
; X86:       ## %bb.0:
; X86-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x1a,0xc0,0x01]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_broadcastf64x4_512:
; X64:       ## %bb.0:
; X64-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x1a,0xc0,0x01]
; X64-NEXT:    retq ## encoding: [0xc3]

  %res = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double> %x0, <8 x double> zeroinitializer, i8 %mask)
  ret <8 x double> %res
}

define <8 x double> @test_int_x86_avx512_mask_broadcastf64x4_512_load(<4 x double>* %x0ptr, <8 x double> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcastf64x4_512_load:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vbroadcastf64x4 (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x1b,0x00]
; X86-NEXT:    ## zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcastf64x4_512_load:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vbroadcastf64x4 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x1b,0x07]
; X64-NEXT:    ## zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; X64-NEXT:    retq ## encoding: [0xc3]

  %x0 = load <4 x double>, <4 x double>* %x0ptr
  %res = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double> %x0, <8 x double> %x2, i8 %mask)
  ret <8 x double> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32>, <16 x i32>, i16)

define <16 x i32> @test_int_x86_avx512_mask_broadcasti32x4_512(<4 x i32> %x0, <16 x i32> %x2, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcasti32x4_512:
; X86:       ## %bb.0:
; X86-NEXT:    ## kill: def $xmm0 killed $xmm0 def $ymm0
; X86-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x38,0xc0,0x01]
; X86-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x3a,0xc0,0x01]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6f,0xc8]
; X86-NEXT:    vmovdqa32 %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6f,0xd0]
; X86-NEXT:    vpaddd %zmm2, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xca]
; X86-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcasti32x4_512:
; X64:       ## %bb.0:
; X64-NEXT:    ## kill: def $xmm0 killed $xmm0 def $ymm0
; X64-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x38,0xc0,0x01]
; X64-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x3a,0xc0,0x01]
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6f,0xc8]
; X64-NEXT:    vmovdqa32 %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6f,0xd0]
; X64-NEXT:    vpaddd %zmm2, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xca]
; X64-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]

  %res1 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32> %x0, <16 x i32> %x2, i16 -1)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32> %x0, <16 x i32> %x2, i16 %mask)
  %res3 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32> %x0, <16 x i32> zeroinitializer, i16 %mask)
  %res4 = add <16 x i32> %res1, %res2
  %res5 = add <16 x i32> %res3, %res4
  ret <16 x i32> %res5
}

define <16 x i32> @test_int_x86_avx512_mask_broadcasti32x4_512_load(<4 x i32>* %x0ptr, <16 x i32> %x2, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcasti32x4_512_load:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vbroadcasti32x4 (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x5a,0x00]
; X86-NEXT:    ## zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcasti32x4_512_load:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vbroadcasti32x4 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x5a,0x07]
; X64-NEXT:    ## zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X64-NEXT:    retq ## encoding: [0xc3]

  %x0 = load <4 x i32>, <4 x i32>* %x0ptr
  %res = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32> %x0, <16 x i32> %x2, i16 %mask)
  ret <16 x i32> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64>, <8 x i64>, i8)

define <8 x i64> @test_int_x86_avx512_broadcasti64x4_512(<4 x i64> %x0, <8 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_broadcasti64x4_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
; CHECK-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x1a,0xc0,0x01]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]

  %res = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64> %x0, <8 x i64> %x2, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_int_x86_avx512_mask_broadcasti64x4_512(<4 x i64> %x0, <8 x i64> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcasti64x4_512:
; X86:       ## %bb.0:
; X86-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x3a,0xc8,0x01]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcasti64x4_512:
; X64:       ## %bb.0:
; X64-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x3a,0xc8,0x01]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]

  %res = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64> %x0, <8 x i64> %x2, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_int_x86_avx512_maskz_broadcasti64x4_512(<4 x i64> %x0, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_broadcasti64x4_512:
; X86:       ## %bb.0:
; X86-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x3a,0xc0,0x01]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_broadcasti64x4_512:
; X64:       ## %bb.0:
; X64-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x3a,0xc0,0x01]
; X64-NEXT:    retq ## encoding: [0xc3]

  %res = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64> %x0, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_int_x86_avx512_mask_broadcasti64x4_512_load(<4 x i64>* %x0ptr, <8 x i64> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcasti64x4_512_load:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vbroadcasti64x4 (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x5b,0x00]
; X86-NEXT:    ## zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcasti64x4_512_load:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vbroadcasti64x4 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x5b,0x07]
; X64-NEXT:    ## zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; X64-NEXT:    retq ## encoding: [0xc3]

  %x0 = load <4 x i64>, <4 x i64>* %x0ptr
  %res = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64> %x0, <8 x i64> %x2, i8 %mask)
  ret <8 x i64> %res
}
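
; The pabs tests check vpabsd/vpabsq selection. In the masked variants the
; intrinsic's second operand is the passthru value, so the result is computed
; into the passthru register under the mask and then copied to the return
; register, which is the vmovdqa64 in the checks below.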

declare <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32>, <16 x i32>, i16)

define <16 x i32> @test_int_x86_avx512_pabs_d_512(<16 x i32> %x0, <16 x i32> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pabs_d_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpabsd %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x1e,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_int_x86_avx512_mask_pabs_d_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pabs_d_512:
; X86:       ## %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpabsd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x1e,0xc8]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pabs_d_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpabsd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x1e,0xc8]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2)
  ret <16 x i32> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64>, <8 x i64>, i8)

define <8 x i64> @test_int_x86_avx512_pabs_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_pabs_q_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpabsq %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x1f,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_int_x86_avx512_mask_pabs_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pabs_q_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpabsq %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x1f,0xc8]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pabs_q_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpabsq %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x1f,0xc8]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2)
  ret <8 x i64> %res
}
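
; Each ptestm/ptestnm test calls the intrinsic twice, once with the variable
; mask and once with an all-ones mask, and adds the results; the scalar tail
; in the checks (an and of the compare result with the mask, followed by an
; add) reflects exactly that.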

define i8 @test_vptestmq(<8 x i64> %a0, <8 x i64> %a1, i8 %m) {
; X86-LABEL: test_vptestmq:
; X86:       ## %bb.0:
; X86-NEXT:    vptestmq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc1]
; X86-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    andb %cl, %al ## encoding: [0x20,0xc8]
; X86-NEXT:    addb %cl, %al ## encoding: [0x00,0xc8]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_vptestmq:
; X64:       ## %bb.0:
; X64-NEXT:    vptestmq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc1]
; X64-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    andb %al, %dil ## encoding: [0x40,0x20,0xc7]
; X64-NEXT:    addb %dil, %al ## encoding: [0x40,0x00,0xf8]
; X64-NEXT:    ## kill: def $al killed $al killed $eax
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 -1)
  %res1 = call i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 %m)
  %res2 = add i8 %res1, %res
  ret i8 %res2
}
declare i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64>, <8 x i64>, i8)

define i16 @test_vptestmd(<16 x i32> %a0, <16 x i32> %a1, i16 %m) {
; X86-LABEL: test_vptestmd:
; X86:       ## %bb.0:
; X86-NEXT:    vptestmd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc1]
; X86-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-NEXT:    andw %cx, %ax ## encoding: [0x66,0x21,0xc8]
; X86-NEXT:    addl %ecx, %eax ## encoding: [0x01,0xc8]
; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_vptestmd:
; X64:       ## %bb.0:
; X64-NEXT:    vptestmd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc1]
; X64-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    andl %eax, %edi ## encoding: [0x21,0xc7]
; X64-NEXT:    addl %edi, %eax ## encoding: [0x01,0xf8]
; X64-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call i16 @llvm.x86.avx512.ptestm.d.512(<16 x i32> %a0, <16 x i32> %a1, i16 -1)
  %res1 = call i16 @llvm.x86.avx512.ptestm.d.512(<16 x i32> %a0, <16 x i32> %a1, i16 %m)
  %res2 = add i16 %res1, %res
  ret i16 %res2
}
declare i16 @llvm.x86.avx512.ptestm.d.512(<16 x i32>, <16 x i32>, i16)

declare i16 @llvm.x86.avx512.ptestnm.d.512(<16 x i32>, <16 x i32>, i16)

define i16 @test_int_x86_avx512_ptestnm_d_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_ptestnm_d_512:
; X86:       ## %bb.0:
; X86-NEXT:    vptestnmd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x48,0x27,0xc1]
; X86-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-NEXT:    andw %cx, %ax ## encoding: [0x66,0x21,0xc8]
; X86-NEXT:    addl %ecx, %eax ## encoding: [0x01,0xc8]
; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_ptestnm_d_512:
; X64:       ## %bb.0:
; X64-NEXT:    vptestnmd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x48,0x27,0xc1]
; X64-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    andl %eax, %edi ## encoding: [0x21,0xc7]
; X64-NEXT:    addl %edi, %eax ## encoding: [0x01,0xf8]
; X64-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call i16 @llvm.x86.avx512.ptestnm.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2)
  %res1 = call i16 @llvm.x86.avx512.ptestnm.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 -1)
  %res2 = add i16 %res, %res1
  ret i16 %res2
}

declare i8 @llvm.x86.avx512.ptestnm.q.512(<8 x i64>, <8 x i64>, i8)

define i8 @test_int_x86_avx512_ptestnm_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_ptestnm_q_512:
; X86:       ## %bb.0:
; X86-NEXT:    vptestnmq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x48,0x27,0xc1]
; X86-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    andb %cl, %al ## encoding: [0x20,0xc8]
; X86-NEXT:    addb %cl, %al ## encoding: [0x00,0xc8]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_ptestnm_q_512:
; X64:       ## %bb.0:
; X64-NEXT:    vptestnmq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x48,0x27,0xc1]
; X64-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    andb %al, %dil ## encoding: [0x40,0x20,0xc7]
; X64-NEXT:    addb %dil, %al ## encoding: [0x40,0x00,0xf8]
; X64-NEXT:    ## kill: def $al killed $al killed $eax
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.ptestnm.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2)
  %res1 = call i8 @llvm.x86.avx512.ptestnm.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 -1)
  %res2 = add i8 %res, %res1
  ret i8 %res2
}
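
; The tests below cover the legacy i16 mask-word logic intrinsics (kand,
; kandn, knot, kor, kxnor, kxor). The i16 arguments arrive in GPRs and the
; results are returned in GPRs, so these fold to plain scalar logic
; instructions rather than k-register operations, as the checks show.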

declare i16 @llvm.x86.avx512.kand.w(i16, i16) nounwind readnone
define i16 @test_kand(i16 %a0, i16 %a1) {
; X86-LABEL: test_kand:
; X86:       ## %bb.0:
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-NEXT:    andw {{[0-9]+}}(%esp), %ax ## encoding: [0x66,0x23,0x44,0x24,0x08]
; X86-NEXT:    andl $8, %eax ## encoding: [0x83,0xe0,0x08]
; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_kand:
; X64:       ## %bb.0:
; X64-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
; X64-NEXT:    andl %esi, %eax ## encoding: [0x21,0xf0]
; X64-NEXT:    andl $8, %eax ## encoding: [0x83,0xe0,0x08]
; X64-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq ## encoding: [0xc3]
  %t1 = call i16 @llvm.x86.avx512.kand.w(i16 %a0, i16 8)
  %t2 = call i16 @llvm.x86.avx512.kand.w(i16 %t1, i16 %a1)
  ret i16 %t2
}

declare i16 @llvm.x86.avx512.kandn.w(i16, i16) nounwind readnone
define i16 @test_kandn(i16 %a0, i16 %a1) {
; X86-LABEL: test_kandn:
; X86:       ## %bb.0:
; X86-NEXT:    movl $65527, %eax ## encoding: [0xb8,0xf7,0xff,0x00,0x00]
; X86-NEXT:    ## imm = 0xFFF7
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax ## encoding: [0x0b,0x44,0x24,0x04]
; X86-NEXT:    andw {{[0-9]+}}(%esp), %ax ## encoding: [0x66,0x23,0x44,0x24,0x08]
; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_kandn:
; X64:       ## %bb.0:
; X64-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
; X64-NEXT:    orl $-9, %eax ## encoding: [0x83,0xc8,0xf7]
; X64-NEXT:    andl %esi, %eax ## encoding: [0x21,0xf0]
; X64-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq ## encoding: [0xc3]
  %t1 = call i16 @llvm.x86.avx512.kandn.w(i16 %a0, i16 8)
  %t2 = call i16 @llvm.x86.avx512.kandn.w(i16 %t1, i16 %a1)
  ret i16 %t2
}

declare i16 @llvm.x86.avx512.knot.w(i16) nounwind readnone
define i16 @test_knot(i16 %a0) {
; X86-LABEL: test_knot:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    notl %eax ## encoding: [0xf7,0xd0]
; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_knot:
; X64:       ## %bb.0:
; X64-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
; X64-NEXT:    notl %eax ## encoding: [0xf7,0xd0]
; X64-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call i16 @llvm.x86.avx512.knot.w(i16 %a0)
  ret i16 %res
}

declare i16 @llvm.x86.avx512.kor.w(i16, i16) nounwind readnone
define i16 @test_kor(i16 %a0, i16 %a1) {
; X86-LABEL: test_kor:
; X86:       ## %bb.0:
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-NEXT:    orw {{[0-9]+}}(%esp), %ax ## encoding: [0x66,0x0b,0x44,0x24,0x08]
; X86-NEXT:    orl $8, %eax ## encoding: [0x83,0xc8,0x08]
; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_kor:
; X64:       ## %bb.0:
; X64-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
; X64-NEXT:    orl %esi, %eax ## encoding: [0x09,0xf0]
; X64-NEXT:    orl $8, %eax ## encoding: [0x83,0xc8,0x08]
; X64-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq ## encoding: [0xc3]
  %t1 = call i16 @llvm.x86.avx512.kor.w(i16 %a0, i16 8)
  %t2 = call i16 @llvm.x86.avx512.kor.w(i16 %t1, i16 %a1)
  ret i16 %t2
}

declare i16 @llvm.x86.avx512.kxnor.w(i16, i16) nounwind readnone
; TODO: the two kxnor instructions here are no-ops and should be eliminated,
; probably by FoldConstantArithmetic in SelectionDAG.
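; Since kxnor(a, b) = not(xor(a, b)), the two nots cancel in this pair:
; kxnor(kxnor(a0, 8), a1) = xor(xor(a0, 8), a1), which is why the checks
; below show plain xor instructions.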
define i16 @test_kxnor(i16 %a0, i16 %a1) {
; X86-LABEL: test_kxnor:
; X86:       ## %bb.0:
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-NEXT:    xorw {{[0-9]+}}(%esp), %ax ## encoding: [0x66,0x33,0x44,0x24,0x08]
; X86-NEXT:    xorl $8, %eax ## encoding: [0x83,0xf0,0x08]
; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_kxnor:
; X64:       ## %bb.0:
; X64-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
; X64-NEXT:    xorl %esi, %eax ## encoding: [0x31,0xf0]
; X64-NEXT:    xorl $8, %eax ## encoding: [0x83,0xf0,0x08]
; X64-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq ## encoding: [0xc3]
  %t1 = call i16 @llvm.x86.avx512.kxnor.w(i16 %a0, i16 8)
  %t2 = call i16 @llvm.x86.avx512.kxnor.w(i16 %t1, i16 %a1)
  ret i16 %t2
}

declare i16 @llvm.x86.avx512.kxor.w(i16, i16) nounwind readnone
define i16 @test_kxor(i16 %a0, i16 %a1) {
; X86-LABEL: test_kxor:
; X86:       ## %bb.0:
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-NEXT:    xorw {{[0-9]+}}(%esp), %ax ## encoding: [0x66,0x33,0x44,0x24,0x08]
; X86-NEXT:    xorl $8, %eax ## encoding: [0x83,0xf0,0x08]
; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_kxor:
; X64:       ## %bb.0:
; X64-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
; X64-NEXT:    xorl %esi, %eax ## encoding: [0x31,0xf0]
; X64-NEXT:    xorl $8, %eax ## encoding: [0x83,0xf0,0x08]
; X64-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq ## encoding: [0xc3]
  %t1 = call i16 @llvm.x86.avx512.kxor.w(i16 %a0, i16 8)
  %t2 = call i16 @llvm.x86.avx512.kxor.w(i16 %t1, i16 %a1)
  ret i16 %t2
}

declare i32 @llvm.x86.avx512.kortestz.w(i16, i16) nounwind readnone
define i32 @test_kortestz(<8 x i64> %A, <8 x i64> %B, <8 x i64> %C, <8 x i64> %D) {
; CHECK-LABEL: test_kortestz:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vpcmpneqd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xc1,0x04]
; CHECK-NEXT:    vpcmpneqd %zmm3, %zmm2, %k1 ## encoding: [0x62,0xf3,0x6d,0x48,0x1f,0xcb,0x04]
; CHECK-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
; CHECK-NEXT:    kortestw %k1, %k0 ## encoding: [0xc5,0xf8,0x98,0xc1]
; CHECK-NEXT:    sete %al ## encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %0 = bitcast <8 x i64> %A to <16 x i32>
  %1 = bitcast <8 x i64> %B to <16 x i32>
  %2 = icmp ne <16 x i32> %0, %1
  %3 = bitcast <8 x i64> %C to <16 x i32>
  %4 = bitcast <8 x i64> %D to <16 x i32>
  %5 = icmp ne <16 x i32> %3, %4
  %6 = bitcast <16 x i1> %2 to i16
  %7 = bitcast <16 x i1> %5 to i16
  %res = call i32 @llvm.x86.avx512.kortestz.w(i16 %6, i16 %7)
  ret i32 %res
}
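
; TODO: test_kortestc below still calls llvm.x86.avx512.kortestz.w, so its
; checks (kortestw + sete) duplicate the kortestz coverage and the
; kortestc.w declaration is unused.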
declare i32 @llvm.x86.avx512.kortestc.w(i16, i16) nounwind readnone
define i32 @test_kortestc(<8 x i64> %A, <8 x i64> %B, <8 x i64> %C, <8 x i64> %D) {
; CHECK-LABEL: test_kortestc:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vpcmpneqd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xc1,0x04]
; CHECK-NEXT:    vpcmpneqd %zmm3, %zmm2, %k1 ## encoding: [0x62,0xf3,0x6d,0x48,0x1f,0xcb,0x04]
; CHECK-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
; CHECK-NEXT:    kortestw %k1, %k0 ## encoding: [0xc5,0xf8,0x98,0xc1]
; CHECK-NEXT:    sete %al ## encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %0 = bitcast <8 x i64> %A to <16 x i32>
  %1 = bitcast <8 x i64> %B to <16 x i32>
  %2 = icmp ne <16 x i32> %0, %1
  %3 = bitcast <8 x i64> %C to <16 x i32>
  %4 = bitcast <8 x i64> %D to <16 x i32>
  %5 = icmp ne <16 x i32> %3, %4
  %6 = bitcast <16 x i1> %2 to i16
  %7 = bitcast <16 x i1> %5 to i16
  %res = call i32 @llvm.x86.avx512.kortestz.w(i16 %6, i16 %7)
  ret i32 %res
}

define i16 @test_cmpps(<16 x float> %a, <16 x float> %b) {
; CHECK-LABEL: test_cmpps:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vcmpleps {sae}, %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7c,0x18,0xc2,0xc1,0x02]
; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    ## kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i16 -1, i32 8)
  ret i16 %res
}
declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float>, <16 x float>, i32, i16, i32)

define i8 @test_cmppd(<8 x double> %a, <8 x double> %b) {
; CHECK-LABEL: test_cmppd:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vcmpneqpd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc1,0x04]
; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    ## kill: def $al killed $al killed $eax
; CHECK-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, i8 -1, i32 4)
  ret i8 %res
}
declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double>, <8 x double>, i32, i8, i32)
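
; The test_mul_epi32/epu32 tests below follow the usual suffix convention:
; rr = register-register, rm = memory operand, rmb = broadcast ({1to8})
; memory operand, with k (merge-masked via select) and kz (zero-masked)
; variants of each form.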

define <8 x i64> @test_mul_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_mul_epi32_rr:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpmuldq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b)
  ret <8 x i64> %res
}

define <8 x i64> @test_mul_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mul_epi32_rrk:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmuldq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mul_epi32_rrk:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmuldq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %mul = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b)
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> %passThru
  ret <8 x i64> %res
}

define <8 x i64> @test_mul_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
; X86-LABEL: test_mul_epi32_rrkz:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmuldq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mul_epi32_rrkz:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmuldq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %mul = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b)
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> zeroinitializer
  ret <8 x i64> %res
}

define <8 x i64> @test_mul_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
; X86-LABEL: test_mul_epi32_rm:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmuldq (%eax), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0x00]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mul_epi32_rm:
; X64:       ## %bb.0:
; X64-NEXT:    vpmuldq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0x07]
; X64-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b)
  ret <8 x i64> %res
}

define <8 x i64> @test_mul_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mul_epi32_rmk:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmuldq (%eax), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0x08]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mul_epi32_rmk:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuldq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0x0f]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %mul = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b)
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> %passThru
  ret <8 x i64> %res
}

define <8 x i64> @test_mul_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mul_epi32_rmkz:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmuldq (%eax), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0x00]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mul_epi32_rmkz:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuldq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0x07]
; X64-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %mul = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b)
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> zeroinitializer
  ret <8 x i64> %res
}

define <8 x i64> @test_mul_epi32_rmb(<16 x i32> %a, i64* %ptr_b) {
; X86-LABEL: test_mul_epi32_rmb:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmuldq (%eax){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x58,0x28,0x00]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mul_epi32_rmb:
; X64:       ## %bb.0:
; X64-NEXT:    vpmuldq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x58,0x28,0x07]
; X64-NEXT:    retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %b = bitcast <8 x i64> %b64 to <16 x i32>
  %res = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b)
  ret <8 x i64> %res
}

define <8 x i64> @test_mul_epi32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mul_epi32_rmbk:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmuldq (%eax){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x59,0x28,0x08]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mul_epi32_rmbk:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuldq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x59,0x28,0x0f]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %b = bitcast <8 x i64> %b64 to <16 x i32>
  %mul = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b)
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> %passThru
  ret <8 x i64> %res
}

define <8 x i64> @test_mul_epi32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
; X86-LABEL: test_mul_epi32_rmbkz:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmuldq (%eax){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xd9,0x28,0x00]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mul_epi32_rmbkz:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuldq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xd9,0x28,0x07]
; X64-NEXT:    retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %b = bitcast <8 x i64> %b64 to <16 x i32>
  %mul = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b)
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> zeroinitializer
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32>, <16 x i32>)
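
; The unsigned variants below mirror the signed tests above, selecting
; vpmuludq instead of vpmuldq.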

define <8 x i64> @test_mul_epu32_rr(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_mul_epu32_rr:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpmuludq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b)
  ret <8 x i64> %res
}

define <8 x i64> @test_mul_epu32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mul_epu32_rrk:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmuludq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mul_epu32_rrk:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmuludq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %mul = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b)
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> %passThru
  ret <8 x i64> %res
}

define <8 x i64> @test_mul_epu32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
; X86-LABEL: test_mul_epu32_rrkz:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmuludq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mul_epu32_rrkz:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmuludq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %mul = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b)
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> zeroinitializer
  ret <8 x i64> %res
}

define <8 x i64> @test_mul_epu32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
; X86-LABEL: test_mul_epu32_rm:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmuludq (%eax), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0x00]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mul_epu32_rm:
; X64:       ## %bb.0:
; X64-NEXT:    vpmuludq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0x07]
; X64-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %res = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b)
  ret <8 x i64> %res
}

define <8 x i64> @test_mul_epu32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mul_epu32_rmk:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmuludq (%eax), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0x08]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mul_epu32_rmk:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuludq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0x0f]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %mul = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b)
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> %passThru
  ret <8 x i64> %res
}

define <8 x i64> @test_mul_epu32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mul_epu32_rmkz:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmuludq (%eax), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0x00]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mul_epu32_rmkz:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuludq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0x07]
; X64-NEXT:    retq ## encoding: [0xc3]
  %b = load <16 x i32>, <16 x i32>* %ptr_b
  %mul = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b)
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> zeroinitializer
  ret <8 x i64> %res
}

define <8 x i64> @test_mul_epu32_rmb(<16 x i32> %a, i64* %ptr_b) {
; X86-LABEL: test_mul_epu32_rmb:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmuludq (%eax){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xf4,0x00]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mul_epu32_rmb:
; X64:       ## %bb.0:
; X64-NEXT:    vpmuludq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xf4,0x07]
; X64-NEXT:    retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %b = bitcast <8 x i64> %b64 to <16 x i32>
  %res = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b)
  ret <8 x i64> %res
}

define <8 x i64> @test_mul_epu32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mul_epu32_rmbk:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmuludq (%eax){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xf4,0x08]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mul_epu32_rmbk:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuludq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xf4,0x0f]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %b = bitcast <8 x i64> %b64 to <16 x i32>
  %mul = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b)
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> %passThru
  ret <8 x i64> %res
}

define <8 x i64> @test_mul_epu32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
; X86-LABEL: test_mul_epu32_rmbkz:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmuludq (%eax){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xf4,0x00]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mul_epu32_rmbkz:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuludq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xf4,0x07]
; X64-NEXT:    retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
  %b = bitcast <8 x i64> %b64 to <16 x i32>
  %mul = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b)
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> zeroinitializer
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32>, <16 x i32>)
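
; test_x86_avx512_mm_cvtu32_sd below checks the unsigned i32 -> f64 scalar
; convert: vcvtusi2sd takes the GPR operand directly on 64-bit targets, while
; on 32-bit it is folded as a load from the stack, as the X86 check shows.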

define <2 x double> @test_x86_avx512_mm_cvtu32_sd(<2 x double> %a, i32 %b)
; X86-LABEL: test_x86_avx512_mm_cvtu32_sd:
; X86:       ## %bb.0:
; X86-NEXT:    vcvtusi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7f,0x08,0x7b,0x44,0x24,0x01]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mm_cvtu32_sd:
; X64:       ## %bb.0:
; X64-NEXT:    vcvtusi2sd %edi, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7f,0x08,0x7b,0xc7]
; X64-NEXT:    retq ## encoding: [0xc3]
{
  %res = call <2 x double> @llvm.x86.avx512.cvtusi2sd(<2 x double> %a, i32 %b) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx512.cvtusi2sd(<2 x double>, i32) nounwind readnone

define <16 x float> @test_x86_vbroadcast_ss_512(i8* %a0) {
; X86-LABEL: test_x86_vbroadcast_ss_512:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vbroadcastss (%eax), %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x18,0x00]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_vbroadcast_ss_512:
; X64:       ## %bb.0:
; X64-NEXT:    vbroadcastss (%rdi), %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x18,0x07]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8* %a0) ; <<16 x float>> [#uses=1]
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8*) nounwind readonly

define <8 x double> @test_x86_vbroadcast_sd_512(i8* %a0) {
; X86-LABEL: test_x86_vbroadcast_sd_512:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vbroadcastsd (%eax), %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x19,0x00]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_vbroadcast_sd_512:
; X64:       ## %bb.0:
; X64-NEXT:    vbroadcastsd (%rdi), %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x19,0x07]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8* %a0) ; <<8 x double>> [#uses=1]
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8*) nounwind readonly
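
; In the permvar tests, the intrinsic's first operand is the data vector and
; the second the index vector, so the checks show %zmm0 (the data) being
; permuted by the indices in %zmm1. With no mask tying down the domain, the
; unmasked integer (di) variant is also free to use the FP-domain vpermpd.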

declare <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double>, <8 x i64>, <8 x double>, i8)

define <8 x double> @test_int_x86_avx512_permvar_df_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2) {
; CHECK-LABEL: test_int_x86_avx512_permvar_df_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermpd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x48,0x16,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
  ret <8 x double> %res
}

define <8 x double> @test_int_x86_avx512_mask_permvar_df_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_permvar_df_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermpd %zmm0, %zmm1, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x16,0xd0]
; X86-NEXT:    vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_permvar_df_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermpd %zmm0, %zmm1, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x16,0xd0]
; X64-NEXT:    vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3)
  ret <8 x double> %res
}

define <8 x double> @test_int_x86_avx512_maskz_permvar_df_512(<8 x double> %x0, <8 x i64> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_permvar_df_512:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermpd %zmm0, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xc9,0x16,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_permvar_df_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermpd %zmm0, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xc9,0x16,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> zeroinitializer, i8 %x3)
  ret <8 x double> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_permvar_di_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_permvar_di_512:
; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x48,0x16,0xc0]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_mask_permvar_di_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_permvar_di_512:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpermq %zmm0, %zmm1, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x36,0xd0]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_mask_permvar_di_512:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermq %zmm0, %zmm1, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x36,0xd0]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
  ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_maskz_permvar_di_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_permvar_di_512:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpermq %zmm0, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xc9,0x36,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_maskz_permvar_di_512:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermq %zmm0, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xc9,0x36,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer, i8 %x3)
  ret <8 x i64> %res
}

declare <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float>, <16 x i32>, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_permvar_sf_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2) {
; CHECK-LABEL: test_int_x86_avx512_permvar_sf_512:
; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0x16,0xc0]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
  ret <16 x float> %res
}

define <16 x float>@test_int_x86_avx512_mask_permvar_sf_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_permvar_sf_512:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpermps %zmm0, %zmm1, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x16,0xd0]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_mask_permvar_sf_512:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermps %zmm0, %zmm1, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x16,0xd0]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3)
  ret <16 x float> %res
}

define <16 x float>@test_int_x86_avx512_maskz_permvar_sf_512(<16 x float> %x0, <16 x i32> %x1, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_permvar_sf_512:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpermps %zmm0, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xc9,0x16,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_maskz_permvar_sf_512:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermps %zmm0, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xc9,0x16,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> zeroinitializer, i16 %x3)
  ret <16 x float> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_permvar_si_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_permvar_si_512:
; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0x16,0xc0]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32>@test_int_x86_avx512_mask_permvar_si_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_permvar_si_512:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpermd %zmm0, %zmm1, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x36,0xd0]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_mask_permvar_si_512:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermd %zmm0, %zmm1, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x36,0xd0]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
  ret <16 x i32> %res
}

define <16 x i32>@test_int_x86_avx512_maskz_permvar_si_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_permvar_si_512:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpermd %zmm0, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xc9,0x36,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_maskz_permvar_si_512:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermd %zmm0, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xc9,0x36,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> zeroinitializer, i16 %x3)
  ret <16 x i32> %res
}
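
;; vpternlogd/vpternlogq tests. The $33 immediate is the 8-bit ternary truth table that the
;; instruction applies bitwise to its three sources.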

declare <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16)

define <16 x i32>@test_int_x86_avx512_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pternlog_d_512:
; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf3,0x75,0x48,0x25,0xc2,0x21]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32>@test_int_x86_avx512_mask_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_pternlog_d_512:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf3,0x75,0x49,0x25,0xc2,0x21]
; X86-NEXT: retl ## encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_mask_pternlog_d_512:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf3,0x75,0x49,0x25,0xc2,0x21]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16)

define <16 x i32>@test_int_x86_avx512_maskz_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_maskz_pternlog_d_512:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc2,0x21]
; X86-NEXT: retl ## encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_maskz_pternlog_d_512:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc2,0x21]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4)
  ret <16 x i32> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8)

define <8 x i64>@test_int_x86_avx512_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pternlog_q_512:
; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf3,0xf5,0x48,0x25,0xc2,0x21]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_mask_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_pternlog_q_512:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf3,0xf5,0x49,0x25,0xc2,0x21]
; X86-NEXT: retl ## encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_mask_pternlog_q_512:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf3,0xf5,0x49,0x25,0xc2,0x21]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8)

define <8 x i64>@test_int_x86_avx512_maskz_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_maskz_pternlog_q_512:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0xc9,0x25,0xc2,0x21]
; X86-NEXT: retl ## encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_maskz_pternlog_q_512:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0xc9,0x25,0xc2,0x21]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4)
  ret <8 x i64> %res
}
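
;; vpermi2var/vpermt2var tests. For the unmasked forms the compiler is free to commute to the
;; opposite variant (i2 <-> t2) so the result lands in %zmm0 without an extra register move.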

declare <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_vpermi2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4) {
; CHECK-LABEL: test_int_x86_avx512_vpermi2var_d_512:
; CHECK-NEXT: vpermt2d %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0x7e,0xc2]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %x2 = load <16 x i32>, <16 x i32>* %x2p
  %res = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32>@test_int_x86_avx512_mask_vpermi2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_d_512:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpermi2d (%eax), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x76,0x08]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_d_512:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpermi2d (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x76,0x0f]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %x2 = load <16 x i32>, <16 x i32>* %x2p
  %res = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
  ret <16 x i32> %res
}

declare <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_vpermi2var_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermi2var_pd_512:
; CHECK-NEXT: vpermt2pd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x48,0x7f,0xc2]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
  ret <8 x double> %res
}

define <8 x double>@test_int_x86_avx512_mask_vpermi2var_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_512:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpermi2pd %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x77,0xca]
; X86-NEXT: vmovapd %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_512:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermi2pd %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x77,0xca]
; X64-NEXT: vmovapd %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3)
  ret <8 x double> %res
}

declare <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_vpermi2var_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermi2var_ps_512:
; CHECK-NEXT: vpermt2ps %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0x7f,0xc2]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
  ret <16 x float> %res
}

define <16 x float>@test_int_x86_avx512_mask_vpermi2var_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_512:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpermi2ps %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x77,0xca]
; X86-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_512:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermi2ps %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x77,0xca]
; X64-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3)
  ret <16 x float> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_vpermi2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermi2var_q_512:
; CHECK-NEXT: vpermt2q %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x48,0x7e,0xc2]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_mask_vpermi2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_q_512:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpermi2q %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x76,0xca]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_q_512:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermi2q %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x76,0xca]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
  ret <8 x i64> %res
}

declare <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_512:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpermi2d (%eax), %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xc9,0x76,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_512:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpermi2d (%rdi), %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xc9,0x76,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
  %x2 = load <16 x i32>, <16 x i32>* %x2p
  %res = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
  ret <16 x i32> %res
}

declare <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64>, <8 x double>, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_maskz_vpermt2var_pd_512(<8 x i64> %x0, <8 x double> %x1, double* %x2ptr, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_pd_512:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpermi2pd (%eax){1to8}, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xd9,0x77,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_pd_512:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpermi2pd (%rdi){1to8}, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xd9,0x77,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
  %x2s = load double, double* %x2ptr
  %x2ins = insertelement <8 x double> undef, double %x2s, i32 0
  %x2 = shufflevector <8 x double> %x2ins, <8 x double> undef, <8 x i32> zeroinitializer
  %res = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
  ret <8 x double> %res
}

declare <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_maskz_vpermt2var_ps_512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_ps_512:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpermi2ps %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xc9,0x77,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_ps_512:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermi2ps %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xc9,0x77,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
  ret <16 x float> %res
}

declare <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_512:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpermi2q %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xc9,0x76,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_512:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermi2q %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xc9,0x76,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
  ret <8 x i64> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermt2var_d_512:
; CHECK-NEXT: vpermi2d %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0x76,0xc2]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32>@test_int_x86_avx512_mask_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_d_512:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpermt2d %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x7e,0xca]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_d_512:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpermt2d %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x7e,0xca]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
  ret <16 x i32> %res
}
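
;; Arithmetic with embedded rounding. The trailing i32 argument selects the rounding mode:
;; 8 = {rn-sae}, 9 = {rd-sae}, 10 = {ru-sae}, 11 = {rz-sae}; 4 means the current (default) mode.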

declare <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
declare <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
declare <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <16 x float> @test_vsubps_rn(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vsubps_rn:
; CHECK-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x5c,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> zeroinitializer, i16 -1, i32 8)
  ret <16 x float> %res
}

define <16 x float> @test_vsubps_rd(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vsubps_rd:
; CHECK-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x5c,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> zeroinitializer, i16 -1, i32 9)
  ret <16 x float> %res
}

define <16 x float> @test_vsubps_ru(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vsubps_ru:
; CHECK-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x5c,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> zeroinitializer, i16 -1, i32 10)
  ret <16 x float> %res
}

define <16 x float> @test_vsubps_rz(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vsubps_rz:
; CHECK-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x78,0x5c,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> zeroinitializer, i16 -1, i32 11)
  ret <16 x float> %res
}

define <16 x float> @test_vmulps_rn(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vmulps_rn:
; CHECK-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x59,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> zeroinitializer, i16 -1, i32 8)
  ret <16 x float> %res
}

define <16 x float> @test_vmulps_rd(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vmulps_rd:
; CHECK-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x59,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> zeroinitializer, i16 -1, i32 9)
  ret <16 x float> %res
}

define <16 x float> @test_vmulps_ru(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vmulps_ru:
; CHECK-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x59,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> zeroinitializer, i16 -1, i32 10)
  ret <16 x float> %res
}

define <16 x float> @test_vmulps_rz(<16 x float> %a0, <16 x float> %a1) {
; CHECK-LABEL: test_vmulps_rz:
; CHECK-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x78,0x59,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> zeroinitializer, i16 -1, i32 11)
  ret <16 x float> %res
}
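
;; Zero-masked vmulps: the {z} forms fold the mask into the multiply, so no separate blend is needed.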

define <16 x float> @test_vmulps_mask_rn(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_vmulps_mask_rn:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x59,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
; X64-LABEL: test_vmulps_mask_rn:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x59,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> zeroinitializer, i16 %mask, i32 8)
  ret <16 x float> %res
}

define <16 x float> @test_vmulps_mask_rd(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_vmulps_mask_rd:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x59,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
; X64-LABEL: test_vmulps_mask_rd:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x59,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> zeroinitializer, i16 %mask, i32 9)
  ret <16 x float> %res
}

define <16 x float> @test_vmulps_mask_ru(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_vmulps_mask_ru:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x59,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
; X64-LABEL: test_vmulps_mask_ru:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x59,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> zeroinitializer, i16 %mask, i32 10)
  ret <16 x float> %res
}

define <16 x float> @test_vmulps_mask_rz(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_vmulps_mask_rz:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x59,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
; X64-LABEL: test_vmulps_mask_rz:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x59,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> zeroinitializer, i16 %mask, i32 11)
  ret <16 x float> %res
}

;; With Passthru value
define <16 x float> @test_vmulps_mask_passthru_rn(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
; X86-LABEL: test_vmulps_mask_passthru_rn:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x59,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
; X64-LABEL: test_vmulps_mask_passthru_rn:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmulps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x59,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> %passthru, i16 %mask, i32 8)
  ret <16 x float> %res
}

define <16 x float> @test_vmulps_mask_passthru_rd(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
; X86-LABEL: test_vmulps_mask_passthru_rd:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x59,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
; X64-LABEL: test_vmulps_mask_passthru_rd:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmulps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x59,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> %passthru, i16 %mask, i32 9)
  ret <16 x float> %res
}

define <16 x float> @test_vmulps_mask_passthru_ru(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
; X86-LABEL: test_vmulps_mask_passthru_ru:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x59,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
; X64-LABEL: test_vmulps_mask_passthru_ru:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmulps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x59,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> %passthru, i16 %mask, i32 10)
  ret <16 x float> %res
}

define <16 x float> @test_vmulps_mask_passthru_rz(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
; X86-LABEL: test_vmulps_mask_passthru_rz:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x59,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
; X64-LABEL: test_vmulps_mask_passthru_rz:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmulps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x59,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> %passthru, i16 %mask, i32 11)
  ret <16 x float> %res
}

define <8 x double> @test_vmulpd_mask_rn(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
; X86-LABEL: test_vmulpd_mask_rn:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmulpd {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x99,0x59,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
; X64-LABEL: test_vmulpd_mask_rn:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmulpd {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x99,0x59,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
                    <8 x double> zeroinitializer, i8 %mask, i32 8)
  ret <8 x double> %res
}

define <8 x double> @test_vmulpd_mask_rd(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
; X86-LABEL: test_vmulpd_mask_rd:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmulpd {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xb9,0x59,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
; X64-LABEL: test_vmulpd_mask_rd:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmulpd {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xb9,0x59,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
                    <8 x double> zeroinitializer, i8 %mask, i32 9)
  ret <8 x double> %res
}

define <8 x double> @test_vmulpd_mask_ru(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
; X86-LABEL: test_vmulpd_mask_ru:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmulpd {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0x59,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
; X64-LABEL: test_vmulpd_mask_ru:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmulpd {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0x59,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
                    <8 x double> zeroinitializer, i8 %mask, i32 10)
  ret <8 x double> %res
}

define <8 x double> @test_vmulpd_mask_rz(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
; X86-LABEL: test_vmulpd_mask_rz:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmulpd {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xf9,0x59,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
; X64-LABEL: test_vmulpd_mask_rz:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmulpd {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xf9,0x59,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
                    <8 x double> zeroinitializer, i8 %mask, i32 11)
  ret <8 x double> %res
}
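
;; vaddps with embedded rounding: zero-masked, merge-masked (note the vmovaps back to %zmm0),
;; and unmasked variants.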

define <16 x float> @test_mm512_maskz_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_add_round_ps_rn_sae:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x58,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
; X64-LABEL: test_mm512_maskz_add_round_ps_rn_sae:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x58,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_add_round_ps_rd_sae:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x58,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
; X64-LABEL: test_mm512_maskz_add_round_ps_rd_sae:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x58,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 9)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_add_round_ps_ru_sae:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x58,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
; X64-LABEL: test_mm512_maskz_add_round_ps_ru_sae:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x58,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 10)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_add_round_ps_rz_sae:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x58,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
; X64-LABEL: test_mm512_maskz_add_round_ps_rz_sae:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x58,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 11)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_add_round_ps_current:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vaddps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x58,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
; X64-LABEL: test_mm512_maskz_add_round_ps_current:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vaddps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x58,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_add_round_ps_rn_sae:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x58,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
; X64-LABEL: test_mm512_mask_add_round_ps_rn_sae:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x58,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_add_round_ps_rd_sae:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x58,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
; X64-LABEL: test_mm512_mask_add_round_ps_rd_sae:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x58,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 9)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_add_round_ps_ru_sae:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x58,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
; X64-LABEL: test_mm512_mask_add_round_ps_ru_sae:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x58,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 10)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_add_round_ps_rz_sae:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x58,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
; X64-LABEL: test_mm512_mask_add_round_ps_rz_sae:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x58,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 11)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_add_round_ps_current:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vaddps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x58,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
; X64-LABEL: test_mm512_mask_add_round_ps_current:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vaddps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x58,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_add_round_ps_rn_sae:
; CHECK-NEXT: vaddps {rn-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x58,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_add_round_ps_rd_sae:
; CHECK-NEXT: vaddps {rd-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x58,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 9)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_add_round_ps_ru_sae:
; CHECK-NEXT: vaddps {ru-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x58,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 10)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_add_round_ps_rz_sae:
; CHECK-NEXT: vaddps {rz-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x78,0x58,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 11)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_add_round_ps_current:
; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
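
;; vsubps rounding-mode tests; these mirror the vaddps cases above.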

define <16 x float> @test_mm512_mask_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_sub_round_ps_rn_sae:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x5c,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
; X64-LABEL: test_mm512_mask_sub_round_ps_rn_sae:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x5c,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_sub_round_ps_rd_sae:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x5c,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
; X64-LABEL: test_mm512_mask_sub_round_ps_rd_sae:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x5c,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 9)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_sub_round_ps_ru_sae:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x5c,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
; X64-LABEL: test_mm512_mask_sub_round_ps_ru_sae:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x5c,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 10)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_sub_round_ps_rz_sae:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x5c,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
; X64-LABEL: test_mm512_mask_sub_round_ps_rz_sae:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x5c,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 11)
  ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_sub_round_ps_current:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vsubps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5c,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
; X64-LABEL: test_mm512_mask_sub_round_ps_current:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vsubps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5c,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
  ret <16 x float> %res
}
8545 define <16 x float> @test_mm512_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
8546 ; CHECK-LABEL: test_mm512_sub_round_ps_rn_sae:
8548 ; CHECK-NEXT: vsubps {rn-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x5c,0xc1]
8549 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
8550 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
8551 ret <16 x float> %res
8553 define <16 x float> @test_mm512_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
8554 ; CHECK-LABEL: test_mm512_sub_round_ps_rd_sae:
8556 ; CHECK-NEXT: vsubps {rd-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x5c,0xc1]
8557 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
8558 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 9)
8559 ret <16 x float> %res
8561 define <16 x float> @test_mm512_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
8562 ; CHECK-LABEL: test_mm512_sub_round_ps_ru_sae:
8564 ; CHECK-NEXT: vsubps {ru-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x5c,0xc1]
8565 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
8566 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 10)
8567 ret <16 x float> %res
8570 define <16 x float> @test_mm512_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
8571 ; CHECK-LABEL: test_mm512_sub_round_ps_rz_sae:
8573 ; CHECK-NEXT: vsubps {rz-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x78,0x5c,0xc1]
8574 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
8575 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 11)
8576 ret <16 x float> %res
8579 define <16 x float> @test_mm512_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
8580 ; CHECK-LABEL: test_mm512_sub_round_ps_current:
8582 ; CHECK-NEXT: vsubps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x5c,0xc1]
8583 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
8584 %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
8585 ret <16 x float> %res
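
; A minimal sketch (not itself a checked test) of what the auto-upgrader is
; expected to emit for the i32 4 masked form above; value names illustrative:
;   %sub = fsub <16 x float> %a0, %a1
;   %m = bitcast i16 %mask to <16 x i1>
;   %r = select <16 x i1> %m, <16 x float> %sub, <16 x float> %src
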
define <16 x float> @test_mm512_maskz_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_div_round_ps_rn_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x5e,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_div_round_ps_rn_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x5e,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8)
ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_div_round_ps_rd_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x5e,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_div_round_ps_rd_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x5e,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 9)
ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_div_round_ps_ru_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x5e,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_div_round_ps_ru_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x5e,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 10)
ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_div_round_ps_rz_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x5e,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_div_round_ps_rz_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x5e,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 11)
ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_div_round_ps_current:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vdivps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x5e,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_div_round_ps_current:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vdivps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x5e,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_div_round_ps_rn_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x5e,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_div_round_ps_rn_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x5e,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_div_round_ps_rd_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x5e,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_div_round_ps_rd_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x5e,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 9)
ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_div_round_ps_ru_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x5e,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_div_round_ps_ru_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x5e,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 10)
ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_div_round_ps_rz_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x5e,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_div_round_ps_rz_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x5e,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 11)
ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_div_round_ps_current:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vdivps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5e,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_div_round_ps_current:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vdivps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5e,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
ret <16 x float> %res
}

define <16 x float> @test_mm512_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_div_round_ps_rn_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vdivps {rn-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x5e,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
ret <16 x float> %res
}

define <16 x float> @test_mm512_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_div_round_ps_rd_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vdivps {rd-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x5e,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 9)
ret <16 x float> %res
}

define <16 x float> @test_mm512_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_div_round_ps_ru_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vdivps {ru-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x5e,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 10)
ret <16 x float> %res
}

define <16 x float> @test_mm512_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_div_round_ps_rz_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vdivps {rz-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x78,0x5e,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 11)
ret <16 x float> %res
}

define <16 x float> @test_mm512_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_div_round_ps_current:
; CHECK: ## %bb.0:
; CHECK-NEXT: vdivps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x5e,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
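
; For i32 4 the masked div form is likewise expected to upgrade to a plain
; fdiv plus a select on the bitcast mask (sketch only, names illustrative):
;   %div = fdiv <16 x float> %a0, %a1
;   %m = bitcast i16 %mask to <16 x i1>
;   %r = select <16 x i1> %m, <16 x float> %div, <16 x float> %src
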
define void @test_mask_compress_store_pd_512(i8* %addr, <8 x double> %data, i8 %mask) {
; X86-LABEL: test_mask_compress_store_pd_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vcompresspd %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8a,0x00]
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_pd_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vcompresspd %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8a,0x07]
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
ret void
}

declare void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
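
; Compress-store writes only the mask-selected elements, packed contiguously
; starting at %addr. A rough generic equivalent, assuming the
; target-independent intrinsic (sketch only):
;   %m = bitcast i8 %mask to <8 x i1>
;   %p = bitcast i8* %addr to double*
;   call void @llvm.masked.compressstore.v8f64(<8 x double> %data, double* %p, <8 x i1> %m)
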
define void @test_compress_store_pd_512(i8* %addr, <8 x double> %data) {
; X86-LABEL: test_compress_store_pd_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vcompresspd %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8a,0x00]
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_compress_store_pd_512:
; X64: ## %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vcompresspd %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8a,0x07]
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 -1)
ret void
}

define void @test_mask_compress_store_ps_512(i8* %addr, <16 x float> %data, i16 %mask) {
; X86-LABEL: test_mask_compress_store_ps_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vcompressps %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8a,0x00]
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_ps_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vcompressps %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8a,0x07]
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.ps.512(i8* %addr, <16 x float> %data, i16 %mask)
ret void
}

declare void @llvm.x86.avx512.mask.compress.store.ps.512(i8* %addr, <16 x float> %data, i16 %mask)

define void @test_compress_store_ps_512(i8* %addr, <16 x float> %data) {
; X86-LABEL: test_compress_store_ps_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vcompressps %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8a,0x00]
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_compress_store_ps_512:
; X64: ## %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vcompressps %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8a,0x07]
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.ps.512(i8* %addr, <16 x float> %data, i16 -1)
ret void
}

define void @test_mask_compress_store_q_512(i8* %addr, <8 x i64> %data, i8 %mask) {
; X86-LABEL: test_mask_compress_store_q_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpcompressq %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8b,0x00]
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_q_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpcompressq %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8b,0x07]
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
ret void
}

declare void @llvm.x86.avx512.mask.compress.store.q.512(i8* %addr, <8 x i64> %data, i8 %mask)

define void @test_compress_store_q_512(i8* %addr, <8 x i64> %data) {
; X86-LABEL: test_compress_store_q_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vpcompressq %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8b,0x00]
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_compress_store_q_512:
; X64: ## %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vpcompressq %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8b,0x07]
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.q.512(i8* %addr, <8 x i64> %data, i8 -1)
ret void
}

define void @test_mask_compress_store_d_512(i8* %addr, <16 x i32> %data, i16 %mask) {
; X86-LABEL: test_mask_compress_store_d_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpcompressd %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8b,0x00]
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_d_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpcompressd %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8b,0x07]
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.d.512(i8* %addr, <16 x i32> %data, i16 %mask)
ret void
}

declare void @llvm.x86.avx512.mask.compress.store.d.512(i8* %addr, <16 x i32> %data, i16 %mask)

define void @test_compress_store_d_512(i8* %addr, <16 x i32> %data) {
; X86-LABEL: test_compress_store_d_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vpcompressd %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8b,0x00]
; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_compress_store_d_512:
; X64: ## %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vpcompressd %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8b,0x07]
; X64-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.compress.store.d.512(i8* %addr, <16 x i32> %data, i16 -1)
ret void
}
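
; Note that the unmasked (-1) variants above materialize an all-ones mask with
; kxnorw %k0, %k0, %k1 instead of loading a constant through a GPR, so the
; same masked store encoding is reused.
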
define <8 x double> @test_mask_expand_load_pd_512(i8* %addr, <8 x double> %data, i8 %mask) {
; X86-LABEL: test_mask_expand_load_pd_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vexpandpd (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_pd_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vexpandpd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
ret <8 x double> %res
}

define <8 x double> @test_maskz_expand_load_pd_512(i8* %addr, i8 %mask) {
; X86-LABEL: test_maskz_expand_load_pd_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vexpandpd (%eax), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x88,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_pd_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vexpandpd (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x88,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> zeroinitializer, i8 %mask)
ret <8 x double> %res
}

declare <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
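
; Expand-load is the inverse: consecutive elements are read from %addr and
; scattered into the mask-selected lanes, with %data as passthru. A rough
; generic equivalent (sketch only):
;   %m = bitcast i8 %mask to <8 x i1>
;   %p = bitcast i8* %addr to double*
;   %r = call <8 x double> @llvm.masked.expandload.v8f64(double* %p, <8 x i1> %m, <8 x double> %data)
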
define <8 x double> @test_expand_load_pd_512(i8* %addr, <8 x double> %data) {
; X86-LABEL: test_expand_load_pd_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vexpandpd (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_expand_load_pd_512:
; X64: ## %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vexpandpd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 -1)
ret <8 x double> %res
}

; Make sure we don't crash if you pass 0 to the mask.
define <8 x double> @test_zero_mask_expand_load_pd_512(i8* %addr, <8 x double> %data, i8 %mask) {
; CHECK-LABEL: test_zero_mask_expand_load_pd_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 0)
ret <8 x double> %res
}
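
; With a constant zero mask nothing is loaded and the passthru is returned
; unchanged, so the whole call folds away and only the ret survives.
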
define <16 x float> @test_mask_expand_load_ps_512(i8* %addr, <16 x float> %data, i16 %mask) {
; X86-LABEL: test_mask_expand_load_ps_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vexpandps (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x88,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_ps_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vexpandps (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x88,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.expand.load.ps.512(i8* %addr, <16 x float> %data, i16 %mask)
ret <16 x float> %res
}

define <16 x float> @test_maskz_expand_load_ps_512(i8* %addr, i16 %mask) {
; X86-LABEL: test_maskz_expand_load_ps_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vexpandps (%eax), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x88,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_ps_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vexpandps (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x88,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.expand.load.ps.512(i8* %addr, <16 x float> zeroinitializer, i16 %mask)
ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.expand.load.ps.512(i8* %addr, <16 x float> %data, i16 %mask)

define <16 x float> @test_expand_load_ps_512(i8* %addr, <16 x float> %data) {
; X86-LABEL: test_expand_load_ps_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vexpandps (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x88,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_expand_load_ps_512:
; X64: ## %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vexpandps (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x88,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.expand.load.ps.512(i8* %addr, <16 x float> %data, i16 -1)
ret <16 x float> %res
}

define <8 x i64> @test_mask_expand_load_q_512(i8* %addr, <8 x i64> %data, i8 %mask) {
; X86-LABEL: test_mask_expand_load_q_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpexpandq (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x89,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_q_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpexpandq (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x89,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_maskz_expand_load_q_512(i8* %addr, i8 %mask) {
; X86-LABEL: test_maskz_expand_load_q_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vpexpandq (%eax), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x89,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_q_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpexpandq (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x89,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> %data, i8 %mask)

define <8 x i64> @test_expand_load_q_512(i8* %addr, <8 x i64> %data) {
; X86-LABEL: test_expand_load_q_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vpexpandq (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x89,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_expand_load_q_512:
; X64: ## %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vpexpandq (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x89,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> %data, i8 -1)
ret <8 x i64> %res
}

define <16 x i32> @test_mask_expand_load_d_512(i8* %addr, <16 x i32> %data, i16 %mask) {
; X86-LABEL: test_mask_expand_load_d_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpexpandd (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x89,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_d_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpexpandd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x89,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.expand.load.d.512(i8* %addr, <16 x i32> %data, i16 %mask)
ret <16 x i32> %res
}

define <16 x i32> @test_maskz_expand_load_d_512(i8* %addr, i16 %mask) {
; X86-LABEL: test_maskz_expand_load_d_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpexpandd (%eax), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x89,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_d_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vpexpandd (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x89,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.expand.load.d.512(i8* %addr, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.expand.load.d.512(i8* %addr, <16 x i32> %data, i16 %mask)

define <16 x i32> @test_expand_load_d_512(i8* %addr, <16 x i32> %data) {
; X86-LABEL: test_expand_load_d_512:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT: vpexpandd (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x89,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_expand_load_d_512:
; X64: ## %bb.0:
; X64-NEXT: kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT: vpexpandd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x89,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.expand.load.d.512(i8* %addr, <16 x i32> %data, i16 -1)
ret <16 x i32> %res
}
define <16 x float> @test_mm512_maskz_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_min_round_ps_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vminps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x5d,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_min_round_ps_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vminps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x5d,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8)
ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_min_round_ps_current:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vminps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x5d,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_min_round_ps_current:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vminps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x5d,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_min_round_ps_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vminps {sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x5d,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_min_round_ps_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vminps {sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x5d,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_min_round_ps_current:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vminps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5d,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_min_round_ps_current:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vminps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5d,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
ret <16 x float> %res
}

define <16 x float> @test_mm512_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_min_round_ps_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vminps {sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x5d,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
ret <16 x float> %res
}

define <16 x float> @test_mm512_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_min_round_ps_current:
; CHECK: ## %bb.0:
; CHECK-NEXT: vminps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x5d,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
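
; min/max accept only {sae} (suppress-all-exceptions), not a static rounding
; mode, which is why only i32 4 and i32 8 are exercised here.
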
define <16 x float> @test_mm512_maskz_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_max_round_ps_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vmaxps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x5f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_max_round_ps_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmaxps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x5f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8)
ret <16 x float> %res
}

define <16 x float> @test_mm512_maskz_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; X86-LABEL: test_mm512_maskz_max_round_ps_current:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vmaxps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x5f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_max_round_ps_current:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmaxps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x5f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_max_round_ps_sae:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vmaxps {sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x5f,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_max_round_ps_sae:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmaxps {sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x5f,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
ret <16 x float> %res
}

define <16 x float> @test_mm512_mask_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
; X86-LABEL: test_mm512_mask_max_round_ps_current:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vmaxps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5f,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_max_round_ps_current:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmaxps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5f,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
ret <16 x float> %res
}

define <16 x float> @test_mm512_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_max_round_ps_sae:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmaxps {sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x5f,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
ret <16 x float> %res
}

define <16 x float> @test_mm512_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
; CHECK-LABEL: test_mm512_max_round_ps_current:
; CHECK: ## %bb.0:
; CHECK-NEXT: vmaxps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x5f,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) {
; CHECK-LABEL: test_sqrt_pd_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsqrtpd %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x51,0xc0]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> undef, i8 -1, i32 4)
ret <8 x double> %res
}

define <8 x double> @test_mask_sqrt_pd_512(<8 x double> %a0, <8 x double> %passthru, i8 %mask) {
; X86-LABEL: test_mask_sqrt_pd_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vsqrtpd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x51,0xc8]
; X86-NEXT: vmovapd %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_sqrt_pd_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vsqrtpd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x51,0xc8]
; X64-NEXT: vmovapd %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> %passthru, i8 %mask, i32 4)
ret <8 x double> %res
}

define <8 x double> @test_maskz_sqrt_pd_512(<8 x double> %a0, i8 %mask) {
; X86-LABEL: test_maskz_sqrt_pd_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vsqrtpd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x51,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_maskz_sqrt_pd_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vsqrtpd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x51,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 %mask, i32 4)
ret <8 x double> %res
}

define <8 x double> @test_sqrt_round_pd_512(<8 x double> %a0) {
; CHECK-LABEL: test_sqrt_round_pd_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsqrtpd {rz-sae}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x78,0x51,0xc0]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> undef, i8 -1, i32 11)
ret <8 x double> %res
}

define <8 x double> @test_mask_sqrt_round_pd_512(<8 x double> %a0, <8 x double> %passthru, i8 %mask) {
; X86-LABEL: test_mask_sqrt_round_pd_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vsqrtpd {rz-sae}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x79,0x51,0xc8]
; X86-NEXT: vmovapd %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_sqrt_round_pd_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vsqrtpd {rz-sae}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x79,0x51,0xc8]
; X64-NEXT: vmovapd %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> %passthru, i8 %mask, i32 11)
ret <8 x double> %res
}

define <8 x double> @test_maskz_sqrt_round_pd_512(<8 x double> %a0, i8 %mask) {
; X86-LABEL: test_maskz_sqrt_round_pd_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vsqrtpd {rz-sae}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xf9,0x51,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_maskz_sqrt_round_pd_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vsqrtpd {rz-sae}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xf9,0x51,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 %mask, i32 11)
ret <8 x double> %res
}

declare <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone
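
; For i32 4 the masked sqrt form is expected (an assumption, not checked here)
; to upgrade to the generic sqrt intrinsic plus a select; sketch only:
;   %s = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a0)
;   %m = bitcast i8 %mask to <8 x i1>
;   %r = select <8 x i1> %m, <8 x double> %s, <8 x double> %passthru
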
define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) {
; CHECK-LABEL: test_sqrt_ps_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsqrtps %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x51,0xc0]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> undef, i16 -1, i32 4)
ret <16 x float> %res
}

define <16 x float> @test_mask_sqrt_ps_512(<16 x float> %a0, <16 x float> %passthru, i16 %mask) {
; X86-LABEL: test_mask_sqrt_ps_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vsqrtps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x51,0xc8]
; X86-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_sqrt_ps_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vsqrtps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x51,0xc8]
; X64-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> %passthru, i16 %mask, i32 4)
ret <16 x float> %res
}

define <16 x float> @test_maskz_sqrt_ps_512(<16 x float> %a0, i16 %mask) {
; X86-LABEL: test_maskz_sqrt_ps_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vsqrtps %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x51,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_maskz_sqrt_ps_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vsqrtps %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x51,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 %mask, i32 4)
ret <16 x float> %res
}

define <16 x float> @test_sqrt_round_ps_512(<16 x float> %a0) {
; CHECK-LABEL: test_sqrt_round_ps_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vsqrtps {rz-sae}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x78,0x51,0xc0]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 11)
ret <16 x float> %res
}

define <16 x float> @test_mask_sqrt_round_ps_512(<16 x float> %a0, <16 x float> %passthru, i16 %mask) {
; X86-LABEL: test_mask_sqrt_round_ps_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vsqrtps {rz-sae}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x51,0xc8]
; X86-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_sqrt_round_ps_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vsqrtps {rz-sae}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x51,0xc8]
; X64-NEXT: vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> %passthru, i16 %mask, i32 11)
ret <16 x float> %res
}

define <16 x float> @test_maskz_sqrt_round_ps_512(<16 x float> %a0, i16 %mask) {
; X86-LABEL: test_maskz_sqrt_round_ps_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vsqrtps {rz-sae}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x51,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_maskz_sqrt_round_ps_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vsqrtps {rz-sae}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x51,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 %mask, i32 11)
ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone
declare <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_prolv_d_512_old(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_prolv_d_512_old:
; CHECK: ## %bb.0:
; CHECK-NEXT: vprolvd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x15,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
ret <16 x i32> %res
}

define <16 x i32>@test_int_x86_avx512_mask_prolv_d_512_old(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prolv_d_512_old:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vprolvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x15,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prolv_d_512_old:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vprolvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x15,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
ret <16 x i32> %res
}

define <16 x i32>@test_int_x86_avx512_maskz_prolv_d_512_old(<16 x i32> %x0, <16 x i32> %x1, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_prolv_d_512_old:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vprolvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x15,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_prolv_d_512_old:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vprolvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x15,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> zeroinitializer, i16 %x3)
ret <16 x i32> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_prolv_q_512_old(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_prolv_q_512_old:
; CHECK: ## %bb.0:
; CHECK-NEXT: vprolvq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x15,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_mask_prolv_q_512_old(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prolv_q_512_old:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vprolvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x15,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prolv_q_512_old:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vprolvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x15,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_maskz_prolv_q_512_old(<8 x i64> %x0, <8 x i64> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_prolv_q_512_old:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vprolvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x15,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_prolv_q_512_old:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vprolvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x15,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer, i8 %x3)
ret <8 x i64> %res
}
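
; A variable rotate-left is a funnel shift with both inputs equal, so these
; rotate intrinsics are expected to map onto the generic funnel-shift
; intrinsic (sketch only, names illustrative):
;   %r = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %x0, <16 x i32> %x0, <16 x i32> %x1)
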
9622 declare <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
9624 define <16 x i32>@test_int_x86_avx512_prorv_d_512_old(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
9625 ; CHECK-LABEL: test_int_x86_avx512_prorv_d_512_old:
9627 ; CHECK-NEXT: vprorvd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x14,0xc1]
9628 ; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
9629 %res = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
define <16 x i32>@test_int_x86_avx512_mask_prorv_d_512_old(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prorv_d_512_old:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vprorvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x14,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prorv_d_512_old:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vprorvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x14,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
  ret <16 x i32> %res
}

define <16 x i32>@test_int_x86_avx512_maskz_prorv_d_512_old(<16 x i32> %x0, <16 x i32> %x1, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_prorv_d_512_old:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vprorvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x14,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_prorv_d_512_old:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vprorvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x14,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> zeroinitializer, i16 %x3)
  ret <16 x i32> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_prorv_q_512_old(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_prorv_q_512_old:
; CHECK: ## %bb.0:
; CHECK-NEXT: vprorvq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x14,0xc1]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_mask_prorv_q_512_old(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prorv_q_512_old:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vprorvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x14,0xd1]
; X86-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prorv_q_512_old:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vprorvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x14,0xd1]
; X64-NEXT: vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
  ret <8 x i64> %res
}

define <8 x i64>@test_int_x86_avx512_maskz_prorv_q_512_old(<8 x i64> %x0, <8 x i64> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_prorv_q_512_old:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vprorvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x14,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_prorv_q_512_old:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vprorvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x14,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer, i8 %x3)
  ret <8 x i64> %res
}

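; Rotate-by-immediate forms of prol/pror: the rotate amount is an i32
; immediate, and each test below folds the merge-masked, zero-masked, and
; unmasked variants into one function, summing the three results.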
declare <16 x i32> @llvm.x86.avx512.mask.prol.d.512(<16 x i32>, i32, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_prol_d_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prol_d_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vprold $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xc8,0x03]
; X86-NEXT: vprold $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xc9,0x72,0xc8,0x04]
; X86-NEXT: vprold $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xc8,0x05]
; X86-NEXT: vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X86-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prol_d_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vprold $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xc8,0x03]
; X64-NEXT: vprold $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xc9,0x72,0xc8,0x04]
; X64-NEXT: vprold $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xc8,0x05]
; X64-NEXT: vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X64-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.prol.d.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 %x3)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.prol.d.512(<16 x i32> %x0, i32 4, <16 x i32> zeroinitializer, i16 %x3)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.prol.d.512(<16 x i32> %x0, i32 5, <16 x i32> %x2, i16 -1)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res3, %res2
  ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.prol.q.512(<8 x i64>, i32, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_prol_q_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prol_q_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vprolq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xc8,0x03]
; X86-NEXT: vprolq $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0xc9,0x72,0xc8,0x04]
; X86-NEXT: vprolq $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x72,0xc8,0x05]
; X86-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prol_q_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vprolq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xc8,0x03]
; X64-NEXT: vprolq $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0xc9,0x72,0xc8,0x04]
; X64-NEXT: vprolq $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x72,0xc8,0x05]
; X64-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.prol.q.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 %x3)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.prol.q.512(<8 x i64> %x0, i32 4, <8 x i64> zeroinitializer, i8 %x3)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.prol.q.512(<8 x i64> %x0, i32 5, <8 x i64> %x2, i8 -1)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res3, %res2
  ret <8 x i64> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.pror.d.512(<16 x i32>, i32, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_pror_d_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pror_d_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vprord $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xc0,0x03]
; X86-NEXT: vprord $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xc9,0x72,0xc0,0x04]
; X86-NEXT: vprord $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xc0,0x05]
; X86-NEXT: vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X86-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pror_d_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vprord $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xc0,0x03]
; X64-NEXT: vprord $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xc9,0x72,0xc0,0x04]
; X64-NEXT: vprord $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xc0,0x05]
; X64-NEXT: vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X64-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pror.d.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 %x3)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.pror.d.512(<16 x i32> %x0, i32 4, <16 x i32> zeroinitializer, i16 %x3)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.pror.d.512(<16 x i32> %x0, i32 5, <16 x i32> %x2, i16 -1)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res3, %res2
  ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.pror.q.512(<8 x i64>, i32, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_pror_q_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pror_q_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vprorq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xc0,0x03]
; X86-NEXT: vprorq $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0xc9,0x72,0xc0,0x04]
; X86-NEXT: vprorq $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x72,0xc0,0x05]
; X86-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pror_q_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vprorq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xc0,0x03]
; X64-NEXT: vprorq $4, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0xc9,0x72,0xc0,0x04]
; X64-NEXT: vprorq $5, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x72,0xc0,0x05]
; X64-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pror.q.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 %x3)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.pror.q.512(<8 x i64> %x0, i32 4, <8 x i64> zeroinitializer, i8 %x3)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.pror.q.512(<8 x i64> %x0, i32 5, <8 x i64> %x2, i8 -1)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res3, %res2
  ret <8 x i64> %res4
}

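; Scalar FMA intrinsics with an explicit rounding-mode operand. In these
; tests i32 4 selects the current rounding mode, i32 10 round-up ({ru-sae}),
; and i32 11 round-toward-zero ({rz-sae}), as reflected in the check lines.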
declare <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32)

define <2 x double>@test_int_x86_avx512_mask_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){
; X86-LABEL: test_int_x86_avx512_mask_vfmadd_sd:
; X86: ## %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovapd %xmm0, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xd8]
; X86-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa9,0xda]
; X86-NEXT: ## xmm3 {%k1} = (xmm1 * xmm3) + xmm2
; X86-NEXT: vmovapd %xmm0, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe0]
; X86-NEXT: vfmadd213sd {rz-sae}, %xmm2, %xmm1, %xmm4 ## encoding: [0x62,0xf2,0xf5,0x78,0xa9,0xe2]
; X86-NEXT: vaddpd %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe1,0x58,0xdc]
; X86-NEXT: vfmadd213sd {ru-sae}, %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x59,0xa9,0xc2]
; X86-NEXT: vaddpd %xmm3, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x58,0xc3]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vfmadd_sd:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovapd %xmm0, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xd8]
; X64-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa9,0xda]
; X64-NEXT: ## xmm3 {%k1} = (xmm1 * xmm3) + xmm2
; X64-NEXT: vmovapd %xmm0, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe0]
; X64-NEXT: vfmadd213sd {rz-sae}, %xmm2, %xmm1, %xmm4 ## encoding: [0x62,0xf2,0xf5,0x78,0xa9,0xe2]
; X64-NEXT: vaddpd %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe1,0x58,0xdc]
; X64-NEXT: vfmadd213sd {ru-sae}, %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x59,0xa9,0xc2]
; X64-NEXT: vaddpd %xmm3, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x58,0xc3]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 11)
  %res2 = call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 10)
  %res3 = fadd <2 x double> %res, %res1
  %res4 = fadd <2 x double> %res2, %res3
  ret <2 x double> %res4
}

declare <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)

define <4 x float>@test_int_x86_avx512_mask_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){
; X86-LABEL: test_int_x86_avx512_mask_vfmadd_ss:
; X86: ## %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovaps %xmm0, %xmm3 ## encoding: [0xc5,0xf8,0x28,0xd8]
; X86-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa9,0xda]
; X86-NEXT: ## xmm3 {%k1} = (xmm1 * xmm3) + xmm2
; X86-NEXT: vmovaps %xmm0, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe0]
; X86-NEXT: vfmadd213ss {rz-sae}, %xmm2, %xmm1, %xmm4 ## encoding: [0x62,0xf2,0x75,0x78,0xa9,0xe2]
; X86-NEXT: vaddps %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe0,0x58,0xdc]
; X86-NEXT: vfmadd213ss {ru-sae}, %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x59,0xa9,0xc2]
; X86-NEXT: vaddps %xmm3, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x58,0xc3]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vfmadd_ss:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovaps %xmm0, %xmm3 ## encoding: [0xc5,0xf8,0x28,0xd8]
; X64-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa9,0xda]
; X64-NEXT: ## xmm3 {%k1} = (xmm1 * xmm3) + xmm2
; X64-NEXT: vmovaps %xmm0, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe0]
; X64-NEXT: vfmadd213ss {rz-sae}, %xmm2, %xmm1, %xmm4 ## encoding: [0x62,0xf2,0x75,0x78,0xa9,0xe2]
; X64-NEXT: vaddps %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe0,0x58,0xdc]
; X64-NEXT: vfmadd213ss {ru-sae}, %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x59,0xa9,0xc2]
; X64-NEXT: vaddps %xmm3, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x58,0xc3]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 11)
  %res2 = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 10)
  %res3 = fadd <4 x float> %res, %res1
  %res4 = fadd <4 x float> %res2, %res3
  ret <4 x float> %res4
}

declare <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32)

define <2 x double>@test_int_x86_avx512_maskz_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){
; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_sd:
; X86: ## %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovapd %xmm0, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xd8]
; X86-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xa9,0xda]
; X86-NEXT: ## xmm3 {%k1} {z} = (xmm1 * xmm3) + xmm2
; X86-NEXT: vfmadd213sd {rz-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xf9,0xa9,0xc2]
; X86-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe1,0x58,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_sd:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovapd %xmm0, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xd8]
; X64-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xa9,0xda]
; X64-NEXT: ## xmm3 {%k1} {z} = (xmm1 * xmm3) + xmm2
; X64-NEXT: vfmadd213sd {rz-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xf9,0xa9,0xc2]
; X64-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe1,0x58,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 11)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

declare <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)

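; Note: the test below returns %res rather than %res2, so the i32 11 call and
; the fadd are dead code; that is why the checks show only one vfmadd213ss.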
define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){
; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_ss:
; X86: ## %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xa9,0xc2]
; X86-NEXT: ## xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_ss:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xa9,0xc2]
; X64-NEXT: ## xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 11)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res
}

declare <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32)

define <2 x double>@test_int_x86_avx512_mask3_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){
; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_sd:
; X86: ## %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovapd %xmm2, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xda]
; X86-NEXT: vfmadd231sd %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xb9,0xd9]
; X86-NEXT: ## xmm3 {%k1} = (xmm0 * xmm1) + xmm3
; X86-NEXT: vmovapd %xmm2, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe2]
; X86-NEXT: vfmadd231sd {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0xfd,0x78,0xb9,0xe1]
; X86-NEXT: vaddpd %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe1,0x58,0xdc]
; X86-NEXT: vfmadd231sd {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x59,0xb9,0xd1]
; X86-NEXT: vaddpd %xmm3, %xmm2, %xmm0 ## encoding: [0xc5,0xe9,0x58,0xc3]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_sd:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovapd %xmm2, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xda]
; X64-NEXT: vfmadd231sd %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xb9,0xd9]
; X64-NEXT: ## xmm3 {%k1} = (xmm0 * xmm1) + xmm3
; X64-NEXT: vmovapd %xmm2, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe2]
; X64-NEXT: vfmadd231sd {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0xfd,0x78,0xb9,0xe1]
; X64-NEXT: vaddpd %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe1,0x58,0xdc]
; X64-NEXT: vfmadd231sd {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x59,0xb9,0xd1]
; X64-NEXT: vaddpd %xmm3, %xmm2, %xmm0 ## encoding: [0xc5,0xe9,0x58,0xc3]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 11)
  %res2 = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 10)
  %res3 = fadd <2 x double> %res, %res1
  %res4 = fadd <2 x double> %res2, %res3
  ret <2 x double> %res4
}

declare <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)

define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){
; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_ss:
; X86: ## %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovaps %xmm2, %xmm3 ## encoding: [0xc5,0xf8,0x28,0xda]
; X86-NEXT: vfmadd231ss %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb9,0xd9]
; X86-NEXT: ## xmm3 {%k1} = (xmm0 * xmm1) + xmm3
; X86-NEXT: vmovaps %xmm2, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe2]
; X86-NEXT: vfmadd231ss {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0x7d,0x78,0xb9,0xe1]
; X86-NEXT: vaddps %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe0,0x58,0xdc]
; X86-NEXT: vfmadd231ss {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x59,0xb9,0xd1]
; X86-NEXT: vaddps %xmm3, %xmm2, %xmm0 ## encoding: [0xc5,0xe8,0x58,0xc3]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_ss:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovaps %xmm2, %xmm3 ## encoding: [0xc5,0xf8,0x28,0xda]
; X64-NEXT: vfmadd231ss %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb9,0xd9]
; X64-NEXT: ## xmm3 {%k1} = (xmm0 * xmm1) + xmm3
; X64-NEXT: vmovaps %xmm2, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe2]
; X64-NEXT: vfmadd231ss {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0x7d,0x78,0xb9,0xe1]
; X64-NEXT: vaddps %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe0,0x58,0xdc]
; X64-NEXT: vfmadd231ss {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x59,0xb9,0xd1]
; X64-NEXT: vaddps %xmm3, %xmm2, %xmm0 ## encoding: [0xc5,0xe8,0x58,0xc3]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 11)
  %res2 = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 10)
  %res3 = fadd <4 x float> %res, %res1
  %res4 = fadd <4 x float> %res2, %res3
  ret <4 x float> %res4
}

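; The *_memfold tests build scalar vectors from loads and check how load
; folding interacts with the merge-masked and zero-masked scalar moves
; (vmovss/vmovsd) around the FMA.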
define void @fmadd_ss_mask_memfold(float* %a, float* %b, i8 %c) {
; X86-LABEL: fmadd_ss_mask_memfold:
; X86: ## %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x0c]
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ## encoding: [0x8b,0x54,0x24,0x04]
; X86-NEXT: vmovss (%edx), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x02]
; X86-NEXT: ## xmm0 = mem[0],zero,zero,zero
; X86-NEXT: vmovss (%ecx), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x09]
; X86-NEXT: ## xmm1 = mem[0],zero,zero,zero
; X86-NEXT: vfmadd213ss %xmm0, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa9,0xc8]
; X86-NEXT: ## xmm1 = (xmm0 * xmm1) + xmm0
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x10,0xc1]
; X86-NEXT: vmovss %xmm0, (%edx) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x02]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: fmadd_ss_mask_memfold:
; X64: ## %bb.0:
; X64-NEXT: vmovss (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
; X64-NEXT: ## xmm0 = mem[0],zero,zero,zero
; X64-NEXT: vmovss (%rsi), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x0e]
; X64-NEXT: ## xmm1 = mem[0],zero,zero,zero
; X64-NEXT: vfmadd213ss %xmm0, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa9,0xc8]
; X64-NEXT: ## xmm1 = (xmm0 * xmm1) + xmm0
; X64-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x10,0xc1]
; X64-NEXT: vmovss %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
  %a.val = load float, float* %a
  %av0 = insertelement <4 x float> undef, float %a.val, i32 0
  %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
  %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
  %av = insertelement <4 x float> %av2, float 0.000000e+00, i32 3

  %b.val = load float, float* %b
  %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
  %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
  %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
  %bv = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3

  %vr = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %av, <4 x float> %bv, <4 x float> %av, i8 %c, i32 4)

  %sr = extractelement <4 x float> %vr, i32 0
  store float %sr, float* %a
  ret void
}

define void @fmadd_ss_maskz_memfold(float* %a, float* %b, i8 %c) {
; X86-LABEL: fmadd_ss_maskz_memfold:
; X86: ## %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x0c]
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ## encoding: [0x8b,0x54,0x24,0x04]
; X86-NEXT: vmovss (%edx), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x02]
; X86-NEXT: ## xmm0 = mem[0],zero,zero,zero
; X86-NEXT: vfmadd231ss (%ecx), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xb9,0x01]
; X86-NEXT: ## xmm0 = (xmm0 * mem) + xmm0
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovss %xmm0, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x10,0xc0]
; X86-NEXT: vmovss %xmm0, (%edx) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x02]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: fmadd_ss_maskz_memfold:
; X64: ## %bb.0:
; X64-NEXT: vmovss (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
; X64-NEXT: ## xmm0 = mem[0],zero,zero,zero
; X64-NEXT: vfmadd231ss (%rsi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xb9,0x06]
; X64-NEXT: ## xmm0 = (xmm0 * mem) + xmm0
; X64-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT: vmovss %xmm0, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x10,0xc0]
; X64-NEXT: vmovss %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
  %a.val = load float, float* %a
  %av0 = insertelement <4 x float> undef, float %a.val, i32 0
  %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
  %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
  %av = insertelement <4 x float> %av2, float 0.000000e+00, i32 3

  %b.val = load float, float* %b
  %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
  %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
  %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
  %bv = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3

  %vr = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %av, <4 x float> %bv, <4 x float> %av, i8 %c, i32 4)

  %sr = extractelement <4 x float> %vr, i32 0
  store float %sr, float* %a
  ret void
}

define void @fmadd_sd_mask_memfold(double* %a, double* %b, i8 %c) {
; X86-LABEL: fmadd_sd_mask_memfold:
; X86: ## %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x0c]
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ## encoding: [0x8b,0x54,0x24,0x04]
; X86-NEXT: vmovsd (%edx), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x02]
; X86-NEXT: ## xmm0 = mem[0],zero
; X86-NEXT: vmovsd (%ecx), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x09]
; X86-NEXT: ## xmm1 = mem[0],zero
; X86-NEXT: vfmadd213sd %xmm0, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xa9,0xc8]
; X86-NEXT: ## xmm1 = (xmm0 * xmm1) + xmm0
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0x10,0xc1]
; X86-NEXT: vmovsd %xmm0, (%edx) ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x02]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: fmadd_sd_mask_memfold:
; X64: ## %bb.0:
; X64-NEXT: vmovsd (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
; X64-NEXT: ## xmm0 = mem[0],zero
; X64-NEXT: vmovsd (%rsi), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x0e]
; X64-NEXT: ## xmm1 = mem[0],zero
; X64-NEXT: vfmadd213sd %xmm0, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xa9,0xc8]
; X64-NEXT: ## xmm1 = (xmm0 * xmm1) + xmm0
; X64-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0x10,0xc1]
; X64-NEXT: vmovsd %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
  %a.val = load double, double* %a
  %av0 = insertelement <2 x double> undef, double %a.val, i32 0
  %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1

  %b.val = load double, double* %b
  %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
  %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1

  %vr = call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %av, <2 x double> %bv, <2 x double> %av, i8 %c, i32 4)

  %sr = extractelement <2 x double> %vr, i32 0
  store double %sr, double* %a
  ret void
}

define void @fmadd_sd_maskz_memfold(double* %a, double* %b, i8 %c) {
; X86-LABEL: fmadd_sd_maskz_memfold:
; X86: ## %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x0c]
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x08]
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ## encoding: [0x8b,0x54,0x24,0x04]
; X86-NEXT: vmovsd (%edx), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x02]
; X86-NEXT: ## xmm0 = mem[0],zero
; X86-NEXT: vfmadd231sd (%ecx), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xb9,0x01]
; X86-NEXT: ## xmm0 = (xmm0 * mem) + xmm0
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovsd %xmm0, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x10,0xc0]
; X86-NEXT: vmovsd %xmm0, (%edx) ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x02]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: fmadd_sd_maskz_memfold:
; X64: ## %bb.0:
; X64-NEXT: vmovsd (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
; X64-NEXT: ## xmm0 = mem[0],zero
; X64-NEXT: vfmadd231sd (%rsi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xb9,0x06]
; X64-NEXT: ## xmm0 = (xmm0 * mem) + xmm0
; X64-NEXT: kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT: vmovsd %xmm0, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x10,0xc0]
; X64-NEXT: vmovsd %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
  %a.val = load double, double* %a
  %av0 = insertelement <2 x double> undef, double %a.val, i32 0
  %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1

  %b.val = load double, double* %b
  %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
  %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1

  %vr = call <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> %av, <2 x double> %bv, <2 x double> %av, i8 %c, i32 4)

  %sr = extractelement <2 x double> %vr, i32 0
  store double %sr, double* %a
  ret void
}

declare <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32)

define <2 x double>@test_int_x86_avx512_mask3_vfmsub_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){
; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_sd:
; X86: ## %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovapd %xmm2, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xda]
; X86-NEXT: vfmsub231sd %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbb,0xd9]
; X86-NEXT: ## xmm3 {%k1} = (xmm0 * xmm1) - xmm3
; X86-NEXT: vmovapd %xmm2, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe2]
; X86-NEXT: vfmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0xfd,0x78,0xbb,0xe1]
; X86-NEXT: vaddpd %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe1,0x58,0xdc]
; X86-NEXT: vfmsub231sd {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x59,0xbb,0xd1]
; X86-NEXT: vaddpd %xmm3, %xmm2, %xmm0 ## encoding: [0xc5,0xe9,0x58,0xc3]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_sd:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovapd %xmm2, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xda]
; X64-NEXT: vfmsub231sd %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbb,0xd9]
; X64-NEXT: ## xmm3 {%k1} = (xmm0 * xmm1) - xmm3
; X64-NEXT: vmovapd %xmm2, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe2]
; X64-NEXT: vfmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0xfd,0x78,0xbb,0xe1]
; X64-NEXT: vaddpd %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe1,0x58,0xdc]
; X64-NEXT: vfmsub231sd {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x59,0xbb,0xd1]
; X64-NEXT: vaddpd %xmm3, %xmm2, %xmm0 ## encoding: [0xc5,0xe9,0x58,0xc3]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 11)
  %res2 = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 10)
  %res3 = fadd <2 x double> %res, %res1
  %res4 = fadd <2 x double> %res2, %res3
  ret <2 x double> %res4
}

declare <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)

define <4 x float>@test_int_x86_avx512_mask3_vfmsub_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){
; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_ss:
; X86: ## %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovaps %xmm2, %xmm3 ## encoding: [0xc5,0xf8,0x28,0xda]
; X86-NEXT: vfmsub231ss %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbb,0xd9]
; X86-NEXT: ## xmm3 {%k1} = (xmm0 * xmm1) - xmm3
; X86-NEXT: vmovaps %xmm2, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe2]
; X86-NEXT: vfmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0x7d,0x78,0xbb,0xe1]
; X86-NEXT: vaddps %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe0,0x58,0xdc]
; X86-NEXT: vfmsub231ss {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x59,0xbb,0xd1]
; X86-NEXT: vaddps %xmm3, %xmm2, %xmm0 ## encoding: [0xc5,0xe8,0x58,0xc3]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_ss:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovaps %xmm2, %xmm3 ## encoding: [0xc5,0xf8,0x28,0xda]
; X64-NEXT: vfmsub231ss %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbb,0xd9]
; X64-NEXT: ## xmm3 {%k1} = (xmm0 * xmm1) - xmm3
; X64-NEXT: vmovaps %xmm2, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe2]
; X64-NEXT: vfmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0x7d,0x78,0xbb,0xe1]
; X64-NEXT: vaddps %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe0,0x58,0xdc]
; X64-NEXT: vfmsub231ss {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x59,0xbb,0xd1]
; X64-NEXT: vaddps %xmm3, %xmm2, %xmm0 ## encoding: [0xc5,0xe8,0x58,0xc3]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 11)
  %res2 = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 10)
  %res3 = fadd <4 x float> %res, %res1
  %res4 = fadd <4 x float> %res2, %res3
  ret <4 x float> %res4
}

declare <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32)

define <2 x double>@test_int_x86_avx512_mask3_vfnmsub_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){
; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_sd:
; X86: ## %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovapd %xmm2, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xda]
; X86-NEXT: vfnmsub231sd %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbf,0xd9]
; X86-NEXT: ## xmm3 {%k1} = -(xmm0 * xmm1) - xmm3
; X86-NEXT: vmovapd %xmm2, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe2]
; X86-NEXT: vfnmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0xfd,0x78,0xbf,0xe1]
; X86-NEXT: vaddpd %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe1,0x58,0xdc]
; X86-NEXT: vfnmsub231sd {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x59,0xbf,0xd1]
; X86-NEXT: vaddpd %xmm3, %xmm2, %xmm0 ## encoding: [0xc5,0xe9,0x58,0xc3]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_sd:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovapd %xmm2, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xda]
; X64-NEXT: vfnmsub231sd %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbf,0xd9]
; X64-NEXT: ## xmm3 {%k1} = -(xmm0 * xmm1) - xmm3
; X64-NEXT: vmovapd %xmm2, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe2]
; X64-NEXT: vfnmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0xfd,0x78,0xbf,0xe1]
; X64-NEXT: vaddpd %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe1,0x58,0xdc]
; X64-NEXT: vfnmsub231sd {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x59,0xbf,0xd1]
; X64-NEXT: vaddpd %xmm3, %xmm2, %xmm0 ## encoding: [0xc5,0xe9,0x58,0xc3]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 11)
  %res2 = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 10)
  %res3 = fadd <2 x double> %res, %res1
  %res4 = fadd <2 x double> %res2, %res3
  ret <2 x double> %res4
}

declare <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)

define <4 x float>@test_int_x86_avx512_mask3_vfnmsub_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){
; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_ss:
; X86: ## %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vmovaps %xmm2, %xmm3 ## encoding: [0xc5,0xf8,0x28,0xda]
; X86-NEXT: vfnmsub231ss %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbf,0xd9]
; X86-NEXT: ## xmm3 {%k1} = -(xmm0 * xmm1) - xmm3
; X86-NEXT: vmovaps %xmm2, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe2]
; X86-NEXT: vfnmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0x7d,0x78,0xbf,0xe1]
; X86-NEXT: vaddps %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe0,0x58,0xdc]
; X86-NEXT: vfnmsub231ss {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x59,0xbf,0xd1]
; X86-NEXT: vaddps %xmm3, %xmm2, %xmm0 ## encoding: [0xc5,0xe8,0x58,0xc3]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_ss:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vmovaps %xmm2, %xmm3 ## encoding: [0xc5,0xf8,0x28,0xda]
; X64-NEXT: vfnmsub231ss %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbf,0xd9]
; X64-NEXT: ## xmm3 {%k1} = -(xmm0 * xmm1) - xmm3
; X64-NEXT: vmovaps %xmm2, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe2]
; X64-NEXT: vfnmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0x7d,0x78,0xbf,0xe1]
; X64-NEXT: vaddps %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe0,0x58,0xdc]
; X64-NEXT: vfnmsub231ss {ru-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x59,0xbf,0xd1]
; X64-NEXT: vaddps %xmm3, %xmm2, %xmm0 ## encoding: [0xc5,0xe8,0x58,0xc3]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 11)
  %res2 = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 10)
  %res3 = fadd <4 x float> %res, %res1
  %res4 = fadd <4 x float> %res2, %res3
  ret <4 x float> %res4
}

define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ss_rm(<4 x float> %x0, <4 x float> %x1, float *%ptr_b ,i8 %x3,i32 %x4) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_ss_rm:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ## encoding: [0x8a,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vfmadd231ss (%eax), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb9,0x08]
; X86-NEXT: ## xmm1 {%k1} = (xmm0 * mem) + xmm1
; X86-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_ss_rm:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vfmadd231ss (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb9,0x0f]
; X64-NEXT: ## xmm1 {%k1} = (xmm0 * mem) + xmm1
; X64-NEXT: vmovaps %xmm1, %xmm0 ## encoding: [0xc5,0xf8,0x28,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %res = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %vecinit.i, <4 x float> %x1, i8 %x3, i32 4)
  ret < 4 x float> %res
}

define <4 x float>@test_int_x86_avx512_mask_vfmadd_ss_rm(<4 x float> %x0, <4 x float> %x1,float *%ptr_b ,i8 %x3,i32 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vfmadd_ss_rm:
; X86: ## %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ## encoding: [0x8a,0x4c,0x24,0x08]
; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT: vfmadd132ss (%eax), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x99,0x00]
; X86-NEXT: ## xmm0 {%k1} = (xmm0 * mem) + xmm1
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vfmadd_ss_rm:
; X64: ## %bb.0:
; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT: vfmadd132ss (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x99,0x07]
; X64-NEXT: ## xmm0 {%k1} = (xmm0 * mem) + xmm1
; X64-NEXT: retq ## encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0,<4 x float> %vecinit.i, <4 x float> %x1, i8 %x3, i32 4)
  ret < 4 x float> %res
}

define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ss_rm(<4 x float> %x0, <4 x float> %x1,float *%ptr_b ,i8 %x3,i32 %x4) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ss_rm:
; CHECK: ## %bb.0:
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
; CHECK-NEXT: vblendps $1, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x01]
; CHECK-NEXT: ## xmm0 = xmm1[0],xmm0[1,2,3]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %q = load float, float* %ptr_b
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %res = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %vecinit.i, i8 0, i32 4)
  ret < 4 x float> %res
}

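; vpmovqd truncates <8 x i64> element-wise to <8 x i32>, with merge-masked
; and zero-masked variants.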
declare <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_pmov_qd_512(<8 x i64> %x0, <8 x i32> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pmov_qd_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpmovqd %zmm0, %ymm0 ## encoding: [0x62,0xf2,0x7e,0x48,0x35,0xc0]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32>@test_int_x86_avx512_mask_pmov_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmov_qd_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovqd %zmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x49,0x35,0xc1]
; X86-NEXT: vmovdqa %ymm1, %ymm0 ## encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmov_qd_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovqd %zmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x49,0x35,0xc1]
; X64-NEXT: vmovdqa %ymm1, %ymm0 ## encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2)
  ret <8 x i32> %res
}

define <8 x i32>@test_int_x86_avx512_maskz_pmov_qd_512(<8 x i64> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_pmov_qd_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpmovqd %zmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xc9,0x35,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmov_qd_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpmovqd %zmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xc9,0x35,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pmov.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2)
  ret <8 x i32> %res
}

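; Integer-to-float conversions with a rounding-mode operand; i32 8 selects
; round-to-nearest with exceptions suppressed ({rn-sae}) in the checks below.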
declare <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32>, <16 x float>, i16, i32)

define <16 x float> @test_int_x86_avx512_mask_cvt_dq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvtdq2ps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5b,0xc8]
; X86-NEXT: vcvtdq2ps {rn-sae}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x5b,0xc0]
; X86-NEXT: vaddps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vcvtdq2ps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5b,0xc8]
; X64-NEXT: vcvtdq2ps {rn-sae}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x5b,0xc0]
; X64-NEXT: vaddps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 %x2, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 -1, i32 8)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32>, <16 x float>, i16, i32)

define <16 x float> @test_int_x86_avx512_mask_cvt_udq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcvtudq2ps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7f,0x49,0x7a,0xc8]
; X86-NEXT: vcvtudq2ps {rn-sae}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7f,0x18,0x7a,0xc0]
; X86-NEXT: vaddps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vcvtudq2ps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7f,0x49,0x7a,0xc8]
; X64-NEXT: vcvtudq2ps {rn-sae}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7f,0x18,0x7a,0xc0]
; X64-NEXT: vaddps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 %x2, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %x0, <16 x float> %x1, i16 -1, i32 8)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

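; vcompresspd/vcompressps/vpcompress tests. With an all-ones mask the
; compress is an identity operation, so the unmasked tests fold to a bare ret.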
define <8 x double> @test_mask_compress_pd_512(<8 x double> %data, <8 x double> %passthru, i8 %mask) {
; X86-LABEL: test_mask_compress_pd_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vcompresspd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8a,0xc1]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_pd_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vcompresspd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8a,0xc1]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> %passthru, i8 %mask)
  ret <8 x double> %res
}

define <8 x double> @test_maskz_compress_pd_512(<8 x double> %data, i8 %mask) {
; X86-LABEL: test_maskz_compress_pd_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vcompresspd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x8a,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_pd_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vcompresspd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x8a,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> zeroinitializer, i8 %mask)
  ret <8 x double> %res
}

define <8 x double> @test_compress_pd_512(<8 x double> %data) {
; CHECK-LABEL: test_compress_pd_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> undef, i8 -1)
  ret <8 x double> %res
}

declare <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> %src0, i8 %mask)

define <16 x float> @test_mask_compress_ps_512(<16 x float> %data, <16 x float> %passthru, i16 %mask) {
; X86-LABEL: test_mask_compress_ps_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcompressps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8a,0xc1]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_ps_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vcompressps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8a,0xc1]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> %passthru, i16 %mask)
  ret <16 x float> %res
}

define <16 x float> @test_maskz_compress_ps_512(<16 x float> %data, i16 %mask) {
; X86-LABEL: test_maskz_compress_ps_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vcompressps %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x8a,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_ps_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vcompressps %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x8a,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> zeroinitializer, i16 %mask)
  ret <16 x float> %res
}

define <16 x float> @test_compress_ps_512(<16 x float> %data) {
; CHECK-LABEL: test_compress_ps_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> undef, i16 -1)
  ret <16 x float> %res
}

10602 declare <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> %src0, i16 %mask)
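; The 8-lane q tests take an i8 mask; on X86 it is passed on the stack, so it
; must be zero-extended with movzbl before kmovw, whereas an i16 mask can be
; loaded into %k1 straight from memory.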
define <8 x i64> @test_mask_compress_q_512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask) {
; X86-LABEL: test_mask_compress_q_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpcompressq %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8b,0xc1]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_q_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpcompressq %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8b,0xc1]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_maskz_compress_q_512(<8 x i64> %data, i8 %mask) {
; X86-LABEL: test_maskz_compress_q_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpcompressq %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x8b,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_q_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpcompressq %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x8b,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_compress_q_512(<8 x i64> %data) {
; CHECK-LABEL: test_compress_q_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> undef, i8 -1)
ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> %src0, i8 %mask)

define <16 x i32> @test_mask_compress_d_512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask) {
; X86-LABEL: test_mask_compress_d_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpcompressd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8b,0xc1]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_d_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpcompressd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8b,0xc1]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask)
ret <16 x i32> %res
}

define <16 x i32> @test_maskz_compress_d_512(<16 x i32> %data, i16 %mask) {
; X86-LABEL: test_maskz_compress_d_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpcompressd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x8b,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_d_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpcompressd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x8b,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}

define <16 x i32> @test_compress_d_512(<16 x i32> %data) {
; CHECK-LABEL: test_compress_d_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> undef, i16 -1)
ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> %src0, i16 %mask)

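; The expand tests mirror the compress ones: vexpandpd/vexpandps/vpexpandq/
; vpexpandd distribute consecutive source elements into the lanes selected by
; the mask, and the unmasked forms likewise fold to a bare return.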
define <8 x double> @test_expand_pd_512(<8 x double> %data) {
; CHECK-LABEL: test_expand_pd_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> undef, i8 -1)
ret <8 x double> %res
}

define <8 x double> @test_mask_expand_pd_512(<8 x double> %data, <8 x double> %passthru, i8 %mask) {
; X86-LABEL: test_mask_expand_pd_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vexpandpd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0xc8]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_pd_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vexpandpd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0xc8]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> %passthru, i8 %mask)
ret <8 x double> %res
}

define <8 x double> @test_maskz_expand_pd_512(<8 x double> %data, i8 %mask) {
; X86-LABEL: test_maskz_expand_pd_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vexpandpd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x88,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_pd_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vexpandpd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x88,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> zeroinitializer, i8 %mask)
ret <8 x double> %res
}

declare <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> %src0, i8 %mask)

define <16 x float> @test_expand_ps_512(<16 x float> %data) {
; CHECK-LABEL: test_expand_ps_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> undef, i16 -1)
ret <16 x float> %res
}

define <16 x float> @test_mask_expand_ps_512(<16 x float> %data, <16 x float> %passthru, i16 %mask) {
; X86-LABEL: test_mask_expand_ps_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vexpandps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x88,0xc8]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_ps_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vexpandps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x88,0xc8]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> %passthru, i16 %mask)
ret <16 x float> %res
}

define <16 x float> @test_maskz_expand_ps_512(<16 x float> %data, i16 %mask) {
; X86-LABEL: test_maskz_expand_ps_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vexpandps %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x88,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_ps_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vexpandps %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x88,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> zeroinitializer, i16 %mask)
ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> %src0, i16 %mask)

define <8 x i64> @test_expand_q_512(<8 x i64> %data) {
; CHECK-LABEL: test_expand_q_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> undef, i8 -1)
ret <8 x i64> %res
}

define <8 x i64> @test_mask_expand_q_512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask) {
; X86-LABEL: test_mask_expand_q_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpexpandq %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x89,0xc8]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_q_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpexpandq %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x89,0xc8]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask)
ret <8 x i64> %res
}

define <8 x i64> @test_maskz_expand_q_512(<8 x i64> %data, i8 %mask) {
; X86-LABEL: test_maskz_expand_q_512:
; X86: ## %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vpexpandq %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x89,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_q_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpexpandq %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x89,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> zeroinitializer, i8 %mask)
ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> %src0, i8 %mask)

define <16 x i32> @test_expand_d_512(<16 x i32> %data) {
; CHECK-LABEL: test_expand_d_512:
; CHECK: ## %bb.0:
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> undef, i16 -1)
ret <16 x i32> %res
}

define <16 x i32> @test_mask_expand_d_512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask) {
; X86-LABEL: test_mask_expand_d_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpexpandd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x89,0xc8]
; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_d_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpexpandd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x89,0xc8]
; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask)
ret <16 x i32> %res
}

define <16 x i32> @test_maskz_expand_d_512(<16 x i32> %data, i16 %mask) {
; X86-LABEL: test_maskz_expand_d_512:
; X86: ## %bb.0:
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vpexpandd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x89,0xc0]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_d_512:
; X64: ## %bb.0:
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpexpandd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x89,0xc0]
; X64-NEXT: retq ## encoding: [0xc3]
%res = call <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> zeroinitializer, i16 %mask)
ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> %src0, i16 %mask)

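; test_cmp_512 xors two compare results and selects zero where the xor is set;
; the backend folds the inversion into kxnorw and uses the combined mask as
; the zero-masking predicate of the vmovaps load.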
define <16 x float> @test_cmp_512(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x float> %d, float* %p) {
; X86-LABEL: test_cmp_512:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vcmpltps {sae}, %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7c,0x18,0xc2,0xc1,0x01]
; X86-NEXT: vcmpltps %zmm3, %zmm2, %k1 ## encoding: [0x62,0xf1,0x6c,0x48,0xc2,0xcb,0x01]
; X86-NEXT: kxnorw %k1, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc9]
; X86-NEXT: vmovaps (%eax), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x28,0x00]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_cmp_512:
; X64: ## %bb.0: ## %entry
; X64-NEXT: vcmpltps {sae}, %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7c,0x18,0xc2,0xc1,0x01]
; X64-NEXT: vcmpltps %zmm3, %zmm2, %k1 ## encoding: [0x62,0xf1,0x6c,0x48,0xc2,0xcb,0x01]
; X64-NEXT: kxnorw %k1, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc9]
; X64-NEXT: vmovaps (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x28,0x07]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
%0 = tail call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 1, i32 8)
%1 = tail call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %c, <16 x float> %d, i32 1, i32 4)
%2 = bitcast float* %p to <16 x float>*
%3 = load <16 x float>, <16 x float>* %2
%4 = xor <16 x i1> %0, %1
%5 = select <16 x i1> %4, <16 x float> zeroinitializer, <16 x float> %3
ret <16 x float> %5
}

declare <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float>, <16 x float>, i32, i32)